//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
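
// Note: being cl::Hidden, these options only appear under -help-hidden, but
// they can still be toggled on the llc command line, e.g. (illustrative):
//   llc -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-preinc foo.ll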

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8:4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
    addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
  }

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }
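  // (lha/lhax give a sign-extending i16 load; there is no byte equivalent, so
  // an i8 SEXTLOAD is expanded into a plain load followed by a sign extend,
  // which typically selects extsb.)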

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
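  // (These correspond to the update-form memory instructions, e.g.
  // lbzu/lhzu/lwzu/ldu/lfsu/lfdu and stbu/sthu/stwu/stdu/stfsu/stfdu.)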

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load / store of condition registers
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);
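  // The Expands above become libm calls (e.g. FSIN -> sin/sinf), while FMA
  // stays legal and selects the fused multiply-add instructions
  // (fmadd/fmadds).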

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

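  // FPRND supplies the floating-point round-to-integral instructions
  // (frin, friz, frip, frim), which implement these rounding nodes directly.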
  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP or CTTZ; CTPOP is available only with POPCNTD.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

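  // POPCNTD supplies the popcntw/popcntd instructions used for CTPOP.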
  if (Subtarget.hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

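  // With the direct-move facility (POWER8), values can move between GPRs and
  // the FP/vector registers without going through memory (mtvsr*/mfvsr*);
  // without it, a BITCAST must be expanded via a stack temporary.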
  if (Subtarget.hasDirectMove()) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
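  // (Expand rewrites each of these as two supported compares combined with
  // logic, e.g. SETUEQ is checked as SETOEQ || SETUO.)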

  if (Subtarget.has64BitSupport()) {
    // Targets with 64-bit support also have instructions for converting
    // between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }
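  // The *_PARTS nodes represent shifts of a value split across two registers
  // (i64 as two i32 halves in 32-bit mode, i128 as two i64 halves in 64-bit
  // mode); the custom lowering assembles them from single-register shifts.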

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; the rest we can
    // handle with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
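    // (Altivec only has the ordered compares vcmpeqfp, vcmpgefp and vcmpgtfp,
    // so the predicates above must be expanded into compare combinations.)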

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available on 32-bit targets.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }
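  // (In this version of the API the alignment arguments are log2 values, so
  // setPrefFunctionAlignment(4) requests 16-byte alignment.)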

  setInsertFencesForAtomic(true);

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
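/// For example, a struct containing a 128-bit vector member raises the
/// alignment to 16 bytes when the cap (MaxMaxAlign) allows it.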
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // Everything else is aligned to 8 bytes on PPC64 and 4 bytes on PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
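/// (Undef elements in a shuffle mask are encoded as -1, hence the Op < 0
/// test below.)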
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
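/// For example, for ShuffleKind 0 (big-endian, two inputs) the mask must be
/// <1, 3, 5, ..., 31> modulo undefs, i.e. the odd bytes of the concatenated
/// inputs.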
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
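/// For example, for ShuffleKind 0 (big-endian, two inputs) the mask must be
/// <2, 3, 6, 7, ..., 30, 31> modulo undefs, i.e. the low halfword of each
/// word of the concatenated inputs.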
01158 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
01159                                SelectionDAG &DAG) {
01160   bool IsLE = DAG.getDataLayout().isLittleEndian();
01161   if (ShuffleKind == 0) {
01162     if (IsLE)
01163       return false;
01164     for (unsigned i = 0; i != 16; i += 2)
01165       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
01166           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
01167         return false;
01168   } else if (ShuffleKind == 2) {
01169     if (!IsLE)
01170       return false;
01171     for (unsigned i = 0; i != 16; i += 2)
01172       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
01173           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
01174         return false;
01175   } else if (ShuffleKind == 1) {
01176     unsigned j = IsLE ? 0 : 2;
01177     for (unsigned i = 0; i != 8; i += 2)
01178       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
01179           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
01180           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
01181           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
01182         return false;
01183   }
01184   return true;
01185 }
01186 
01187 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
01188 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
01189 /// current subtarget.
01190 ///
01191 /// The ShuffleKind distinguishes between big-endian operations with
01192 /// two different inputs (0), either-endian operations with two identical
01193 /// inputs (1), and little-endian operations with two different inputs (2).
01194 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
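      /// For example, with ShuffleKind 0 (big-endian, two inputs) the expected
      /// mask is <4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31>: the four
      /// low-order bytes of each doubleword drawn from both input vectors.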
01195 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
01196                                SelectionDAG &DAG) {
01197   const PPCSubtarget& Subtarget =
01198     static_cast<const PPCSubtarget&>(DAG.getSubtarget());
01199   if (!Subtarget.hasP8Vector())
01200     return false;
01201 
01202   bool IsLE = DAG.getDataLayout().isLittleEndian();
01203   if (ShuffleKind == 0) {
01204     if (IsLE)
01205       return false;
01206     for (unsigned i = 0; i != 16; i += 4)
01207       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
01208           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
01209           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
01210           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
01211         return false;
01212   } else if (ShuffleKind == 2) {
01213     if (!IsLE)
01214       return false;
01215     for (unsigned i = 0; i != 16; i += 4)
01216       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
01217           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
01218           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
01219           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
01220         return false;
01221   } else if (ShuffleKind == 1) {
01222     unsigned j = IsLE ? 0 : 4;
01223     for (unsigned i = 0; i != 8; i += 4)
01224       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
01225           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
01226           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
01227           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
01228           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
01229           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
01230           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
01231           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
01232         return false;
01233   }
01234   return true;
01235 }
01236 
01237 /// isVMerge - Common function, used to match vmrg* shuffles.
01238 ///
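      /// For example, isVMerge(N, 4, 8, 24) checks for the big-endian VMRGLW
      /// pattern <8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31>: the two
      /// low-order words of each input, interleaved.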
01239 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
01240                      unsigned LHSStart, unsigned RHSStart) {
01241   if (N->getValueType(0) != MVT::v16i8)
01242     return false;
01243   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
01244          "Unsupported merge size!");
01245 
01246   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
01247     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
01248       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
01249                              LHSStart+j+i*UnitSize) ||
01250           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
01251                              RHSStart+j+i*UnitSize))
01252         return false;
01253     }
01254   return true;
01255 }
01256 
01257 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
01258 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
01259 /// The ShuffleKind distinguishes between big-endian merges with two
01260 /// different inputs (0), either-endian merges with two identical inputs (1),
01261 /// and little-endian merges with two different inputs (2).  For the latter,
01262 /// the input operands are swapped (see PPCInstrAltivec.td).
01263 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
01264                              unsigned ShuffleKind, SelectionDAG &DAG) {
01265   if (DAG.getDataLayout().isLittleEndian()) {
01266     if (ShuffleKind == 1) // unary
01267       return isVMerge(N, UnitSize, 0, 0);
01268     else if (ShuffleKind == 2) // swapped
01269       return isVMerge(N, UnitSize, 0, 16);
01270     else
01271       return false;
01272   } else {
01273     if (ShuffleKind == 1) // unary
01274       return isVMerge(N, UnitSize, 8, 8);
01275     else if (ShuffleKind == 0) // normal
01276       return isVMerge(N, UnitSize, 8, 24);
01277     else
01278       return false;
01279   }
01280 }
01281 
01282 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
01283 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
01284 /// The ShuffleKind distinguishes between big-endian merges with two
01285 /// different inputs (0), either-endian merges with two identical inputs (1),
01286 /// and little-endian merges with two different inputs (2).  For the latter,
01287 /// the input operands are swapped (see PPCInstrAltivec.td).
01288 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
01289                              unsigned ShuffleKind, SelectionDAG &DAG) {
01290   if (DAG.getDataLayout().isLittleEndian()) {
01291     if (ShuffleKind == 1) // unary
01292       return isVMerge(N, UnitSize, 8, 8);
01293     else if (ShuffleKind == 2) // swapped
01294       return isVMerge(N, UnitSize, 8, 24);
01295     else
01296       return false;
01297   } else {
01298     if (ShuffleKind == 1) // unary
01299       return isVMerge(N, UnitSize, 0, 0);
01300     else if (ShuffleKind == 0) // normal
01301       return isVMerge(N, UnitSize, 0, 16);
01302     else
01303       return false;
01304   }
01305 }
01306 
01307 /**
01308  * \brief Common function used to match vmrgew and vmrgow shuffles
01309  *
01310  * The indexOffset determines whether to look for even or odd words in
01312  * the shuffle mask. This is based on the endianness of the target
01313  * machine.
01313  *   - Little Endian:
01314  *     - Use offset of 0 to check for odd elements
01315  *     - Use offset of 4 to check for even elements
01316  *   - Big Endian:
01317  *     - Use offset of 0 to check for even elements
01318  *     - Use offset of 4 to check for odd elements
01319  * A detailed description of the vector element ordering for little endian
01320  * and big endian can be found in the article "Targeting your applications -
01321  * what little endian and big endian IBM XL C/C++ compiler differences mean
01322  * to you" at
01323  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
01324  *
01325  * The mask to the shuffle vector instruction specifies the indices of the
01326  * elements from the two input vectors to place in the result. The elements are
01327  * numbered in array-access order, starting with the first vector. These vectors
01328  * are always of type v16i8, so each vector will contain 16 elements,
01329  * each 8 bits wide. More information on the shufflevector instruction
01330  * can be found in the Language Reference:
01331  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
01332  *
01333  * The RHSStartValue indicates whether the same input vectors are used (unary)
01334  * or two different input vectors are used, based on the following:
01335  *   - If the instruction uses the same vector for both inputs, the range of the
01336  *     indices will be 0 to 15. In this case, the RHSStart value passed should
01337  *     be 0.
01338  *   - If the instruction has two different vectors then the range of the
01339  *     indices will be 0 to 31. In this case, the RHSStart value passed should
01340  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
01341  *     to 31 specify elements in the second vector).
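       *
       * For example, with IndexOffset 0 and RHSStartValue 16 (an even-word
       * merge of two different inputs on a big-endian target), the expected
       * mask is <0,1,2,3,16,17,18,19,8,9,10,11,24,25,26,27>: words 0 and 2
       * of each input, interleaved.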
01342  *
01343  * \param[in] N The shuffle vector SD Node to analyze
01344  * \param[in] IndexOffset Specifies whether to look for even or odd elements
01345  * \param[in] RHSStartValue Specifies the starting index for the right-hand input
01346  * vector to the shuffle_vector instruction
01347  * \return true iff this shuffle vector represents an even or odd word merge
01348  */
01349 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
01350                      unsigned RHSStartValue) {
01351   if (N->getValueType(0) != MVT::v16i8)
01352     return false;
01353 
01354   for (unsigned i = 0; i < 2; ++i)
01355     for (unsigned j = 0; j < 4; ++j)
01356       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
01357                              i*RHSStartValue+j+IndexOffset) ||
01358           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
01359                              i*RHSStartValue+j+IndexOffset+8))
01360         return false;
01361   return true;
01362 }
01363 
01364 /**
01365  * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
01366  * vmrgow instructions.
01367  *
01368  * \param[in] N The shuffle vector SD Node to analyze
01369  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
01370  * \param[in] ShuffleKind Identify the type of merge:
01371  *   - 0 = big-endian merge with two different inputs;
01372  *   - 1 = either-endian merge with two identical inputs;
01373  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
01374  *     little-endian merges).
01375  * \param[in] DAG The current SelectionDAG
01376  * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
01377  */
01378 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
01379                               unsigned ShuffleKind, SelectionDAG &DAG) {
01380   if (DAG.getDataLayout().isLittleEndian()) {
01381     unsigned indexOffset = CheckEven ? 4 : 0;
01382     if (ShuffleKind == 1) // Unary
01383       return isVMerge(N, indexOffset, 0);
01384     else if (ShuffleKind == 2) // swapped
01385       return isVMerge(N, indexOffset, 16);
01386     else
01387       return false;
01388   }
01389   else {
01390     unsigned indexOffset = CheckEven ? 0 : 4;
01391     if (ShuffleKind == 1) // Unary
01392       return isVMerge(N, indexOffset, 0);
01393     else if (ShuffleKind == 0) // Normal
01394       return isVMerge(N, indexOffset, 16);
01395     else
01396       return false;
01397   }
01398   return false;
01399 }
01400 
01401 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
01402 /// amount, otherwise return -1.
01403 /// The ShuffleKind distinguishes between big-endian operations with two
01404 /// different inputs (0), either-endian operations with two identical inputs
01405 /// (1), and little-endian operations with two different inputs (2).  For the
01406 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
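      /// For example, on a big-endian target the mask <3,4,5,...,17,18>
      /// selects 16 consecutive bytes starting at byte 3 of the concatenated
      /// inputs, so this returns a shift amount of 3; for the corresponding
      /// little-endian case (ShuffleKind 2) the returned amount is 16-3 = 13.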
01407 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
01408                              SelectionDAG &DAG) {
01409   if (N->getValueType(0) != MVT::v16i8)
01410     return -1;
01411 
01412   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
01413 
01414   // Find the first non-undef value in the shuffle mask.
01415   unsigned i;
01416   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
01417     /*search*/;
01418 
01419   if (i == 16) return -1;  // all undef.
01420 
01421   // Otherwise, check to see if the rest of the elements are consecutively
01422   // numbered from this value.
01423   unsigned ShiftAmt = SVOp->getMaskElt(i);
01424   if (ShiftAmt < i) return -1;
01425 
01426   ShiftAmt -= i;
01427   bool isLE = DAG.getDataLayout().isLittleEndian();
01428 
01429   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
01430     // Check the rest of the elements to see if they are consecutive.
01431     for (++i; i != 16; ++i)
01432       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
01433         return -1;
01434   } else if (ShuffleKind == 1) {
01435     // Check the rest of the elements to see if they are consecutive.
01436     for (++i; i != 16; ++i)
01437       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
01438         return -1;
01439   } else
01440     return -1;
01441 
01442   if (isLE)
01443     ShiftAmt = 16 - ShiftAmt;
01444 
01445   return ShiftAmt;
01446 }
01447 
01448 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
01449 /// specifies a splat of a single element that is suitable for input to
01450 /// VSPLTB/VSPLTH/VSPLTW.
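      /// For example, with EltSize 4 the mask
      /// <4,5,6,7,4,5,6,7,4,5,6,7,4,5,6,7> is accepted: every 4-byte group
      /// repeats the bytes of element 1 of the first input vector.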
01451 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
01452   assert(N->getValueType(0) == MVT::v16i8 &&
01453          (EltSize == 1 || EltSize == 2 || EltSize == 4));
01454 
01455   // The consecutive indices need to specify an element, not part of two
01456   // different elements.  So abandon ship early if this isn't the case.
01457   if (N->getMaskElt(0) % EltSize != 0)
01458     return false;
01459 
01460   // This is a splat operation if each element of the permute is the same, and
01461   // if the value doesn't reference the second vector.
01462   unsigned ElementBase = N->getMaskElt(0);
01463 
01464   // FIXME: Handle UNDEF elements too!
01465   if (ElementBase >= 16)
01466     return false;
01467 
01468   // Check that the indices are consecutive, in the case of a multi-byte element
01469   // splatted with a v16i8 mask.
01470   for (unsigned i = 1; i != EltSize; ++i)
01471     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
01472       return false;
01473 
01474   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
01475     if (N->getMaskElt(i) < 0) continue;
01476     for (unsigned j = 0; j != EltSize; ++j)
01477       if (N->getMaskElt(i+j) != N->getMaskElt(j))
01478         return false;
01479   }
01480   return true;
01481 }
01482 
01483 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
01484 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
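      /// For example, for the EltSize-4 splat mask <4,5,6,7,...> above, this
      /// returns 1 (word 1) on a big-endian target and (16/4)-1-1 = 2 on a
      /// little-endian target, where the element numbering is reversed.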
01485 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
01486                                 SelectionDAG &DAG) {
01487   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
01488   assert(isSplatShuffleMask(SVOp, EltSize));
01489   if (DAG.getDataLayout().isLittleEndian())
01490     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
01491   else
01492     return SVOp->getMaskElt(0) / EltSize;
01493 }
01494 
01495 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
01496 /// by using a vspltis[bhw] instruction of the specified element size, return
01497 /// the constant being splatted.  The ByteSize field indicates the number of
01498 /// bytes of each element [124] -> [bhw].
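      /// For example, a v8i16 build_vector with all elements equal to 5
      /// matches for ByteSize 2 and yields the constant 5 (vspltish 5),
      /// whereas a splat of 300 does not match, since 300 cannot be
      /// materialized by a 5-bit signed immediate.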
01499 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
01500   SDValue OpVal(nullptr, 0);
01501 
01502   // If ByteSize of the splat is bigger than the element size of the
01503   // build_vector, then we have a case where we are checking for a splat where
01504   // multiple elements of the buildvector are folded together into a single
01505   // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
01506   unsigned EltSize = 16/N->getNumOperands();
01507   if (EltSize < ByteSize) {
01508     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
01509     SDValue UniquedVals[4];
01510     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
01511 
01512     // See if all of the elements in the buildvector agree across.
01513     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
01514       if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
01515       // If the element isn't a constant, bail fully out.
01516       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
01517 
01519       if (!UniquedVals[i&(Multiple-1)].getNode())
01520         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
01521       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
01522         return SDValue();  // no match.
01523     }
01524 
01525     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
01526     // either constant or undef values that are identical for each chunk.  See
01527     // if these chunks can form into a larger vspltis*.
01528 
01529     // Check to see if all of the leading entries are either 0 or -1.  If
01530     // neither, then this won't fit into the immediate field.
01531     bool LeadingZero = true;
01532     bool LeadingOnes = true;
01533     for (unsigned i = 0; i != Multiple-1; ++i) {
01534       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
01535 
01536       LeadingZero &= isNullConstant(UniquedVals[i]);
01537       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
01538     }
01539     // Finally, check the least significant entry.
01540     if (LeadingZero) {
01541       if (!UniquedVals[Multiple-1].getNode())
01542         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
01543       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
01544       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
01545         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
01546     }
01547     if (LeadingOnes) {
01548       if (!UniquedVals[Multiple-1].getNode())
01549         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
01550       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
01551       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
01552         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
01553     }
01554 
01555     return SDValue();
01556   }
01557 
01558   // Check to see if this buildvec has a single non-undef value in its elements.
01559   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
01560     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
01561     if (!OpVal.getNode())
01562       OpVal = N->getOperand(i);
01563     else if (OpVal != N->getOperand(i))
01564       return SDValue();
01565   }
01566 
01567   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
01568 
01569   unsigned ValSizeInBytes = EltSize;
01570   uint64_t Value = 0;
01571   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
01572     Value = CN->getZExtValue();
01573   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
01574     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
01575     Value = FloatToBits(CN->getValueAPF().convertToFloat());
01576   }
01577 
01578   // If the splat value is larger than the element value, then we can never do
01579   // this splat.  The only case where the replicated bits would fit in the
01580   // immediate field is zero, and we prefer to use vxor for it.
01581   if (ValSizeInBytes < ByteSize) return SDValue();
01582 
01583   // If the element value is larger than the splat value, check if it consists
01584   // of a repeated bit pattern of size ByteSize.
01585   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
01586     return SDValue();
01587 
01588   // Properly sign extend the value.
01589   int MaskVal = SignExtend32(Value, ByteSize * 8);
01590 
01591   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
01592   if (MaskVal == 0) return SDValue();
01593 
01594   // Finally, if this value fits in a 5 bit sext field, return it
01595   if (SignExtend32<5>(MaskVal) == MaskVal)
01596     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
01597   return SDValue();
01598 }
01599 
01600 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
01601 /// amount, otherwise return -1.
01602 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
01603   EVT VT = N->getValueType(0);
01604   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
01605     return -1;
01606 
01607   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
01608 
01609   // Find the first non-undef value in the shuffle mask.
01610   unsigned i;
01611   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
01612     /*search*/;
01613 
01614   if (i == 4) return -1;  // all undef.
01615 
01616   // Otherwise, check to see if the rest of the elements are consecutively
01617   // numbered from this value.
01618   unsigned ShiftAmt = SVOp->getMaskElt(i);
01619   if (ShiftAmt < i) return -1;
01620   ShiftAmt -= i;
01621 
01622   // Check the rest of the elements to see if they are consecutive.
01623   for (++i; i != 4; ++i)
01624     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
01625       return -1;
01626 
01627   return ShiftAmt;
01628 }
01629 
01630 //===----------------------------------------------------------------------===//
01631 //  Addressing Mode Selection
01632 //===----------------------------------------------------------------------===//
01633 
01634 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
01635 /// or 64-bit immediate, and if the value can be accurately represented as a
01636 /// sign extension from a 16-bit value.  If so, this returns true and the
01637 /// immediate.
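      /// For example, an i32 constant 32767 qualifies (Imm = 32767), while
      /// 32768 does not: truncating it to 16 bits yields -32768, which no
      /// longer matches the original value.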
01638 static bool isIntS16Immediate(SDNode *N, short &Imm) {
01639   if (!isa<ConstantSDNode>(N))
01640     return false;
01641 
01642   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
01643   if (N->getValueType(0) == MVT::i32)
01644     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
01645   else
01646     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
01647 }
01648 static bool isIntS16Immediate(SDValue Op, short &Imm) {
01649   return isIntS16Immediate(Op.getNode(), Imm);
01650 }
01651 
01652 /// SelectAddressRegReg - Given the specified address, check to see if it
01653 /// can be represented as an indexed [r+r] operation.  Returns false if it
01654 /// can be more efficiently represented with [r+imm].
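      /// For example, (add %X, %Y) yields Base = %X, Index = %Y, whereas
      /// (add %X, 100) is rejected because the offset fits in a signed 16-bit
      /// displacement.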
01655 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
01656                                             SDValue &Index,
01657                                             SelectionDAG &DAG) const {
01658   short imm = 0;
01659   if (N.getOpcode() == ISD::ADD) {
01660     if (isIntS16Immediate(N.getOperand(1), imm))
01661       return false;    // r+i
01662     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
01663       return false;    // r+i
01664 
01665     Base = N.getOperand(0);
01666     Index = N.getOperand(1);
01667     return true;
01668   } else if (N.getOpcode() == ISD::OR) {
01669     if (isIntS16Immediate(N.getOperand(1), imm))
01670       return false;    // r+i; fold into the [r+imm] form if we can.
01671 
01672     // If this is an or of disjoint bitfields, we can codegen this as an add
01673     // (for better address arithmetic) if the LHS and RHS of the OR are provably
01674     // disjoint.
01675     APInt LHSKnownZero, LHSKnownOne;
01676     APInt RHSKnownZero, RHSKnownOne;
01677     DAG.computeKnownBits(N.getOperand(0),
01678                          LHSKnownZero, LHSKnownOne);
01679 
01680     if (LHSKnownZero.getBoolValue()) {
01681       DAG.computeKnownBits(N.getOperand(1),
01682                            RHSKnownZero, RHSKnownOne);
01683       // If all of the bits are known zero on the LHS or RHS, the add won't
01684       // carry.
01685       if (~(LHSKnownZero | RHSKnownZero) == 0) {
01686         Base = N.getOperand(0);
01687         Index = N.getOperand(1);
01688         return true;
01689       }
01690     }
01691   }
01692 
01693   return false;
01694 }
01695 
01696 // If we happen to be doing an i64 load or store into a stack slot that has
01697 // less than a 4-byte alignment, then the frame-index elimination may need to
01698 // use an indexed load or store instruction (because the offset may not be a
01699 // multiple of 4). The extra register needed to hold the offset comes from the
01700 // register scavenger, and it is possible that the scavenger will need to use
01701 // an emergency spill slot. As a result, we need to make sure that a spill slot
01702 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
01703 // stack slot.
01704 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
01705   // FIXME: This does not handle the LWA case.
01706   if (VT != MVT::i64)
01707     return;
01708 
01709   // NOTE: We'll exclude negative FIs here, which come from argument
01710   // lowering, because there are no known test cases triggering this problem
01711   // using packed structures (or similar). We can remove this exclusion if
01712   // we find such a test case. The reason why this is so test-case driven is
01713   // because this entire 'fixup' is only to prevent crashes (from the
01714   // register scavenger) on not-really-valid inputs. For example, if we have:
01715   //   %a = alloca i1
01716   //   %b = bitcast i1* %a to i64*
01717   //   store i64 0, i64* %b
01718   // then the store should really be marked as 'align 1', but is not. If it
01719   // were marked as 'align 1' then the indexed form would have been
01720   // instruction-selected initially, and the problem this 'fixup' is preventing
01721   // won't happen regardless.
01722   if (FrameIdx < 0)
01723     return;
01724 
01725   MachineFunction &MF = DAG.getMachineFunction();
01726   MachineFrameInfo *MFI = MF.getFrameInfo();
01727 
01728   unsigned Align = MFI->getObjectAlignment(FrameIdx);
01729   if (Align >= 4)
01730     return;
01731 
01732   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
01733   FuncInfo->setHasNonRISpills();
01734 }
01735 
01736 /// Returns true if the address N can be represented by a base register plus
01737 /// a signed 16-bit displacement [r+imm], and if it is not better
01738 /// represented as reg+reg.  If Aligned is true, only accept displacements
01739 /// suitable for STD and friends, i.e. multiples of 4.
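      /// For example, (add %X, 20) yields Disp = 20 and Base = %X.  With
      /// Aligned, (add %X, 22) is not matched as [r+imm]; it falls through to
      /// the final [r+0] form, leaving the add itself as the base.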
01740 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
01741                                             SDValue &Base,
01742                                             SelectionDAG &DAG,
01743                                             bool Aligned) const {
01744   // FIXME dl should come from parent load or store, not from address
01745   SDLoc dl(N);
01746   // If this can be more profitably realized as r+r, fail.
01747   if (SelectAddressRegReg(N, Disp, Base, DAG))
01748     return false;
01749 
01750   if (N.getOpcode() == ISD::ADD) {
01751     short imm = 0;
01752     if (isIntS16Immediate(N.getOperand(1), imm) &&
01753         (!Aligned || (imm & 3) == 0)) {
01754       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
01755       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
01756         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
01757         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
01758       } else {
01759         Base = N.getOperand(0);
01760       }
01761       return true; // [r+i]
01762     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
01763       // Match LOAD (ADD (X, Lo(G))).
01764       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
01765              && "Cannot handle constant offsets yet!");
01766       Disp = N.getOperand(1).getOperand(0);  // The global address.
01767       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
01768              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
01769              Disp.getOpcode() == ISD::TargetConstantPool ||
01770              Disp.getOpcode() == ISD::TargetJumpTable);
01771       Base = N.getOperand(0);
01772       return true;  // [&g+r]
01773     }
01774   } else if (N.getOpcode() == ISD::OR) {
01775     short imm = 0;
01776     if (isIntS16Immediate(N.getOperand(1), imm) &&
01777         (!Aligned || (imm & 3) == 0)) {
01778       // If this is an or of disjoint bitfields, we can codegen this as an add
01779       // (for better address arithmetic) if the LHS and RHS of the OR are
01780       // provably disjoint.
01781       APInt LHSKnownZero, LHSKnownOne;
01782       DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
01783 
01784       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
01785         // If all of the bits are known zero on the LHS or RHS, the add won't
01786         // carry.
01787         if (FrameIndexSDNode *FI =
01788               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
01789           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
01790           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
01791         } else {
01792           Base = N.getOperand(0);
01793         }
01794         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
01795         return true;
01796       }
01797     }
01798   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
01799     // Loading from a constant address.
01800 
01801     // If this address fits entirely in a 16-bit sext immediate field, codegen
01802     // this as "d, 0"
01803     short Imm;
01804     if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
01805       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
01806       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
01807                              CN->getValueType(0));
01808       return true;
01809     }
01810 
01811     // Handle 32-bit sext immediates with LIS + addr mode.
01812     if ((CN->getValueType(0) == MVT::i32 ||
01813          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
01814         (!Aligned || (CN->getZExtValue() & 3) == 0)) {
01815       int Addr = (int)CN->getZExtValue();
01816 
01817       // Otherwise, break this down into an LIS + disp.
01818       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
01819 
01820       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
01821                                    MVT::i32);
01822       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
01823       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
01824       return true;
01825     }
01826   }
01827 
01828   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
01829   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
01830     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
01831     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
01832   } else
01833     Base = N;
01834   return true;      // [r+0]
01835 }
01836 
01837 /// SelectAddressRegRegOnly - Given the specified address, force it to be
01838 /// represented as an indexed [r+r] operation.
01839 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
01840                                                 SDValue &Index,
01841                                                 SelectionDAG &DAG) const {
01842   // Check to see if we can easily represent this as an [r+r] address.  This
01843   // will fail if it thinks that the address is more profitably represented as
01844   // reg+imm, e.g. where imm = 0.
01845   if (SelectAddressRegReg(N, Base, Index, DAG))
01846     return true;
01847 
01848   // If the operand is an addition, always emit this as [r+r], since this is
01849   // better (for code size, and execution, as the memop does the add for free)
01850   // than emitting an explicit add.
01851   if (N.getOpcode() == ISD::ADD) {
01852     Base = N.getOperand(0);
01853     Index = N.getOperand(1);
01854     return true;
01855   }
01856 
01857   // Otherwise, do it the hard way, using R0 as the base register.
01858   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
01859                          N.getValueType());
01860   Index = N;
01861   return true;
01862 }
01863 
01864 /// getPreIndexedAddressParts - Returns true, and sets the base pointer,
01865 /// offset pointer, and addressing mode by reference, if the node's address
01866 /// can be legally represented as a pre-indexed load/store address.
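      /// On PowerPC these correspond to the update-form instructions (e.g.
      /// lbzu/lwzu/stwu, and ldu/stdu, which additionally require the
      /// displacement to be a multiple of 4).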
01867 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
01868                                                   SDValue &Offset,
01869                                                   ISD::MemIndexedMode &AM,
01870                                                   SelectionDAG &DAG) const {
01871   if (DisablePPCPreinc) return false;
01872 
01873   bool isLoad = true;
01874   SDValue Ptr;
01875   EVT VT;
01876   unsigned Alignment;
01877   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
01878     Ptr = LD->getBasePtr();
01879     VT = LD->getMemoryVT();
01880     Alignment = LD->getAlignment();
01881   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
01882     Ptr = ST->getBasePtr();
01883     VT  = ST->getMemoryVT();
01884     Alignment = ST->getAlignment();
01885     isLoad = false;
01886   } else
01887     return false;
01888 
01889   // PowerPC doesn't have preinc load/store instructions for vectors (except
01890   // for QPX, which does have preinc r+r forms).
01891   if (VT.isVector()) {
01892     if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
01893       return false;
01894     } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
01895       AM = ISD::PRE_INC;
01896       return true;
01897     }
01898   }
01899 
01900   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
01901 
01902     // Common code will reject creating a pre-inc form if the base pointer
01903     // is a frame index, or if N is a store and the base pointer is either
01904     // the same as or a predecessor of the value being stored.  Check for
01905     // those situations here, and try with swapped Base/Offset instead.
01906     bool Swap = false;
01907 
01908     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
01909       Swap = true;
01910     else if (!isLoad) {
01911       SDValue Val = cast<StoreSDNode>(N)->getValue();
01912       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
01913         Swap = true;
01914     }
01915 
01916     if (Swap)
01917       std::swap(Base, Offset);
01918 
01919     AM = ISD::PRE_INC;
01920     return true;
01921   }
01922 
01923   // LDU/STU can only handle immediates that are a multiple of 4.
01924   if (VT != MVT::i64) {
01925     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
01926       return false;
01927   } else {
01928     // LDU/STU need an address with at least 4-byte alignment.
01929     if (Alignment < 4)
01930       return false;
01931 
01932     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
01933       return false;
01934   }
01935 
01936   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
01937     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
01938     // sext i32 to i64 when addr mode is r+i.
01939     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
01940         LD->getExtensionType() == ISD::SEXTLOAD &&
01941         isa<ConstantSDNode>(Offset))
01942       return false;
01943   }
01944 
01945   AM = ISD::PRE_INC;
01946   return true;
01947 }
01948 
01949 //===----------------------------------------------------------------------===//
01950 //  LowerOperation implementation
01951 //===----------------------------------------------------------------------===//
01952 
01953 /// GetLabelAccessInfo - Return true if we should reference labels using a
01954 /// PICBase, and set the HiOpFlags and LoOpFlags to the target MO flags.
01955 static bool GetLabelAccessInfo(const TargetMachine &TM,
01956                                const PPCSubtarget &Subtarget,
01957                                unsigned &HiOpFlags, unsigned &LoOpFlags,
01958                                const GlobalValue *GV = nullptr) {
01959   HiOpFlags = PPCII::MO_HA;
01960   LoOpFlags = PPCII::MO_LO;
01961 
01962   // Don't use the pic base if not in PIC relocation model.
01963   bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
01964 
01965   if (isPIC) {
01966     HiOpFlags |= PPCII::MO_PIC_FLAG;
01967     LoOpFlags |= PPCII::MO_PIC_FLAG;
01968   }
01969 
01970   // If this is a reference to a global value that requires a non-lazy-ptr, make
01971   // sure that instruction lowering adds it.
01972   if (GV && Subtarget.hasLazyResolverStub(GV)) {
01973     HiOpFlags |= PPCII::MO_NLP_FLAG;
01974     LoOpFlags |= PPCII::MO_NLP_FLAG;
01975 
01976     if (GV->hasHiddenVisibility()) {
01977       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
01978       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
01979     }
01980   }
01981 
01982   return isPIC;
01983 }
01984 
01985 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
01986                              SelectionDAG &DAG) {
01987   SDLoc DL(HiPart);
01988   EVT PtrVT = HiPart.getValueType();
01989   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
01990 
01991   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
01992   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
01993 
01994   // With PIC, the first instruction is actually "GR+hi(&G)".
01995   if (isPIC)
01996     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
01997                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
01998 
01999   // Generate non-pic code that has direct accesses to the constant pool.
02000   // The address of the global is just (hi(&g)+lo(&g)).
02001   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
02002 }
02003 
02004 static void setUsesTOCBasePtr(MachineFunction &MF) {
02005   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
02006   FuncInfo->setUsesTOCBasePtr();
02007 }
02008 
02009 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
02010   setUsesTOCBasePtr(DAG.getMachineFunction());
02011 }
02012 
02013 static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
02014                            SDValue GA) {
02015   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
02016   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
02017                 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
02018 
02019   SDValue Ops[] = { GA, Reg };
02020   return DAG.getMemIntrinsicNode(
02021       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
02022       MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
02023       false, 0);
02024 }
02025 
02026 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
02027                                              SelectionDAG &DAG) const {
02028   EVT PtrVT = Op.getValueType();
02029   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
02030   const Constant *C = CP->getConstVal();
02031 
02032   // 64-bit SVR4 ABI code is always position-independent.
02033   // The actual address of the GlobalValue is stored in the TOC.
02034   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
02035     setUsesTOCBasePtr(DAG);
02036     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
02037     return getTOCEntry(DAG, SDLoc(CP), true, GA);
02038   }
02039 
02040   unsigned MOHiFlag, MOLoFlag;
02041   bool isPIC =
02042       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
02043 
02044   if (isPIC && Subtarget.isSVR4ABI()) {
02045     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
02046                                            PPCII::MO_PIC_FLAG);
02047     return getTOCEntry(DAG, SDLoc(CP), false, GA);
02048   }
02049 
02050   SDValue CPIHi =
02051     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
02052   SDValue CPILo =
02053     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
02054   return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
02055 }
02056 
02057 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
02058   EVT PtrVT = Op.getValueType();
02059   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
02060 
02061   // 64-bit SVR4 ABI code is always position-independent.
02062   // The actual address of the GlobalValue is stored in the TOC.
02063   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
02064     setUsesTOCBasePtr(DAG);
02065     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
02066     return getTOCEntry(DAG, SDLoc(JT), true, GA);
02067   }
02068 
02069   unsigned MOHiFlag, MOLoFlag;
02070   bool isPIC =
02071       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
02072 
02073   if (isPIC && Subtarget.isSVR4ABI()) {
02074     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
02075                                         PPCII::MO_PIC_FLAG);
02076     return getTOCEntry(DAG, SDLoc(GA), false, GA);
02077   }
02078 
02079   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
02080   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
02081   return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
02082 }
02083 
02084 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
02085                                              SelectionDAG &DAG) const {
02086   EVT PtrVT = Op.getValueType();
02087   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
02088   const BlockAddress *BA = BASDN->getBlockAddress();
02089 
02090   // 64-bit SVR4 ABI code is always position-independent.
02091   // The actual BlockAddress is stored in the TOC.
02092   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
02093     setUsesTOCBasePtr(DAG);
02094     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
02095     return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
02096   }
02097 
02098   unsigned MOHiFlag, MOLoFlag;
02099   bool isPIC =
02100       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
02101   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
02102   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
02103   return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
02104 }
02105 
02106 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
02107                                               SelectionDAG &DAG) const {
02108 
02109   // FIXME: TLS addresses currently use medium model code sequences,
02110   // which is the most useful form.  Eventually support for small and
02111   // large models could be added if users need it, at the cost of
02112   // additional complexity.
02113   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
02114   if (DAG.getTarget().Options.EmulatedTLS)
02115     return LowerToTLSEmulatedModel(GA, DAG);
02116 
02117   SDLoc dl(GA);
02118   const GlobalValue *GV = GA->getGlobal();
02119   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02120   bool is64bit = Subtarget.isPPC64();
02121   const Module *M = DAG.getMachineFunction().getFunction()->getParent();
02122   PICLevel::Level picLevel = M->getPICLevel();
02123 
02124   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
02125 
02126   if (Model == TLSModel::LocalExec) {
02127     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
02128                                                PPCII::MO_TPREL_HA);
02129     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
02130                                                PPCII::MO_TPREL_LO);
02131     SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
02132                                      is64bit ? MVT::i64 : MVT::i32);
02133     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
02134     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
02135   }
02136 
02137   if (Model == TLSModel::InitialExec) {
02138     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
02139     SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
02140                                                 PPCII::MO_TLS);
02141     SDValue GOTPtr;
02142     if (is64bit) {
02143       setUsesTOCBasePtr(DAG);
02144       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
02145       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
02146                            PtrVT, GOTReg, TGA);
02147     } else
02148       GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
02149     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
02150                                    PtrVT, TGA, GOTPtr);
02151     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
02152   }
02153 
02154   if (Model == TLSModel::GeneralDynamic) {
02155     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
02156     SDValue GOTPtr;
02157     if (is64bit) {
02158       setUsesTOCBasePtr(DAG);
02159       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
02160       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
02161                                    GOTReg, TGA);
02162     } else {
02163       if (picLevel == PICLevel::Small)
02164         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
02165       else
02166         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
02167     }
02168     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
02169                        GOTPtr, TGA, TGA);
02170   }
02171 
02172   if (Model == TLSModel::LocalDynamic) {
02173     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
02174     SDValue GOTPtr;
02175     if (is64bit) {
02176       setUsesTOCBasePtr(DAG);
02177       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
02178       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
02179                            GOTReg, TGA);
02180     } else {
02181       if (picLevel == PICLevel::Small)
02182         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
02183       else
02184         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
02185     }
02186     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
02187                                   PtrVT, GOTPtr, TGA, TGA);
02188     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
02189                                       PtrVT, TLSAddr, TGA);
02190     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
02191   }
02192 
02193   llvm_unreachable("Unknown TLS model!");
02194 }
02195 
02196 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
02197                                               SelectionDAG &DAG) const {
02198   EVT PtrVT = Op.getValueType();
02199   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
02200   SDLoc DL(GSDN);
02201   const GlobalValue *GV = GSDN->getGlobal();
02202 
02203   // 64-bit SVR4 ABI code is always position-independent.
02204   // The actual address of the GlobalValue is stored in the TOC.
02205   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
02206     setUsesTOCBasePtr(DAG);
02207     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
02208     return getTOCEntry(DAG, DL, true, GA);
02209   }
02210 
02211   unsigned MOHiFlag, MOLoFlag;
02212   bool isPIC =
02213       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
02214 
02215   if (isPIC && Subtarget.isSVR4ABI()) {
02216     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
02217                                             GSDN->getOffset(),
02218                                             PPCII::MO_PIC_FLAG);
02219     return getTOCEntry(DAG, DL, false, GA);
02220   }
02221 
02222   SDValue GAHi =
02223     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
02224   SDValue GALo =
02225     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
02226 
02227   SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
02228 
02229   // If the global reference is actually to a non-lazy-pointer, we have to do an
02230   // extra load to get the address of the global.
02231   if (MOHiFlag & PPCII::MO_NLP_FLAG)
02232     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
02233                       false, false, false, 0);
02234   return Ptr;
02235 }
02236 
02237 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
02238   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
02239   SDLoc dl(Op);
02240 
02241   if (Op.getValueType() == MVT::v2i64) {
02242     // When the operands themselves are v2i64 values, we need to do something
02243     // special because VSX has no underlying comparison operations for these.
02244     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
02245       // Equality can be handled by casting to the legal type for Altivec
02246       // comparisons, everything else needs to be expanded.
02247       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
02248         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
02249                  DAG.getSetCC(dl, MVT::v4i32,
02250                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
02251                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
02252                    CC));
02253       }
02254 
02255       return SDValue();
02256     }
02257 
02258     // We handle most of these in the usual way.
02259     return Op;
02260   }
02261 
02262   // If we're comparing for equality to zero, expose the fact that this is
02263   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
02264   // fold the new nodes.
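        // For a 32-bit value, ctlz returns 32 iff the value is zero, so
        // shifting the ctlz result right by log2(32) = 5 yields 1 for a zero
        // input and 0 otherwise.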
02265   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
02266     if (C->isNullValue() && CC == ISD::SETEQ) {
02267       EVT VT = Op.getOperand(0).getValueType();
02268       SDValue Zext = Op.getOperand(0);
02269       if (VT.bitsLT(MVT::i32)) {
02270         VT = MVT::i32;
02271         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
02272       }
02273       unsigned Log2b = Log2_32(VT.getSizeInBits());
02274       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
02275       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
02276                                 DAG.getConstant(Log2b, dl, MVT::i32));
02277       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
02278     }
02279     // Leave comparisons against 0 and -1 alone for now, since they're usually
02280     // optimized.  FIXME: revisit this when we can custom lower all setcc
02281     // optimizations.
02282     if (C->isAllOnesValue() || C->isNullValue())
02283       return SDValue();
02284   }
02285 
02286   // If we have an integer seteq/setne, turn it into a compare against zero
02287   // by xor'ing the rhs with the lhs, which is faster than setting a
02288   // condition register, reading it back out, and masking the correct bit.  The
02289   // normal approach here uses sub to do this instead of xor.  Using xor exposes
02290   // the result to other bit-twiddling opportunities.
02291   EVT LHSVT = Op.getOperand(0).getValueType();
02292   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
02293     EVT VT = Op.getValueType();
02294     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
02295                                 Op.getOperand(1));
02296     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
02297   }
02298   return SDValue();
02299 }
02300 
02301 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
02302                                       const PPCSubtarget &Subtarget) const {
02303   SDNode *Node = Op.getNode();
02304   EVT VT = Node->getValueType(0);
02305   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
02306   SDValue InChain = Node->getOperand(0);
02307   SDValue VAListPtr = Node->getOperand(1);
02308   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
02309   SDLoc dl(Node);
02310 
02311   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
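        // The 12-byte PPC32 SVR4 va_list layout (see also LowerVACOPY below):
        //   byte 0:     gpr index
        //   byte 1:     fpr index
        //   bytes 2-3:  padding
        //   bytes 4-7:  overflow_area pointer
        //   bytes 8-11: reg_save_area pointer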
02312 
02313   // gpr_index
02314   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
02315                                     VAListPtr, MachinePointerInfo(SV), MVT::i8,
02316                                     false, false, false, 0);
02317   InChain = GprIndex.getValue(1);
02318 
02319   if (VT == MVT::i64) {
02320     // Check if GprIndex is even
02321     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
02322                                  DAG.getConstant(1, dl, MVT::i32));
02323     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
02324                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
02325     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
02326                                           DAG.getConstant(1, dl, MVT::i32));
02327     // Align GprIndex to be even if it isn't
02328     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
02329                            GprIndex);
02330   }
02331 
02332   // fpr index is 1 byte after gpr
02333   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
02334                                DAG.getConstant(1, dl, MVT::i32));
02335 
02336   // fpr
02337   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
02338                                     FprPtr, MachinePointerInfo(SV), MVT::i8,
02339                                     false, false, false, 0);
02340   InChain = FprIndex.getValue(1);
02341 
02342   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
02343                                        DAG.getConstant(8, dl, MVT::i32));
02344 
02345   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
02346                                         DAG.getConstant(4, dl, MVT::i32));
02347 
02348   // areas
02349   SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
02350                                      MachinePointerInfo(), false, false,
02351                                      false, 0);
02352   InChain = OverflowArea.getValue(1);
02353 
02354   SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
02355                                     MachinePointerInfo(), false, false,
02356                                     false, 0);
02357   InChain = RegSaveArea.getValue(1);
02358 
02359   // select overflow_area if index >= 8
02360   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
02361                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
02362 
02363   // adjustment constant gpr_index * 4/8
02364   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
02365                                     VT.isInteger() ? GprIndex : FprIndex,
02366                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
02367                                                     MVT::i32));
02368 
02369   // OurReg = RegSaveArea + RegConstant
02370   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
02371                                RegConstant);
02372 
02373   // Floating types are 32 bytes into RegSaveArea
02374   if (VT.isFloatingPoint())
02375     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
02376                          DAG.getConstant(32, dl, MVT::i32));
02377 
02378   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
02379   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
02380                                    VT.isInteger() ? GprIndex : FprIndex,
02381                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
02382                                                    MVT::i32));
02383 
02384   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
02385                               VT.isInteger() ? VAListPtr : FprPtr,
02386                               MachinePointerInfo(SV),
02387                               MVT::i8, false, false, 0);
02388 
02389   // determine if we should load from reg_save_area or overflow_area
02390   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
02391 
02392   // Increase overflow_area by 4/8 if the gpr/fpr index is >= 8.
02393   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
02394                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
02395                                           dl, MVT::i32));
02396 
02397   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
02398                              OverflowAreaPlusN);
02399 
02400   InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
02401                               OverflowAreaPtr,
02402                               MachinePointerInfo(),
02403                               MVT::i32, false, false, 0);
02404 
02405   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
02406                      false, false, false, 0);
02407 }
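// A rough C sketch (illustrative only) of the 32-bit SVR4 va_arg sequence
// that LowerVAARG above builds as a DAG, using the va_list fields that are
// documented in LowerVASTART below:
//
//   char *addr;
//   if (gpr_index < 8) {                      // value is in reg_save_area
//     addr = reg_save_area + gpr_index * 4;   // FP: '* 8' plus a 32-byte
//     gpr_index += 1;                         //     offset past the GPRs
//   } else {                                  // i64: 'gpr_index += 2'
//     addr = overflow_arg_area;
//     overflow_arg_area += 4;                 // FP: '+= 8'
//   }
//   value = *(int *)addr;                     // the final load of VT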
02408 
02409 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
02410                                        const PPCSubtarget &Subtarget) const {
02411   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
02412 
02413   // We have to copy the entire va_list struct:
02414   // 2 * sizeof(char) + 2 bytes of padding + 2 * sizeof(char*) = 12 bytes
02415   return DAG.getMemcpy(Op.getOperand(0), Op,
02416                        Op.getOperand(1), Op.getOperand(2),
02417                        DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
02418                        false, MachinePointerInfo(), MachinePointerInfo());
02419 }
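// The 12-byte size used above corresponds to the 32-bit SVR4 va_list layout
// (an illustrative restatement of the struct documented in LowerVASTART):
//
//   offset 0: char gpr;                  // 1 byte
//   offset 1: char fpr;                  // 1 byte
//   offset 2: <padding>                  // 2 bytes
//   offset 4: char *overflow_arg_area;   // 4 bytes
//   offset 8: char *reg_save_area;       // 4 bytes  -> 12 bytes in total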
02420 
02421 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
02422                                                   SelectionDAG &DAG) const {
02423   return Op.getOperand(0);
02424 }
02425 
02426 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
02427                                                 SelectionDAG &DAG) const {
02428   SDValue Chain = Op.getOperand(0);
02429   SDValue Trmp = Op.getOperand(1); // trampoline
02430   SDValue FPtr = Op.getOperand(2); // nested function
02431   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
02432   SDLoc dl(Op);
02433 
02434   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
02435   bool isPPC64 = (PtrVT == MVT::i64);
02436   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
02437 
02438   TargetLowering::ArgListTy Args;
02439   TargetLowering::ArgListEntry Entry;
02440 
02441   Entry.Ty = IntPtrTy;
02442   Entry.Node = Trmp; Args.push_back(Entry);
02443 
02444   // TrampSize == (isPPC64 ? 48 : 40);
02445   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
02446                                isPPC64 ? MVT::i64 : MVT::i32);
02447   Args.push_back(Entry);
02448 
02449   Entry.Node = FPtr; Args.push_back(Entry);
02450   Entry.Node = Nest; Args.push_back(Entry);
02451 
02452   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
02453   TargetLowering::CallLoweringInfo CLI(DAG);
02454   CLI.setDebugLoc(dl).setChain(Chain)
02455     .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
02456                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
02457                std::move(Args), 0);
02458 
02459   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
02460   return CallResult.second;
02461 }
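// For reference, the runtime helper called above is provided by compiler-rt
// and has roughly this C signature (parameter names are illustrative):
//
//   void __trampoline_setup(uint32_t *trampOnStack, int trampSizeAllocated,
//                           const void *realFunc, void *localsPtr);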
02462 
02463 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
02464                                         const PPCSubtarget &Subtarget) const {
02465   MachineFunction &MF = DAG.getMachineFunction();
02466   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
02467 
02468   SDLoc dl(Op);
02469 
02470   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
02471     // vastart just stores the address of the VarArgsFrameIndex slot into the
02472     // memory location argument.
02473     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
02474     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
02475     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02476     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
02477                         MachinePointerInfo(SV),
02478                         false, false, 0);
02479   }
02480 
02481   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
02482   // We suppose the given va_list is already allocated.
02483   //
02484   // typedef struct {
02485   //  char gpr;     /* index into the array of 8 GPRs
02486   //                 * stored in the register save area
02487   //                 * gpr=0 corresponds to r3,
02488   //                 * gpr=1 to r4, etc.
02489   //                 */
02490   //  char fpr;     /* index into the array of 8 FPRs
02491   //                 * stored in the register save area
02492   //                 * fpr=0 corresponds to f1,
02493   //                 * fpr=1 to f2, etc.
02494   //                 */
02495   //  char *overflow_arg_area;
02496   //                /* location on stack that holds
02497   //                 * the next overflow argument
02498   //                 */
02499   //  char *reg_save_area;
02500   //               /* where r3:r10 and f1:f8 (if saved)
02501   //                * are stored
02502   //                */
02503   // } va_list[1];
02504 
02505   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
02506   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
02507 
02508   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
02509 
02510   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
02511                                             PtrVT);
02512   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
02513                                  PtrVT);
02514 
02515   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
02516   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
02517 
02518   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
02519   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
02520 
02521   uint64_t FPROffset = 1;
02522   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
02523 
02524   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02525 
02526   // Store first byte: number of int regs
02527   SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
02528                                          Op.getOperand(1),
02529                                          MachinePointerInfo(SV),
02530                                          MVT::i8, false, false, 0);
02531   uint64_t nextOffset = FPROffset;
02532   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
02533                                   ConstFPROffset);
02534 
02535   // Store second byte: number of float regs
02536   SDValue secondStore =
02537     DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
02538                       MachinePointerInfo(SV, nextOffset), MVT::i8,
02539                       false, false, 0);
02540   nextOffset += StackOffset;
02541   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
02542 
02543   // Store second word: arguments given on stack
02544   SDValue thirdStore =
02545     DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
02546                  MachinePointerInfo(SV, nextOffset),
02547                  false, false, 0);
02548   nextOffset += FrameOffset;
02549   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
02550 
02551   // Store third word: arguments given in registers
02552   return DAG.getStore(thirdStore, dl, FR, nextPtr,
02553                       MachinePointerInfo(SV, nextOffset),
02554                       false, false, 0);
02556 }
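// In effect, the four stores above perform the C initialization sketched
// below (illustrative; the field offsets 0, 1, 4 and 8 follow from
// FPROffset, StackOffset and FrameOffset as computed above):
//
//   ap->gpr = <number of fixed args passed in GPRs>;   // byte 0
//   ap->fpr = <number of fixed args passed in FPRs>;   // byte 1
//   ap->overflow_arg_area = <first stack vararg slot>; // word at offset 4
//   ap->reg_save_area = <GPR/FPR spill area>;          // word at offset 8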
02557 
02558 #include "PPCGenCallingConv.inc"
02559 
02560 // Function whose sole purpose is to kill compiler warnings
02561 // stemming from unused functions included from PPCGenCallingConv.inc.
02562 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
02563   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
02564 }
02565 
02566 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
02567                                       CCValAssign::LocInfo &LocInfo,
02568                                       ISD::ArgFlagsTy &ArgFlags,
02569                                       CCState &State) {
02570   return true;
02571 }
02572 
02573 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
02574                                              MVT &LocVT,
02575                                              CCValAssign::LocInfo &LocInfo,
02576                                              ISD::ArgFlagsTy &ArgFlags,
02577                                              CCState &State) {
02578   static const MCPhysReg ArgRegs[] = {
02579     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
02580     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
02581   };
02582   const unsigned NumArgRegs = array_lengthof(ArgRegs);
02583 
02584   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
02585 
02586   // Skip one register if the first unallocated register has an even register
02587   // number and there are still argument registers available which have not been
02588   // allocated yet. RegNum is actually an index into ArgRegs, which means we
02589   // need to skip a register if RegNum is odd.
02590   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
02591     State.AllocateReg(ArgRegs[RegNum]);
02592   }
02593 
02594   // Always return false here, as this function only makes sure that the first
02595   // unallocated register has an odd register number and does not actually
02596   // allocate a register for the current argument.
02597   return false;
02598 }
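// Worked example (illustrative): for f(int a, int b, int c, long long d),
// a, b and c take R3, R4 and R5, leaving RegNum == 3 (an odd index, i.e.
// register R6).  The hook above allocates R6 purely as padding, so the
// i64 'd' is then assigned the aligned pair R7:R8 by the regular rules.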
02599 
02600 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
02601                                                MVT &LocVT,
02602                                                CCValAssign::LocInfo &LocInfo,
02603                                                ISD::ArgFlagsTy &ArgFlags,
02604                                                CCState &State) {
02605   static const MCPhysReg ArgRegs[] = {
02606     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
02607     PPC::F8
02608   };
02609 
02610   const unsigned NumArgRegs = array_lengthof(ArgRegs);
02611 
02612   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
02613 
02614   // If there is only one floating-point register left, we need to put both f64
02615   // values of a split ppc_fp128 value on the stack.
02616   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
02617     State.AllocateReg(ArgRegs[RegNum]);
02618   }
02619 
02620   // Always return false here, as this function only makes sure that the two f64
02621   // values a ppc_fp128 value is split into are both passed in registers or both
02622   // passed on the stack and does not actually allocate a register for the
02623   // current argument.
02624   return false;
02625 }
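// Worked example (illustrative): if F1-F7 are already allocated, RegNum
// points at F8.  A ppc_fp128 needs two FPRs, so F8 is allocated here only
// to exhaust the FPR file; both f64 halves then go to the stack together.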
02626 
02627 /// FPR - The set of FP registers that should be allocated for arguments,
02628 /// on Darwin.
02629 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
02630                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
02631                                 PPC::F11, PPC::F12, PPC::F13};
02632 
02633 /// QFPR - The set of QPX registers that should be allocated for arguments.
02634 static const MCPhysReg QFPR[] = {
02635     PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
02636     PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
02637 
02638 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
02639 /// the stack.
02640 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
02641                                        unsigned PtrByteSize) {
02642   unsigned ArgSize = ArgVT.getStoreSize();
02643   if (Flags.isByVal())
02644     ArgSize = Flags.getByValSize();
02645 
02646   // Round up to multiples of the pointer size, except for array members,
02647   // which are always packed.
02648   if (!Flags.isInConsecutiveRegs())
02649     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02650 
02651   return ArgSize;
02652 }
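// For example (illustrative): a 13-byte byval argument with PtrByteSize == 8
// occupies ((13 + 7) / 8) * 8 == 16 bytes, while the same 13 bytes inside a
// consecutive-registers array member stay packed at 13 bytes.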
02653 
02654 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
02655 /// on the stack.
02656 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
02657                                             ISD::ArgFlagsTy Flags,
02658                                             unsigned PtrByteSize) {
02659   unsigned Align = PtrByteSize;
02660 
02661   // Altivec parameters are padded to a 16 byte boundary.
02662   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
02663       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
02664       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
02665       ArgVT == MVT::v1i128)
02666     Align = 16;
02667   // QPX vector types stored in double-precision are padded to a 32 byte
02668   // boundary.
02669   else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
02670     Align = 32;
02671 
02672   // ByVal parameters are aligned as requested.
02673   if (Flags.isByVal()) {
02674     unsigned BVAlign = Flags.getByValAlign();
02675     if (BVAlign > PtrByteSize) {
02676       if (BVAlign % PtrByteSize != 0)
02677           llvm_unreachable(
02678             "ByVal alignment is not a multiple of the pointer size");
02679 
02680       Align = BVAlign;
02681     }
02682   }
02683 
02684   // Array members are always packed to their original alignment.
02685   if (Flags.isInConsecutiveRegs()) {
02686     // If the array member was split into multiple registers, the first
02687     // needs to be aligned to the size of the full type.  (Except for
02688     // ppcf128, which is only aligned as its f64 components.)
02689     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
02690       Align = OrigVT.getStoreSize();
02691     else
02692       Align = ArgVT.getStoreSize();
02693   }
02694 
02695   return Align;
02696 }
02697 
02698 /// CalculateStackSlotUsed - Return whether this argument will use its
02699 /// stack slot (instead of being passed in registers).  ArgOffset,
02700 /// AvailableFPRs, and AvailableVRs must hold the current argument
02701 /// position, and will be updated to account for this argument.
02702 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
02703                                    ISD::ArgFlagsTy Flags,
02704                                    unsigned PtrByteSize,
02705                                    unsigned LinkageSize,
02706                                    unsigned ParamAreaSize,
02707                                    unsigned &ArgOffset,
02708                                    unsigned &AvailableFPRs,
02709                                    unsigned &AvailableVRs, bool HasQPX) {
02710   bool UseMemory = false;
02711 
02712   // Respect alignment of argument on the stack.
02713   unsigned Align =
02714     CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
02715   ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
02716   // If there's no space left in the argument save area, we must
02717   // use memory (this check also catches zero-sized arguments).
02718   if (ArgOffset >= LinkageSize + ParamAreaSize)
02719     UseMemory = true;
02720 
02721   // Allocate argument on the stack.
02722   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
02723   if (Flags.isInConsecutiveRegsLast())
02724     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02725   // If we overran the argument save area, we must use memory
02726   // (this check catches arguments passed partially in memory)
02727   if (ArgOffset > LinkageSize + ParamAreaSize)
02728     UseMemory = true;
02729 
02730   // However, if the argument is actually passed in an FPR or a VR,
02731   // we don't use memory after all.
02732   if (!Flags.isByVal()) {
02733     if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
02734         // QPX registers overlap with the scalar FP registers.
02735         (HasQPX && (ArgVT == MVT::v4f32 ||
02736                     ArgVT == MVT::v4f64 ||
02737                     ArgVT == MVT::v4i1)))
02738       if (AvailableFPRs > 0) {
02739         --AvailableFPRs;
02740         return false;
02741       }
02742     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
02743         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
02744         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
02745         ArgVT == MVT::v1i128)
02746       if (AvailableVRs > 0) {
02747         --AvailableVRs;
02748         return false;
02749       }
02750   }
02751 
02752   return UseMemory;
02753 }
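// For example (illustrative, 64-bit ELFv2): the first 13 f64 arguments each
// decrement AvailableFPRs and return false (they live in F1-F13); a 14th
// f64 finds AvailableFPRs == 0 and reports its stack slot as used.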
02754 
02755 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
02756 /// ensure minimum alignment required for target.
02757 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
02758                                      unsigned NumBytes) {
02759   unsigned TargetAlign = Lowering->getStackAlignment();
02760   unsigned AlignMask = TargetAlign - 1;
02761   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
02762   return NumBytes;
02763 }
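// For example (illustrative): with a 16-byte target stack alignment,
// NumBytes == 100 is rounded as (100 + 15) & ~15 == 112.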
02764 
02765 SDValue
02766 PPCTargetLowering::LowerFormalArguments(SDValue Chain,
02767                                         CallingConv::ID CallConv, bool isVarArg,
02768                                         const SmallVectorImpl<ISD::InputArg>
02769                                           &Ins,
02770                                         SDLoc dl, SelectionDAG &DAG,
02771                                         SmallVectorImpl<SDValue> &InVals)
02772                                           const {
02773   if (Subtarget.isSVR4ABI()) {
02774     if (Subtarget.isPPC64())
02775       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
02776                                          dl, DAG, InVals);
02777     else
02778       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
02779                                          dl, DAG, InVals);
02780   } else {
02781     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
02782                                        dl, DAG, InVals);
02783   }
02784 }
02785 
02786 SDValue
02787 PPCTargetLowering::LowerFormalArguments_32SVR4(
02788                                       SDValue Chain,
02789                                       CallingConv::ID CallConv, bool isVarArg,
02790                                       const SmallVectorImpl<ISD::InputArg>
02791                                         &Ins,
02792                                       SDLoc dl, SelectionDAG &DAG,
02793                                       SmallVectorImpl<SDValue> &InVals) const {
02794 
02795   // 32-bit SVR4 ABI Stack Frame Layout:
02796   //              +-----------------------------------+
02797   //        +-->  |            Back chain             |
02798   //        |     +-----------------------------------+
02799   //        |     | Floating-point register save area |
02800   //        |     +-----------------------------------+
02801   //        |     |    General register save area     |
02802   //        |     +-----------------------------------+
02803   //        |     |          CR save word             |
02804   //        |     +-----------------------------------+
02805   //        |     |         VRSAVE save word          |
02806   //        |     +-----------------------------------+
02807   //        |     |         Alignment padding         |
02808   //        |     +-----------------------------------+
02809   //        |     |     Vector register save area     |
02810   //        |     +-----------------------------------+
02811   //        |     |       Local variable space        |
02812   //        |     +-----------------------------------+
02813   //        |     |        Parameter list area        |
02814   //        |     +-----------------------------------+
02815   //        |     |           LR save word            |
02816   //        |     +-----------------------------------+
02817   // SP-->  +---  |            Back chain             |
02818   //              +-----------------------------------+
02819   //
02820   // Specifications:
02821   //   System V Application Binary Interface PowerPC Processor Supplement
02822   //   AltiVec Technology Programming Interface Manual
02823 
02824   MachineFunction &MF = DAG.getMachineFunction();
02825   MachineFrameInfo *MFI = MF.getFrameInfo();
02826   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
02827 
02828   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
02829   // Potential tail calls could cause overwriting of argument stack slots.
02830   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
02831                        (CallConv == CallingConv::Fast));
02832   unsigned PtrByteSize = 4;
02833 
02834   // Assign locations to all of the incoming arguments.
02835   SmallVector<CCValAssign, 16> ArgLocs;
02836   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
02837                  *DAG.getContext());
02838 
02839   // Reserve space for the linkage area on the stack.
02840   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
02841   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
02842 
02843   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
02844 
02845   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02846     CCValAssign &VA = ArgLocs[i];
02847 
02848     // Arguments stored in registers.
02849     if (VA.isRegLoc()) {
02850       const TargetRegisterClass *RC;
02851       EVT ValVT = VA.getValVT();
02852 
02853       switch (ValVT.getSimpleVT().SimpleTy) {
02854         default:
02855           llvm_unreachable("ValVT not supported by formal arguments Lowering");
02856         case MVT::i1:
02857         case MVT::i32:
02858           RC = &PPC::GPRCRegClass;
02859           break;
02860         case MVT::f32:
02861           if (Subtarget.hasP8Vector())
02862             RC = &PPC::VSSRCRegClass;
02863           else
02864             RC = &PPC::F4RCRegClass;
02865           break;
02866         case MVT::f64:
02867           if (Subtarget.hasVSX())
02868             RC = &PPC::VSFRCRegClass;
02869           else
02870             RC = &PPC::F8RCRegClass;
02871           break;
02872         case MVT::v16i8:
02873         case MVT::v8i16:
02874         case MVT::v4i32:
02875           RC = &PPC::VRRCRegClass;
02876           break;
02877         case MVT::v4f32:
02878           RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
02879           break;
02880         case MVT::v2f64:
02881         case MVT::v2i64:
02882           RC = &PPC::VSHRCRegClass;
02883           break;
02884         case MVT::v4f64:
02885           RC = &PPC::QFRCRegClass;
02886           break;
02887         case MVT::v4i1:
02888           RC = &PPC::QBRCRegClass;
02889           break;
02890       }
02891 
02892       // Transform the arguments stored in physical registers into virtual ones.
02893       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02894       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
02895                                             ValVT == MVT::i1 ? MVT::i32 : ValVT);
02896 
02897       if (ValVT == MVT::i1)
02898         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
02899 
02900       InVals.push_back(ArgValue);
02901     } else {
02902       // Argument stored in memory.
02903       assert(VA.isMemLoc());
02904 
02905       unsigned ArgSize = VA.getLocVT().getStoreSize();
02906       int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
02907                                       isImmutable);
02908 
02909       // Create load nodes to retrieve arguments from the stack.
02910       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
02911       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
02912                                    MachinePointerInfo(),
02913                                    false, false, false, 0));
02914     }
02915   }
02916 
02917   // Assign locations to all of the incoming aggregate by value arguments.
02918   // Aggregates passed by value are stored in the local variable space of the
02919   // caller's stack frame, right above the parameter list area.
02920   SmallVector<CCValAssign, 16> ByValArgLocs;
02921   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
02922                       ByValArgLocs, *DAG.getContext());
02923 
02924   // Reserve stack space for the allocations in CCInfo.
02925   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
02926 
02927   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
02928 
02929   // Area that is at least reserved in the caller of this function.
02930   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
02931   MinReservedArea = std::max(MinReservedArea, LinkageSize);
02932 
02933   // Set the size that is at least reserved in the caller of this function.  Tail
02934   // call optimized function's reserved stack space needs to be aligned so that
02935   // taking the difference between two stack areas will result in an aligned
02936   // stack.
02937   MinReservedArea =
02938       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
02939   FuncInfo->setMinReservedArea(MinReservedArea);
02940 
02941   SmallVector<SDValue, 8> MemOps;
02942 
02943   // If the function takes a variable number of arguments, make a frame index
02944   // for the start of the first vararg value... for expansion of llvm.va_start.
02945   if (isVarArg) {
02946     static const MCPhysReg GPArgRegs[] = {
02947       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
02948       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
02949     };
02950     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
02951 
02952     static const MCPhysReg FPArgRegs[] = {
02953       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
02954       PPC::F8
02955     };
02956     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
02957 
02958     if (Subtarget.useSoftFloat())
02959        NumFPArgRegs = 0;
02960 
02961     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
02962     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
02963 
02964     // Make room to spill all NumGPArgRegs GPRs and NumFPArgRegs FPRs.
02965     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
02966                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
02967 
02968     FuncInfo->setVarArgsStackOffset(
02969       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
02970                              CCInfo.getNextStackOffset(), true));
02971 
02972     FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
02973     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
02974 
02975     // The fixed integer arguments of a variadic function are stored to the
02976     // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
02977     // the result of va_next.
02978     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
02979       // Get an existing live-in vreg, or add a new one.
02980       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
02981       if (!VReg)
02982         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
02983 
02984       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
02985       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
02986                                    MachinePointerInfo(), false, false, 0);
02987       MemOps.push_back(Store);
02988       // Increment the address by four for the next argument to store
02989       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
02990       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
02991     }
02992 
02993     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
02994     // is set.
02995     // The double arguments are stored to the VarArgsFrameIndex
02996     // on the stack.
02997     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
02998       // Get an existing live-in vreg, or add a new one.
02999       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
03000       if (!VReg)
03001         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
03002 
03003       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
03004       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03005                                    MachinePointerInfo(), false, false, 0);
03006       MemOps.push_back(Store);
03007       // Increment the address by eight for the next argument to store
03008       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
03009                                          PtrVT);
03010       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
03011     }
03012   }
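  // Illustratively, with hard float enabled the save area built above is
  // 8 * 4 + 8 * 8 == 96 bytes: r3-r10 spilled first, then f1-f8, which is
  // exactly the reg_save_area layout that LowerVAARG indexes into.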
03013 
03014   if (!MemOps.empty())
03015     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
03016 
03017   return Chain;
03018 }
03019 
03020 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
03021 // value to MVT::i64 and then truncate to the correct register size.
03022 SDValue
03023 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
03024                                      SelectionDAG &DAG, SDValue ArgVal,
03025                                      SDLoc dl) const {
03026   if (Flags.isSExt())
03027     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
03028                          DAG.getValueType(ObjectVT));
03029   else if (Flags.isZExt())
03030     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
03031                          DAG.getValueType(ObjectVT));
03032 
03033   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
03034 }
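// For example (illustrative): a sign-extended i32 argument arrives in a full
// 64-bit GPR, and the helper above wraps it as
//
//   TRUNCATE(AssertSext(ArgVal, i32)) : i64 -> i32
//
// so later DAG passes know the upper 32 bits were a sign extension.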
03035 
03036 SDValue
03037 PPCTargetLowering::LowerFormalArguments_64SVR4(
03038                                       SDValue Chain,
03039                                       CallingConv::ID CallConv, bool isVarArg,
03040                                       const SmallVectorImpl<ISD::InputArg>
03041                                         &Ins,
03042                                       SDLoc dl, SelectionDAG &DAG,
03043                                       SmallVectorImpl<SDValue> &InVals) const {
03044   // TODO: add description of PPC stack frame format, or at least some docs.
03045   //
03046   bool isELFv2ABI = Subtarget.isELFv2ABI();
03047   bool isLittleEndian = Subtarget.isLittleEndian();
03048   MachineFunction &MF = DAG.getMachineFunction();
03049   MachineFrameInfo *MFI = MF.getFrameInfo();
03050   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
03051 
03052   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
03053          "fastcc not supported on varargs functions");
03054 
03055   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
03056   // Potential tail calls could cause overwriting of argument stack slots.
03057   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
03058                        (CallConv == CallingConv::Fast));
03059   unsigned PtrByteSize = 8;
03060   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
03061 
03062   static const MCPhysReg GPR[] = {
03063     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
03064     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
03065   };
03066   static const MCPhysReg VR[] = {
03067     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
03068     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
03069   };
03070   static const MCPhysReg VSRH[] = {
03071     PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
03072     PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
03073   };
03074 
03075   const unsigned Num_GPR_Regs = array_lengthof(GPR);
03076   const unsigned Num_FPR_Regs = 13;
03077   const unsigned Num_VR_Regs  = array_lengthof(VR);
03078   const unsigned Num_QFPR_Regs = Num_FPR_Regs;
03079 
03080   // Do a first pass over the arguments to determine whether the ABI
03081   // guarantees that our caller has allocated the parameter save area
03082   // on its stack frame.  In the ELFv1 ABI, this is always the case;
03083   // in the ELFv2 ABI, it is true if this is a vararg function or if
03084   // any parameter is located in a stack slot.
03085 
03086   bool HasParameterArea = !isELFv2ABI || isVarArg;
03087   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
03088   unsigned NumBytes = LinkageSize;
03089   unsigned AvailableFPRs = Num_FPR_Regs;
03090   unsigned AvailableVRs = Num_VR_Regs;
03091   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
03092     if (Ins[i].Flags.isNest())
03093       continue;
03094 
03095     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
03096                                PtrByteSize, LinkageSize, ParamAreaSize,
03097                                NumBytes, AvailableFPRs, AvailableVRs,
03098                                Subtarget.hasQPX()))
03099       HasParameterArea = true;
03100   }
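  // For example (illustrative, ELFv2): a non-vararg 'void f(int, double)'
  // passes both arguments in registers, so HasParameterArea stays false and
  // the caller was not obliged to allocate a parameter save area at all.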
03101 
03102   // Add DAG nodes to load the arguments or copy them out of registers.  On
03103   // entry to a function on PPC, the arguments start after the linkage area,
03104   // although the first ones are often in registers.
03105 
03106   unsigned ArgOffset = LinkageSize;
03107   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
03108   unsigned &QFPR_idx = FPR_idx;
03109   SmallVector<SDValue, 8> MemOps;
03110   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
03111   unsigned CurArgIdx = 0;
03112   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
03113     SDValue ArgVal;
03114     bool needsLoad = false;
03115     EVT ObjectVT = Ins[ArgNo].VT;
03116     EVT OrigVT = Ins[ArgNo].ArgVT;
03117     unsigned ObjSize = ObjectVT.getStoreSize();
03118     unsigned ArgSize = ObjSize;
03119     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
03120     if (Ins[ArgNo].isOrigArg()) {
03121       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
03122       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
03123     }
03124     // We re-align the argument offset for each argument, except when using
03125     // the fast calling convention: there we only re-align once we know the
03126     // argument will actually use a stack slot.
03127     unsigned CurArgOffset, Align;
03128     auto ComputeArgOffset = [&]() {
03129       /* Respect alignment of argument on the stack.  */
03130       Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
03131       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
03132       CurArgOffset = ArgOffset;
03133     };
03134 
03135     if (CallConv != CallingConv::Fast) {
03136       ComputeArgOffset();
03137 
03138       /* Compute GPR index associated with argument offset.  */
03139       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
03140       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
03141     }
03142 
03143     // FIXME the codegen can be much improved in some cases.
03144     // We do not have to keep everything in memory.
03145     if (Flags.isByVal()) {
03146       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
03147 
03148       if (CallConv == CallingConv::Fast)
03149         ComputeArgOffset();
03150 
03151       // ObjSize is the true size; ArgSize is ObjSize rounded up to a whole number of registers.
03152       ObjSize = Flags.getByValSize();
03153       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
03154       // Empty aggregate parameters do not take up registers.  Examples:
03155       //   struct { } a;
03156       //   union  { } b;
03157       //   int c[0];
03158       // etc.  However, we have to provide a place-holder in InVals, so
03159       // pretend we have an 8-byte item at the current address for that
03160       // purpose.
03161       if (!ObjSize) {
03162         int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
03163         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03164         InVals.push_back(FIN);
03165         continue;
03166       }
03167 
03168       // Create a stack object covering all stack doublewords occupied
03169       // by the argument.  If the argument is (fully or partially) on
03170       // the stack, or if the argument is fully in registers but the
03171       // caller has allocated the parameter save anyway, we can refer
03172       // directly to the caller's stack frame.  Otherwise, create a
03173       // local copy in our own frame.
03174       int FI;
03175       if (HasParameterArea ||
03176           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
03177         FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
03178       else
03179         FI = MFI->CreateStackObject(ArgSize, Align, false);
03180       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03181 
03182       // Handle aggregates smaller than 8 bytes.
03183       if (ObjSize < PtrByteSize) {
03184         // The value of the object is its address, which differs from the
03185         // address of the enclosing doubleword on big-endian systems.
03186         SDValue Arg = FIN;
03187         if (!isLittleEndian) {
03188           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
03189           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
03190         }
03191         InVals.push_back(Arg);
03192 
03193         if (GPR_idx != Num_GPR_Regs) {
03194           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
03195           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03196           SDValue Store;
03197 
03198           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
03199             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
03200                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
03201             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
03202                                       MachinePointerInfo(&*FuncArg), ObjType,
03203                                       false, false, 0);
03204           } else {
03205             // For sizes that don't fit a truncating store (3, 5, 6, 7),
03206             // store the whole register as-is to the parameter save area
03207             // slot.
03208             Store =
03209                 DAG.getStore(Val.getValue(1), dl, Val, FIN,
03210                              MachinePointerInfo(&*FuncArg), false, false, 0);
03211           }
03212 
03213           MemOps.push_back(Store);
03214         }
03215         // Whether we copied from a register or not, advance the offset
03216         // into the parameter save area by a full doubleword.
03217         ArgOffset += PtrByteSize;
03218         continue;
03219       }
03220 
03221       // The value of the object is its address, which is the address of
03222       // its first stack doubleword.
03223       InVals.push_back(FIN);
03224 
03225       // Store whatever pieces of the object are in registers to memory.
03226       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
03227         if (GPR_idx == Num_GPR_Regs)
03228           break;
03229 
03230         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03231         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03232         SDValue Addr = FIN;
03233         if (j) {
03234           SDValue Off = DAG.getConstant(j, dl, PtrVT);
03235           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
03236         }
03237         SDValue Store =
03238             DAG.getStore(Val.getValue(1), dl, Val, Addr,
03239                          MachinePointerInfo(&*FuncArg, j), false, false, 0);
03240         MemOps.push_back(Store);
03241         ++GPR_idx;
03242       }
03243       ArgOffset += ArgSize;
03244       continue;
03245     }
03246 
03247     switch (ObjectVT.getSimpleVT().SimpleTy) {
03248     default: llvm_unreachable("Unhandled argument type!");
03249     case MVT::i1:
03250     case MVT::i32:
03251     case MVT::i64:
03252       if (Flags.isNest()) {
03253         // The 'nest' parameter, if any, is passed in R11.
03254         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
03255         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
03256 
03257         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
03258           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
03259 
03260         break;
03261       }
03262 
03263       // These can be scalar arguments or elements of an integer array type
03264       // passed directly.  Clang may use those instead of "byval" aggregate
03265       // types to avoid forcing arguments to memory unnecessarily.
03266       if (GPR_idx != Num_GPR_Regs) {
03267         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
03268         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
03269 
03270         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
03271           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
03272           // value to MVT::i64 and then truncate to the correct register size.
03273           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
03274       } else {
03275         if (CallConv == CallingConv::Fast)
03276           ComputeArgOffset();
03277 
03278         needsLoad = true;
03279         ArgSize = PtrByteSize;
03280       }
03281       if (CallConv != CallingConv::Fast || needsLoad)
03282         ArgOffset += 8;
03283       break;
03284 
03285     case MVT::f32:
03286     case MVT::f64:
03287       // These can be scalar arguments or elements of a float array type
03288       // passed directly.  The latter are used to implement ELFv2 homogeneous
03289       // float aggregates.
03290       if (FPR_idx != Num_FPR_Regs) {
03291         unsigned VReg;
03292 
03293         if (ObjectVT == MVT::f32)
03294           VReg = MF.addLiveIn(FPR[FPR_idx],
03295                               Subtarget.hasP8Vector()
03296                                   ? &PPC::VSSRCRegClass
03297                                   : &PPC::F4RCRegClass);
03298         else
03299           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
03300                                                 ? &PPC::VSFRCRegClass
03301                                                 : &PPC::F8RCRegClass);
03302 
03303         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03304         ++FPR_idx;
03305       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
03306         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
03307         // once we support fp <-> gpr moves.
03308 
03309         // This can only ever happen in the presence of f32 array types,
03310         // since otherwise we never run out of FPRs before running out
03311         // of GPRs.
03312         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
03313         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
03314 
03315         if (ObjectVT == MVT::f32) {
03316           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
03317             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
03318                                  DAG.getConstant(32, dl, MVT::i32));
03319           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
03320         }
03321 
03322         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
03323       } else {
03324         if (CallConv == CallingConv::Fast)
03325           ComputeArgOffset();
03326 
03327         needsLoad = true;
03328       }
03329 
03330       // When passing an array of floats, the array occupies consecutive
03331       // space in the argument area; only round up to the next doubleword
03332       // at the end of the array.  Otherwise, each float takes 8 bytes.
03333       if (CallConv != CallingConv::Fast || needsLoad) {
03334         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
03335         ArgOffset += ArgSize;
03336         if (Flags.isInConsecutiveRegsLast())
03337           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
03338       }
03339       break;
03340     case MVT::v4f32:
03341     case MVT::v4i32:
03342     case MVT::v8i16:
03343     case MVT::v16i8:
03344     case MVT::v2f64:
03345     case MVT::v2i64:
03346     case MVT::v1i128:
03347       if (!Subtarget.hasQPX()) {
03348       // These can be scalar arguments or elements of a vector array type
03349       // passed directly.  The latter are used to implement ELFv2 homogeneous
03350       // vector aggregates.
03351       if (VR_idx != Num_VR_Regs) {
03352         unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
03353                         MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
03354                         MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
03355         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03356         ++VR_idx;
03357       } else {
03358         if (CallConv == CallingConv::Fast)
03359           ComputeArgOffset();
03360 
03361         needsLoad = true;
03362       }
03363       if (CallConv != CallingConv::Fast || needsLoad)
03364         ArgOffset += 16;
03365       break;
03366       } // not QPX
03367 
03368       assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
03369              "Invalid QPX parameter type");
03370       /* fall through */
03371 
03372     case MVT::v4f64:
03373     case MVT::v4i1:
03374       // QPX vectors are treated like their scalar floating-point subregisters
03375       // (except that they're larger).
03376       unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
03377       if (QFPR_idx != Num_QFPR_Regs) {
03378         const TargetRegisterClass *RC;
03379         switch (ObjectVT.getSimpleVT().SimpleTy) {
03380         case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
03381         case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
03382         default:         RC = &PPC::QBRCRegClass; break;
03383         }
03384 
03385         unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
03386         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03387         ++QFPR_idx;
03388       } else {
03389         if (CallConv == CallingConv::Fast)
03390           ComputeArgOffset();
03391         needsLoad = true;
03392       }
03393       if (CallConv != CallingConv::Fast || needsLoad)
03394         ArgOffset += Sz;
03395       break;
03396     }
03397 
03398     // We need to load the argument to a virtual register if we determined
03399     // above that we ran out of physical registers of the appropriate type.
03400     if (needsLoad) {
03401       if (ObjSize < ArgSize && !isLittleEndian)
03402         CurArgOffset += ArgSize - ObjSize;
03403       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
03404       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03405       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
03406                            false, false, false, 0);
03407     }
03408 
03409     InVals.push_back(ArgVal);
03410   }
03411 
03412   // Area that is at least reserved in the caller of this function.
03413   unsigned MinReservedArea;
03414   if (HasParameterArea)
03415     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
03416   else
03417     MinReservedArea = LinkageSize;
03418 
03419   // Set the size that is at least reserved in the caller of this function.  Tail
03420   // call optimized functions' reserved stack space needs to be aligned so that
03421   // taking the difference between two stack areas will result in an aligned
03422   // stack.
03423   MinReservedArea =
03424       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
03425   FuncInfo->setMinReservedArea(MinReservedArea);
03426 
03427   // If the function takes a variable number of arguments, make a frame index
03428   // for the start of the first vararg value... for expansion of llvm.va_start.
03429   if (isVarArg) {
03430     int Depth = ArgOffset;
03431 
03432     FuncInfo->setVarArgsFrameIndex(
03433       MFI->CreateFixedObject(PtrByteSize, Depth, true));
03434     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
03435 
03436     // If this function is vararg, store any remaining integer argument regs
03437     // to their spots on the stack so that they may be loaded by dereferencing the
03438     // result of va_next.
03439     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
03440          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
03441       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03442       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03443       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03444                                    MachinePointerInfo(), false, false, 0);
03445       MemOps.push_back(Store);
03446       // Increment the address by PtrByteSize for the next argument to store
03447       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
03448       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
03449     }
03450   }
03451 
03452   if (!MemOps.empty())
03453     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
03454 
03455   return Chain;
03456 }
03457 
03458 SDValue
03459 PPCTargetLowering::LowerFormalArguments_Darwin(
03460                                       SDValue Chain,
03461                                       CallingConv::ID CallConv, bool isVarArg,
03462                                       const SmallVectorImpl<ISD::InputArg>
03463                                         &Ins,
03464                                       SDLoc dl, SelectionDAG &DAG,
03465                                       SmallVectorImpl<SDValue> &InVals) const {
03466   // TODO: add description of PPC stack frame format, or at least some docs.
03467   //
03468   MachineFunction &MF = DAG.getMachineFunction();
03469   MachineFrameInfo *MFI = MF.getFrameInfo();
03470   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
03471 
03472   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
03473   bool isPPC64 = PtrVT == MVT::i64;
03474   // Potential tail calls could cause overwriting of argument stack slots.
03475   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
03476                        (CallConv == CallingConv::Fast));
03477   unsigned PtrByteSize = isPPC64 ? 8 : 4;
03478   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
03479   unsigned ArgOffset = LinkageSize;
03480   // Area that is at least reserved in the caller of this function.
03481   unsigned MinReservedArea = ArgOffset;
03482 
03483   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
03484     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
03485     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
03486   };
03487   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
03488     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
03489     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
03490   };
03491   static const MCPhysReg VR[] = {
03492     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
03493     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
03494   };
03495 
03496   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
03497   const unsigned Num_FPR_Regs = 13;
03498   const unsigned Num_VR_Regs  = array_lengthof( VR);
03499 
03500   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
03501 
03502   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
03503 
03504   // In 32-bit non-varargs functions, the stack space for vectors is after the
03505   // stack space for non-vectors.  We do not use this space unless we have
03506   // too many vectors to fit in registers, something that only occurs in
03507   // constructed examples, but we have to walk the argument list to figure
03508   // that out.  For the pathological case, compute VecArgOffset as the
03509   // start of the vector parameter area.  Computing VecArgOffset is the
03510   // entire point of the following loop.
03511   unsigned VecArgOffset = ArgOffset;
03512   if (!isVarArg && !isPPC64) {
03513     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
03514          ++ArgNo) {
03515       EVT ObjectVT = Ins[ArgNo].VT;
03516       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
03517 
03518       if (Flags.isByVal()) {
03519         // ObjSize is the true size; ArgSize is ObjSize rounded up to a whole number of regs.
03520         unsigned ObjSize = Flags.getByValSize();
03521         unsigned ArgSize =
03522                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
03523         VecArgOffset += ArgSize;
03524         continue;
03525       }
03526 
03527       switch(ObjectVT.getSimpleVT().SimpleTy) {
03528       default: llvm_unreachable("Unhandled argument type!");
03529       case MVT::i1:
03530       case MVT::i32:
03531       case MVT::f32:
03532         VecArgOffset += 4;
03533         break;
03534       case MVT::i64:  // PPC64
03535       case MVT::f64:
03536         // FIXME: We are guaranteed to be !isPPC64 at this point.
03537         // Does MVT::i64 apply?
03538         VecArgOffset += 8;
03539         break;
03540       case MVT::v4f32:
03541       case MVT::v4i32:
03542       case MVT::v8i16:
03543       case MVT::v16i8:
03544         // Nothing to do, we're only looking at non-vector args here.
03545         break;
03546       }
03547     }
03548   }
03549   // We've found where the vector parameter area in memory is.  Skip the
03550   // first 12 parameters; these don't use that memory.
03551   VecArgOffset = ((VecArgOffset+15)/16)*16;
03552   VecArgOffset += 12*16;
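  // For example (illustrative): VecArgOffset == 40 rounds up to 48, and
  // skipping the 12 parameter slots adds 12 * 16 == 192, giving 240.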
03553 
03554   // Add DAG nodes to load the arguments or copy them out of registers.  On
03555   // entry to a function on PPC, the arguments start after the linkage area,
03556   // although the first ones are often in registers.
03557 
03558   SmallVector<SDValue, 8> MemOps;
03559   unsigned nAltivecParamsAtEnd = 0;
03560   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
03561   unsigned CurArgIdx = 0;
03562   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
03563     SDValue ArgVal;
03564     bool needsLoad = false;
03565     EVT ObjectVT = Ins[ArgNo].VT;
03566     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
03567     unsigned ArgSize = ObjSize;
03568     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
03569     if (Ins[ArgNo].isOrigArg()) {
03570       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
03571       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
03572     }
03573     unsigned CurArgOffset = ArgOffset;
03574 
03575     // Varargs or 64-bit Altivec parameters are padded to a 16-byte boundary.
03576     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
03577         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
03578       if (isVarArg || isPPC64) {
03579         MinReservedArea = ((MinReservedArea+15)/16)*16;
03580         MinReservedArea += CalculateStackSlotSize(ObjectVT,
03581                                                   Flags,
03582                                                   PtrByteSize);
03583       } else  nAltivecParamsAtEnd++;
03584     } else
03585       // Calculate min reserved area.
03586       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
03587                                                 Flags,
03588                                                 PtrByteSize);
03589 
03590     // FIXME the codegen can be much improved in some cases.
03591     // We do not have to keep everything in memory.
03592     if (Flags.isByVal()) {
03593       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
03594 
03595       // ObjSize is the true size; ArgSize is ObjSize rounded up to a whole number of registers.
03596       ObjSize = Flags.getByValSize();
03597       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
03598       // Objects of size 1 and 2 are right-justified, everything else is
03599       // left-justified.  This means the memory address is adjusted forwards.
03600       if (ObjSize==1 || ObjSize==2) {
03601         CurArgOffset = CurArgOffset + (4 - ObjSize);
03602       }
03603       // The value of the object is its address.
03604       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
03605       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03606       InVals.push_back(FIN);
03607       if (ObjSize==1 || ObjSize==2) {
03608         if (GPR_idx != Num_GPR_Regs) {
03609           unsigned VReg;
03610           if (isPPC64)
03611             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03612           else
03613             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03614           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03615           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
03616           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
03617                                             MachinePointerInfo(&*FuncArg),
03618                                             ObjType, false, false, 0);
03619           MemOps.push_back(Store);
03620           ++GPR_idx;
03621         }
03622 
03623         ArgOffset += PtrByteSize;
03624 
03625         continue;
03626       }
03627       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
03628         // Store whatever pieces of the object are in registers
03629         // to memory.  ArgOffset will be the address of the beginning
03630         // of the object.
03631         if (GPR_idx != Num_GPR_Regs) {
03632           unsigned VReg;
03633           if (isPPC64)
03634             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03635           else
03636             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03637           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
03638           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03639           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03640           SDValue Store =
03641               DAG.getStore(Val.getValue(1), dl, Val, FIN,
03642                            MachinePointerInfo(&*FuncArg, j), false, false, 0);
03643           MemOps.push_back(Store);
03644           ++GPR_idx;
03645           ArgOffset += PtrByteSize;
03646         } else {
03647           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
03648           break;
03649         }
03650       }
03651       continue;
03652     }
03653 
03654     switch (ObjectVT.getSimpleVT().SimpleTy) {
03655     default: llvm_unreachable("Unhandled argument type!");
03656     case MVT::i1:
03657     case MVT::i32:
03658       if (!isPPC64) {
03659         if (GPR_idx != Num_GPR_Regs) {
03660           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03661           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
03662 
03663           if (ObjectVT == MVT::i1)
03664             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
03665 
03666           ++GPR_idx;
03667         } else {
03668           needsLoad = true;
03669           ArgSize = PtrByteSize;
03670         }
03671         // All int arguments reserve stack space in the Darwin ABI.
03672         ArgOffset += PtrByteSize;
03673         break;
03674       }
03675       // FALLTHROUGH
03676     case MVT::i64:  // PPC64
03677       if (GPR_idx != Num_GPR_Regs) {
03678         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03679         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
03680 
03681         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
03682           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
03683           // value to MVT::i64 and then truncate to the correct register size.
03684           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
03685 
03686         ++GPR_idx;
03687       } else {
03688         needsLoad = true;
03689         ArgSize = PtrByteSize;
03690       }
03691       // All int arguments reserve stack space in the Darwin ABI.
03692       ArgOffset += 8;
03693       break;
03694 
03695     case MVT::f32:
03696     case MVT::f64:
03697       // Every 4 bytes of argument space consumes one of the GPRs available for
03698       // argument passing.
03699       if (GPR_idx != Num_GPR_Regs) {
03700         ++GPR_idx;
03701         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
03702           ++GPR_idx;
03703       }
03704       if (FPR_idx != Num_FPR_Regs) {
03705         unsigned VReg;
03706 
03707         if (ObjectVT == MVT::f32)
03708           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
03709         else
03710           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
03711 
03712         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03713         ++FPR_idx;
03714       } else {
03715         needsLoad = true;
03716       }
03717 
03718       // All FP arguments reserve stack space in the Darwin ABI.
03719       ArgOffset += isPPC64 ? 8 : ObjSize;
03720       break;
03721     case MVT::v4f32:
03722     case MVT::v4i32:
03723     case MVT::v8i16:
03724     case MVT::v16i8:
03725       // Note that vector arguments in registers don't reserve stack space,
03726       // except in varargs functions.
03727       if (VR_idx != Num_VR_Regs) {
03728         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
03729         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03730         if (isVarArg) {
03731           while ((ArgOffset % 16) != 0) {
03732             ArgOffset += PtrByteSize;
03733             if (GPR_idx != Num_GPR_Regs)
03734               GPR_idx++;
03735           }
03736           ArgOffset += 16;
03737           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
03738         }
03739         ++VR_idx;
03740       } else {
03741         if (!isVarArg && !isPPC64) {
03742           // Vectors go after all the nonvectors.
03743           CurArgOffset = VecArgOffset;
03744           VecArgOffset += 16;
03745         } else {
03746           // Vectors are aligned.
03747           ArgOffset = ((ArgOffset+15)/16)*16;
03748           CurArgOffset = ArgOffset;
03749           ArgOffset += 16;
03750         }
03751         needsLoad = true;
03752       }
03753       break;
03754     }
03755 
03756     // We need to load the argument to a virtual register if we determined above
03757     // that we ran out of physical registers of the appropriate type.
03758     if (needsLoad) {
03759       int FI = MFI->CreateFixedObject(ObjSize,
03760                                       CurArgOffset + (ArgSize - ObjSize),
03761                                       isImmutable);
03762       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03763       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
03764                            false, false, false, 0);
03765     }
03766 
03767     InVals.push_back(ArgVal);
03768   }
03769 
03770   // Allow for Altivec parameters at the end, if needed.
03771   if (nAltivecParamsAtEnd) {
03772     MinReservedArea = ((MinReservedArea+15)/16)*16;
03773     MinReservedArea += 16*nAltivecParamsAtEnd;
03774   }
03775 
03776   // Area that is at least reserved in the caller of this function.
03777   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
03778 
03779   // Set the size that is at least reserved in caller of this function.  Tail
03780   // call optimized functions' reserved stack space needs to be aligned so that
03781   // taking the difference between two stack areas will result in an aligned
03782   // stack.
03783   MinReservedArea =
03784       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
03785   FuncInfo->setMinReservedArea(MinReservedArea);
03786 
03787   // If the function takes variable number of arguments, make a frame index for
03788   // the start of the first vararg value... for expansion of llvm.va_start.
03789   if (isVarArg) {
03790     int Depth = ArgOffset;
03791 
03792     FuncInfo->setVarArgsFrameIndex(
03793       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
03794                              Depth, true));
03795     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
03796 
03797     // If this function is vararg, store any remaining integer argument regs
03798     // to their spots on the stack so that they may be loaded by dereferencing the
03799     // result of va_next.
03800     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
03801       unsigned VReg;
03802 
03803       if (isPPC64)
03804         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03805       else
03806         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03807 
03808       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03809       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03810                                    MachinePointerInfo(), false, false, 0);
03811       MemOps.push_back(Store);
03812       // Increment the address by the pointer size for the next argument to store
03813       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
03814       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
03815     }
03816   }
03817 
03818   if (!MemOps.empty())
03819     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
03820 
03821   return Chain;
03822 }
03823 
03824 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
03825 /// adjusted to accommodate the arguments for the tailcall.
03826 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
03827                                    unsigned ParamSize) {
03828 
03829   if (!isTailCall) return 0;
03830 
03831   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
03832   unsigned CallerMinReservedArea = FI->getMinReservedArea();
03833   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
03834   // Remember only if the new adjustment is bigger.
03835   if (SPDiff < FI->getTailCallSPDelta())
03836     FI->setTailCallSPDelta(SPDiff);
03837 
03838   return SPDiff;
03839 }
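// For illustration (hypothetical sizes): a caller whose MinReservedArea is 64
// bytes making a tail call that needs ParamSize == 96 bytes gets
// SPDiff == -32, i.e. the stack must grow by 32 bytes; the delta is recorded
// only when it is more negative than any adjustment seen so far.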
03840 
03841 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
03842 /// for tail call optimization. Targets which want to do tail call
03843 /// optimization should implement this function.
03844 bool
03845 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
03846                                                      CallingConv::ID CalleeCC,
03847                                                      bool isVarArg,
03848                                       const SmallVectorImpl<ISD::InputArg> &Ins,
03849                                                      SelectionDAG& DAG) const {
03850   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
03851     return false;
03852 
03853   // Variable argument functions are not supported.
03854   if (isVarArg)
03855     return false;
03856 
03857   MachineFunction &MF = DAG.getMachineFunction();
03858   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
03859   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
03860     // Functions containing by val parameters are not supported.
03861     for (unsigned i = 0; i != Ins.size(); i++) {
03862        ISD::ArgFlagsTy Flags = Ins[i].Flags;
03863        if (Flags.isByVal()) return false;
03864     }
03865 
03866     // Non-PIC/GOT tail calls are supported.
03867     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
03868       return true;
03869 
03870     // At the moment we can only do local tail calls (in same module, hidden
03871     // or protected) if we are generating PIC.
03872     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
03873       return G->getGlobal()->hasHiddenVisibility()
03874           || G->getGlobal()->hasProtectedVisibility();
03875   }
03876 
03877   return false;
03878 }
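// As a sketch of what these rules accept (assuming GuaranteedTailCallOpt is
// enabled and a non-PIC relocation model), a non-varargs fastcc pair with no
// byval arguments qualifies:
//
//   define fastcc i32 @callee(i32 %x) { ret i32 %x }
//   define fastcc i32 @caller(i32 %x) {
//     %r = tail call fastcc i32 @callee(i32 %x)
//     ret i32 %r
//   }
//
// whereas any varargs call or one carrying a byval argument is rejected above.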
03879 
03880 /// isBLACompatibleAddress - Return the immediate to use if the specified
03881 /// 32-bit value is representable in the immediate field of a BxA instruction.
03882 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
03883   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
03884   if (!C) return nullptr;
03885 
03886   int Addr = C->getZExtValue();
03887   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
03888       SignExtend32<26>(Addr) != Addr)
03889     return nullptr;  // Top 6 bits have to be sext of immediate.
03890 
03891   return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op),
03892                          DAG.getTargetLoweringInfo().getPointerTy(
03893                              DAG.getDataLayout())).getNode();
03894 }
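// For illustration (hypothetical absolute addresses): 0x1000 is word-aligned
// and unchanged by SignExtend32<26>, so it is accepted and encoded as
// 0x1000 >> 2; 0x1001 fails the alignment test and 0x10000000 does not
// survive the 26-bit sign extension, so both return nullptr.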
03895 
03896 namespace {
03897 
03898 struct TailCallArgumentInfo {
03899   SDValue Arg;
03900   SDValue FrameIdxOp;
03901   int       FrameIdx;
03902 
03903   TailCallArgumentInfo() : FrameIdx(0) {}
03904 };
03905 }
03906 
03907 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
03908 static void
03909 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
03910                    SDValue Chain,
03911                    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
03912                    SmallVectorImpl<SDValue> &MemOpChains,
03913                    SDLoc dl) {
03914   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
03915     SDValue Arg = TailCallArgs[i].Arg;
03916     SDValue FIN = TailCallArgs[i].FrameIdxOp;
03917     int FI = TailCallArgs[i].FrameIdx;
03918     // Store relative to the frame pointer.
03919     MemOpChains.push_back(DAG.getStore(
03920         Chain, dl, Arg, FIN,
03921         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
03922         false, 0));
03923   }
03924 }
03925 
03926 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
03927 /// the appropriate stack slots for the tail-call-optimized function call.
03928 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
03929                                                MachineFunction &MF,
03930                                                SDValue Chain,
03931                                                SDValue OldRetAddr,
03932                                                SDValue OldFP,
03933                                                int SPDiff,
03934                                                bool isPPC64,
03935                                                bool isDarwinABI,
03936                                                SDLoc dl) {
03937   if (SPDiff) {
03938     // Calculate the new stack slot for the return address.
03939     int SlotSize = isPPC64 ? 8 : 4;
03940     const PPCFrameLowering *FL =
03941         MF.getSubtarget<PPCSubtarget>().getFrameLowering();
03942     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
03943     int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
03944                                                           NewRetAddrLoc, true);
03945     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
03946     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
03947     Chain = DAG.getStore(
03948         Chain, dl, OldRetAddr, NewRetAddrFrIdx,
03949         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewRetAddr),
03950         false, false, 0);
03951 
03952     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
03953     // slot as the FP is never overwritten.
03954     if (isDarwinABI) {
03955       int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
03956       int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
03957                                                           true);
03958       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
03959       Chain = DAG.getStore(
03960           Chain, dl, OldFP, NewFramePtrIdx,
03961           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx),
03962           false, false, 0);
03963     }
03964   }
03965   return Chain;
03966 }
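// For illustration (hypothetical SPDiff): with SPDiff == -32, the return
// address slot is recreated at getReturnSaveOffset() - 32, i.e. at the
// ABI-mandated offset measured from the stack pointer after it has been
// decremented for the tail call.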
03967 
03968 /// CalculateTailCallArgDest - Remember the argument for later processing.
03969 /// Calculate the position of the argument.
03970 static void
03971 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
03972                          SDValue Arg, int SPDiff, unsigned ArgOffset,
03973                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
03974   int Offset = ArgOffset + SPDiff;
03975   uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
03976   int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
03977   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
03978   SDValue FIN = DAG.getFrameIndex(FI, VT);
03979   TailCallArgumentInfo Info;
03980   Info.Arg = Arg;
03981   Info.FrameIdxOp = FIN;
03982   Info.FrameIdx = FI;
03983   TailCallArguments.push_back(Info);
03984 }
03985 
03986 /// EmitTailCallLoadFPAndRetAddr - Emit loads of the return address and the
03987 /// frame pointer from their stack slots. Returns the chain as result and the
03988 /// loaded values in LROpOut/FPOpOut. Used when tail calling.
03989 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
03990                                                         int SPDiff,
03991                                                         SDValue Chain,
03992                                                         SDValue &LROpOut,
03993                                                         SDValue &FPOpOut,
03994                                                         bool isDarwinABI,
03995                                                         SDLoc dl) const {
03996   if (SPDiff) {
03997     // Load the LR and FP stack slot for later adjusting.
03998     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
03999     LROpOut = getReturnAddrFrameIndex(DAG);
04000     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
04001                           false, false, false, 0);
04002     Chain = SDValue(LROpOut.getNode(), 1);
04003 
04004     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
04005     // slot as the FP is never overwritten.
04006     if (isDarwinABI) {
04007       FPOpOut = getFramePointerFrameIndex(DAG);
04008       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
04009                             false, false, false, 0);
04010       Chain = SDValue(FPOpOut.getNode(), 1);
04011     }
04012   }
04013   return Chain;
04014 }
04015 
04016 /// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
04017 /// specified by "Src" to address "Dst".  Size and alignment information are
04018 /// taken from the byval parameter attribute. The copy will be passed as
04019 /// a byval function parameter.
04020 /// Sometimes what we are copying is the end of a larger object, the part that
04021 /// does not fit in registers.
04022 static SDValue
04023 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
04024                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
04025                           SDLoc dl) {
04026   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
04027   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
04028                        false, false, false, MachinePointerInfo(),
04029                        MachinePointerInfo());
04030 }
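// Callers thread the chain from just inside CALLSEQ_START through this memcpy
// and then rebuild the call sequence around it; see the Flags.isByVal()
// handling in LowerCall_32SVR4 below for a usage example.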
04031 
04032 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
04033 /// tail calls.
04034 static void
04035 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
04036                  SDValue Arg, SDValue PtrOff, int SPDiff,
04037                  unsigned ArgOffset, bool isPPC64, bool isTailCall,
04038                  bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
04039                  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
04040                  SDLoc dl) {
04041   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
04042   if (!isTailCall) {
04043     if (isVector) {
04044       SDValue StackPtr;
04045       if (isPPC64)
04046         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
04047       else
04048         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
04049       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
04050                            DAG.getConstant(ArgOffset, dl, PtrVT));
04051     }
04052     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
04053                                        MachinePointerInfo(), false, false, 0));
04054   // Calculate and remember argument location.
04055   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
04056                                   TailCallArguments);
04057 }
04058 
04059 static
04060 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
04061                      SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
04062                      SDValue LROp, SDValue FPOp, bool isDarwinABI,
04063                      SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
04064   MachineFunction &MF = DAG.getMachineFunction();
04065 
04066   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
04067   // might overwrite each other in case of tail call optimization.
04068   SmallVector<SDValue, 8> MemOpChains2;
04069   // Do not glue the preceding CopyToReg nodes together with the stores below.
04070   InFlag = SDValue();
04071   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
04072                                     MemOpChains2, dl);
04073   if (!MemOpChains2.empty())
04074     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
04075 
04076   // Store the return address to the appropriate stack slot.
04077   Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
04078                                         isPPC64, isDarwinABI, dl);
04079 
04080   // Emit callseq_end just before tailcall node.
04081   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
04082                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
04083   InFlag = Chain.getValue(1);
04084 }
04085 
04086 // Is this global address that of a function that can be called by name (as
04087 // opposed to something that must hold a descriptor for an indirect call)?
04088 static bool isFunctionGlobalAddress(SDValue Callee) {
04089   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
04090     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
04091         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
04092       return false;
04093 
04094     return G->getGlobal()->getValueType()->isFunctionTy();
04095   }
04096 
04097   return false;
04098 }
04099 
04100 static
04101 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
04102                      SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
04103                      bool isTailCall, bool IsPatchPoint, bool hasNest,
04104                      SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
04105                      SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
04106                      ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
04107 
04108   bool isPPC64 = Subtarget.isPPC64();
04109   bool isSVR4ABI = Subtarget.isSVR4ABI();
04110   bool isELFv2ABI = Subtarget.isELFv2ABI();
04111 
04112   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
04113   NodeTys.push_back(MVT::Other);   // Returns a chain
04114   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
04115 
04116   unsigned CallOpc = PPCISD::CALL;
04117 
04118   bool needIndirectCall = true;
04119   if (!isSVR4ABI || !isPPC64)
04120     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
04121       // If this is an absolute destination address, use the munged value.
04122       Callee = SDValue(Dest, 0);
04123       needIndirectCall = false;
04124     }
04125 
04126   if (isFunctionGlobalAddress(Callee)) {
04127     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
04128     // A call to a TLS address is actually an indirect call to a
04129     // thread-specific pointer.
04130     unsigned OpFlags = 0;
04131     if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
04132          (Subtarget.getTargetTriple().isMacOSX() &&
04133           Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
04134          !G->getGlobal()->isStrongDefinitionForLinker()) ||
04135         (Subtarget.isTargetELF() && !isPPC64 &&
04136          !G->getGlobal()->hasLocalLinkage() &&
04137          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
04138       // PC-relative references to external symbols should go through $stub,
04139       // unless we're building with the Leopard linker or later, which
04140       // automatically synthesizes these stubs.
04141       OpFlags = PPCII::MO_PLT_OR_STUB;
04142     }
04143 
04144     // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
04145     // every direct call is) turn it into a TargetGlobalAddress /
04146     // TargetExternalSymbol node so that legalize doesn't hack it.
04147     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
04148                                         Callee.getValueType(), 0, OpFlags);
04149     needIndirectCall = false;
04150   }
04151 
04152   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
04153     unsigned char OpFlags = 0;
04154 
04155     if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
04156          (Subtarget.getTargetTriple().isMacOSX() &&
04157           Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
04158         (Subtarget.isTargetELF() && !isPPC64 &&
04159          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
04160       // PC-relative references to external symbols should go through $stub,
04161       // unless we're building with the Leopard linker or later, which
04162       // automatically synthesizes these stubs.
04163       OpFlags = PPCII::MO_PLT_OR_STUB;
04164     }
04165 
04166     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
04167                                          OpFlags);
04168     needIndirectCall = false;
04169   }
04170 
04171   if (IsPatchPoint) {
04172     // We'll form an invalid direct call when lowering a patchpoint; the full
04173     // sequence for an indirect call is complicated, and many of the
04174     // instructions introduced might have side effects (and, thus, can't be
04175     // removed later). The call itself will be removed as soon as the
04176     // argument/return lowering is complete, so the fact that it has the wrong
04177     // kind of operands should not really matter.
04178     needIndirectCall = false;
04179   }
04180 
04181   if (needIndirectCall) {
04182     // Otherwise, this is an indirect call.  We have to use an MTCTR/BCTRL pair
04183     // to do the call; we can't use PPCISD::CALL.
04184     SDValue MTCTROps[] = {Chain, Callee, InFlag};
04185 
04186     if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
04187       // Function pointers in the 64-bit SVR4 ABI do not point to the function
04188       // entry point, but to the function descriptor (the function entry point
04189       // address is part of the function descriptor though).
04190       // The function descriptor is a three doubleword structure with the
04191       // following fields: function entry point, TOC base address and
04192       // environment pointer.
04193       // Thus for a call through a function pointer, the following actions need
04194       // to be performed:
04195       //   1. Save the TOC of the caller in the TOC save area of its stack
04196       //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
04197       //   2. Load the address of the function entry point from the function
04198       //      descriptor.
04199       //   3. Load the TOC of the callee from the function descriptor into r2.
04200       //   4. Load the environment pointer from the function descriptor into
04201       //      r11.
04202       //   5. Branch to the function entry point address.
04203       //   6. On return of the callee, the TOC of the caller needs to be
04204       //      restored (this is done in FinishCall()).
04205       //
04206       // The loads are scheduled at the beginning of the call sequence, and the
04207       // register copies are flagged together to ensure that no other
04208       // operations can be scheduled in between. E.g. without flagging the
04209       // copies together, a TOC access in the caller could be scheduled between
04210       // the assignment of the callee TOC and the branch to the callee, which
04211       // results in the TOC access going through the TOC of the callee instead
04212       // of going through the TOC of the caller, which leads to incorrect code.
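      //
      // For reference, the ELFv1 descriptor walked over by the offsets below
      // (0, 8 and 16) can be pictured as (a sketch, not a type used here):
      //
      //   struct FunctionDescriptor {
      //     uint64_t EntryPoint; // +0:  moved to CTR via LoadFuncPtr
      //     uint64_t TOCBase;    // +8:  copied into r2
      //     uint64_t EnvPtr;     // +16: copied into r11 (unless 'nest')
      //   };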
04213 
04214       // Load the address of the function entry point from the function
04215       // descriptor.
04216       SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
04217       if (LDChain.getValueType() == MVT::Glue)
04218         LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
04219 
04220       bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
04221 
04222       MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
04223       SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
04224                                         false, false, LoadsInv, 8);
04225 
04226       // Load environment pointer into r11.
04227       SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
04228       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
04229       SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
04230                                        MPI.getWithOffset(16), false, false,
04231                                        LoadsInv, 8);
04232 
04233       SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
04234       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
04235       SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
04236                                    MPI.getWithOffset(8), false, false,
04237                                    LoadsInv, 8);
04238 
04239       setUsesTOCBasePtr(DAG);
04240       SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
04241                                         InFlag);
04242       Chain = TOCVal.getValue(0);
04243       InFlag = TOCVal.getValue(1);
04244 
04245       // If the function call has an explicit 'nest' parameter, it takes the
04246       // place of the environment pointer.
04247       if (!hasNest) {
04248         SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
04249                                           InFlag);
04250 
04251         Chain = EnvVal.getValue(0);
04252         InFlag = EnvVal.getValue(1);
04253       }
04254 
04255       MTCTROps[0] = Chain;
04256       MTCTROps[1] = LoadFuncPtr;
04257       MTCTROps[2] = InFlag;
04258     }
04259 
04260     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
04261                         makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
04262     InFlag = Chain.getValue(1);
04263 
04264     NodeTys.clear();
04265     NodeTys.push_back(MVT::Other);
04266     NodeTys.push_back(MVT::Glue);
04267     Ops.push_back(Chain);
04268     CallOpc = PPCISD::BCTRL;
04269     Callee.setNode(nullptr);
04270     // Add use of X11 (holding environment pointer)
04271     if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
04272       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
04273     // Add CTR register as callee so a bctr can be emitted later.
04274     if (isTailCall)
04275       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
04276   }
04277 
04278   // If this is a direct call, pass the chain and the callee.
04279   if (Callee.getNode()) {
04280     Ops.push_back(Chain);
04281     Ops.push_back(Callee);
04282   }
04283   // If this is a tail call add stack pointer delta.
04284   if (isTailCall)
04285     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
04286 
04287   // Add argument registers to the end of the list so that they are known live
04288   // into the call.
04289   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
04290     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
04291                                   RegsToPass[i].second.getValueType()));
04292 
04293   // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
04294   // into the call.
04295   if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
04296     setUsesTOCBasePtr(DAG);
04297     Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
04298   }
04299 
04300   return CallOpc;
04301 }
04302 
04303 static
04304 bool isLocalCall(const SDValue &Callee)
04305 {
04306   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
04307     return G->getGlobal()->isStrongDefinitionForLinker();
04308   return false;
04309 }
04310 
04311 SDValue
04312 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
04313                                    CallingConv::ID CallConv, bool isVarArg,
04314                                    const SmallVectorImpl<ISD::InputArg> &Ins,
04315                                    SDLoc dl, SelectionDAG &DAG,
04316                                    SmallVectorImpl<SDValue> &InVals) const {
04317 
04318   SmallVector<CCValAssign, 16> RVLocs;
04319   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
04320                     *DAG.getContext());
04321   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
04322 
04323   // Copy all of the result registers out of their specified physreg.
04324   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
04325     CCValAssign &VA = RVLocs[i];
04326     assert(VA.isRegLoc() && "Can only return in registers!");
04327 
04328     SDValue Val = DAG.getCopyFromReg(Chain, dl,
04329                                      VA.getLocReg(), VA.getLocVT(), InFlag);
04330     Chain = Val.getValue(1);
04331     InFlag = Val.getValue(2);
04332 
04333     switch (VA.getLocInfo()) {
04334     default: llvm_unreachable("Unknown loc info!");
04335     case CCValAssign::Full: break;
04336     case CCValAssign::AExt:
04337       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
04338       break;
04339     case CCValAssign::ZExt:
04340       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
04341                         DAG.getValueType(VA.getValVT()));
04342       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
04343       break;
04344     case CCValAssign::SExt:
04345       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
04346                         DAG.getValueType(VA.getValVT()));
04347       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
04348       break;
04349     }
04350 
04351     InVals.push_back(Val);
04352   }
04353 
04354   return Chain;
04355 }
04356 
04357 SDValue
04358 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
04359                               bool isTailCall, bool isVarArg, bool IsPatchPoint,
04360                               bool hasNest, SelectionDAG &DAG,
04361                               SmallVector<std::pair<unsigned, SDValue>, 8>
04362                                 &RegsToPass,
04363                               SDValue InFlag, SDValue Chain,
04364                               SDValue CallSeqStart, SDValue &Callee,
04365                               int SPDiff, unsigned NumBytes,
04366                               const SmallVectorImpl<ISD::InputArg> &Ins,
04367                               SmallVectorImpl<SDValue> &InVals,
04368                               ImmutableCallSite *CS) const {
04369 
04370   std::vector<EVT> NodeTys;
04371   SmallVector<SDValue, 8> Ops;
04372   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
04373                                  SPDiff, isTailCall, IsPatchPoint, hasNest,
04374                                  RegsToPass, Ops, NodeTys, CS, Subtarget);
04375 
04376   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
04377   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
04378     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
04379 
04380   // When performing tail call optimization the callee pops its arguments off
04381   // the stack. Account for this here so these bytes can be pushed back on in
04382   // PPCFrameLowering::eliminateCallFramePseudoInstr.
04383   int BytesCalleePops =
04384     (CallConv == CallingConv::Fast &&
04385      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
04386 
04387   // Add a register mask operand representing the call-preserved registers.
04388   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
04389   const uint32_t *Mask =
04390       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
04391   assert(Mask && "Missing call preserved mask for calling convention");
04392   Ops.push_back(DAG.getRegisterMask(Mask));
04393 
04394   if (InFlag.getNode())
04395     Ops.push_back(InFlag);
04396 
04397   // Emit tail call.
04398   if (isTailCall) {
04399     assert(((Callee.getOpcode() == ISD::Register &&
04400              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
04401             Callee.getOpcode() == ISD::TargetExternalSymbol ||
04402             Callee.getOpcode() == ISD::TargetGlobalAddress ||
04403             isa<ConstantSDNode>(Callee)) &&
04404     "Expecting a global address, external symbol, absolute value or register");
04405 
04406     DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
04407     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
04408   }
04409 
04410   // Add a NOP immediately after the branch instruction when using the 64-bit
04411   // SVR4 ABI. At link time, if caller and callee are in different modules and
04412   // thus have different TOCs, the call will be replaced with a call to a stub
04413   // function which saves the current TOC, loads the TOC of the callee and
04414   // branches to the callee. The NOP will be replaced with a load instruction
04415   // which restores the TOC of the caller from the TOC save slot of the current
04416   // stack frame. If caller and callee belong to the same module (and have the
04417   // same TOC), the NOP will remain unchanged.
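  //
  // Schematically (a sketch of the final object code; the TOC save offset
  // comes from PPCFrameLowering::getTOCSaveOffset()):
  //
  //   bl callee        // direct call
  //   nop              // may be rewritten by the linker to:
  //                    //   ld r2, <TOC save offset>(r1)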
04418 
04419   if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
04420       !IsPatchPoint) {
04421     if (CallOpc == PPCISD::BCTRL) {
04422       // This is a call through a function pointer.
04423       // Restore the caller TOC from the save area into R2.
04424       // See PrepareCall() for more information about calls through function
04425       // pointers in the 64-bit SVR4 ABI.
04426       // We are using a target-specific load with r2 hard coded, because the
04427       // result of a target-independent load would never go directly into r2,
04428       // since r2 is a reserved register (which prevents the register allocator
04429       // from allocating it), resulting in an additional register being
04430       // allocated and an unnecessary move instruction being generated.
04431       CallOpc = PPCISD::BCTRL_LOAD_TOC;
04432 
04433       EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
04434       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
04435       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
04436       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
04437       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
04438 
04439       // The address needs to go after the chain input but before the flag (or
04440       // any other variadic arguments).
04441       Ops.insert(std::next(Ops.begin()), AddTOC);
04442     } else if ((CallOpc == PPCISD::CALL) &&
04443                (!isLocalCall(Callee) ||
04444                 DAG.getTarget().getRelocationModel() == Reloc::PIC_))
04445       // Otherwise insert NOP for non-local calls.
04446       CallOpc = PPCISD::CALL_NOP;
04447   }
04448 
04449   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
04450   InFlag = Chain.getValue(1);
04451 
04452   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
04453                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
04454                              InFlag, dl);
04455   if (!Ins.empty())
04456     InFlag = Chain.getValue(1);
04457 
04458   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
04459                          Ins, dl, DAG, InVals);
04460 }
04461 
04462 SDValue
04463 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
04464                              SmallVectorImpl<SDValue> &InVals) const {
04465   SelectionDAG &DAG                     = CLI.DAG;
04466   SDLoc &dl                             = CLI.DL;
04467   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
04468   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
04469   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
04470   SDValue Chain                         = CLI.Chain;
04471   SDValue Callee                        = CLI.Callee;
04472   bool &isTailCall                      = CLI.IsTailCall;
04473   CallingConv::ID CallConv              = CLI.CallConv;
04474   bool isVarArg                         = CLI.IsVarArg;
04475   bool IsPatchPoint                     = CLI.IsPatchPoint;
04476   ImmutableCallSite *CS                 = CLI.CS;
04477 
04478   if (isTailCall)
04479     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
04480                                                    Ins, DAG);
04481 
04482   if (!isTailCall && CS && CS->isMustTailCall())
04483     report_fatal_error("failed to perform tail call elimination on a call "
04484                        "site marked musttail");
04485 
04486   if (Subtarget.isSVR4ABI()) {
04487     if (Subtarget.isPPC64())
04488       return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
04489                               isTailCall, IsPatchPoint, Outs, OutVals, Ins,
04490                               dl, DAG, InVals, CS);
04491     else
04492       return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
04493                               isTailCall, IsPatchPoint, Outs, OutVals, Ins,
04494                               dl, DAG, InVals, CS);
04495   }
04496 
04497   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
04498                           isTailCall, IsPatchPoint, Outs, OutVals, Ins,
04499                           dl, DAG, InVals, CS);
04500 }
04501 
04502 SDValue
04503 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
04504                                     CallingConv::ID CallConv, bool isVarArg,
04505                                     bool isTailCall, bool IsPatchPoint,
04506                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
04507                                     const SmallVectorImpl<SDValue> &OutVals,
04508                                     const SmallVectorImpl<ISD::InputArg> &Ins,
04509                                     SDLoc dl, SelectionDAG &DAG,
04510                                     SmallVectorImpl<SDValue> &InVals,
04511                                     ImmutableCallSite *CS) const {
04512   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
04513   // of the 32-bit SVR4 ABI stack frame layout.
04514 
04515   assert((CallConv == CallingConv::C ||
04516           CallConv == CallingConv::Fast) && "Unknown calling convention!");
04517 
04518   unsigned PtrByteSize = 4;
04519 
04520   MachineFunction &MF = DAG.getMachineFunction();
04521 
04522   // Mark this function as potentially containing a tail call. As a
04523   // consequence, the frame pointer will be used for dynamic stack allocation
04524   // and for restoring the caller's stack pointer in this function's epilogue.
04525   // This is done because the tail-called function might overwrite the value
04526   // in this function's (MF) stack pointer stack slot 0(SP).
04527   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
04528       CallConv == CallingConv::Fast)
04529     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
04530 
04531   // Count how many bytes are to be pushed on the stack, including the linkage
04532   // area, parameter list area, and the part of the local variable space that
04533   // contains copies of aggregates passed by value.
04534 
04535   // Assign locations to all of the outgoing arguments.
04536   SmallVector<CCValAssign, 16> ArgLocs;
04537   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
04538                  *DAG.getContext());
04539 
04540   // Reserve space for the linkage area on the stack.
04541   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
04542                        PtrByteSize);
04543 
04544   if (isVarArg) {
04545     // Handle fixed and variable vector arguments differently.
04546     // Fixed vector arguments go into registers as long as registers are
04547     // available. Variable vector arguments always go into memory.
04548     unsigned NumArgs = Outs.size();
04549 
04550     for (unsigned i = 0; i != NumArgs; ++i) {
04551       MVT ArgVT = Outs[i].VT;
04552       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
04553       bool Result;
04554 
04555       if (Outs[i].IsFixed) {
04556         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
04557                                CCInfo);
04558       } else {
04559         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
04560                                       ArgFlags, CCInfo);
04561       }
04562 
04563       if (Result) {
04564 #ifndef NDEBUG
04565         errs() << "Call operand #" << i << " has unhandled type "
04566              << EVT(ArgVT).getEVTString() << "\n";
04567 #endif
04568         llvm_unreachable(nullptr);
04569       }
04570     }
04571   } else {
04572     // All arguments are treated the same.
04573     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
04574   }
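  // For illustration (hypothetical call): in printf(fmt, v) where v is a
  // <4 x i32> passed through the "..." part, IsFixed is false, so
  // CC_PPC32_SVR4_VarArg assigns v to memory; the same vector passed as a
  // fixed parameter would get a vector register from CC_PPC32_SVR4 while one
  // is available.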
04575 
04576   // Assign locations to all of the outgoing aggregate by value arguments.
04577   SmallVector<CCValAssign, 16> ByValArgLocs;
04578   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
04579                       ByValArgLocs, *DAG.getContext());
04580 
04581   // Reserve stack space for the allocations in CCInfo.
04582   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
04583 
04584   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
04585 
04586   // Size of the linkage area, parameter list area, and the part of the
04587   // local variable space where copies of aggregates passed by value
04588   // are stored.
04589   unsigned NumBytes = CCByValInfo.getNextStackOffset();
04590 
04591   // Calculate by how many bytes the stack has to be adjusted in case of tail
04592   // call optimization.
04593   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
04594 
04595   // Adjust the stack pointer for the new arguments...
04596   // These operations are automatically eliminated by the prolog/epilog pass
04597   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
04598                                dl);
04599   SDValue CallSeqStart = Chain;
04600 
04601   // Load the return address and frame pointer so they can be moved somewhere
04602   // else later.
04603   SDValue LROp, FPOp;
04604   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
04605                                        dl);
04606 
04607   // Set up a copy of the stack pointer for use loading and storing any
04608   // arguments that may not fit in the registers available for argument
04609   // passing.
04610   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
04611 
04612   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
04613   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
04614   SmallVector<SDValue, 8> MemOpChains;
04615 
04616   bool seenFloatArg = false;
04617   // Walk the register/memloc assignments, inserting copies/loads.
04618   for (unsigned i = 0, j = 0, e = ArgLocs.size();
04619        i != e;
04620        ++i) {
04621     CCValAssign &VA = ArgLocs[i];
04622     SDValue Arg = OutVals[i];
04623     ISD::ArgFlagsTy Flags = Outs[i].Flags;
04624 
04625     if (Flags.isByVal()) {
04626       // Argument is an aggregate which is passed by value, thus we need to
04627       // create a copy of it in the local variable space of the current stack
04628       // frame (which is the stack frame of the caller) and pass the address of
04629       // this copy to the callee.
04630       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
04631       CCValAssign &ByValVA = ByValArgLocs[j++];
04632       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
04633 
04634       // Memory reserved in the local variable space of the caller's stack frame.
04635       unsigned LocMemOffset = ByValVA.getLocMemOffset();
04636 
04637       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
04638       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
04639                            StackPtr, PtrOff);
04640 
04641       // Create a copy of the argument in the local area of the current
04642       // stack frame.
04643       SDValue MemcpyCall =
04644         CreateCopyOfByValArgument(Arg, PtrOff,
04645                                   CallSeqStart.getNode()->getOperand(0),
04646                                   Flags, DAG, dl);
04647 
04648       // This must go outside the CALLSEQ_START..END.
04649       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
04650                            CallSeqStart.getNode()->getOperand(1),
04651                            SDLoc(MemcpyCall));
04652       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
04653                              NewCallSeqStart.getNode());
04654       Chain = CallSeqStart = NewCallSeqStart;
04655 
04656       // Pass the address of the aggregate copy on the stack either in a
04657       // physical register or in the parameter list area of the current stack
04658       // frame to the callee.
04659       Arg = PtrOff;
04660     }
04661 
04662     if (VA.isRegLoc()) {
04663       if (Arg.getValueType() == MVT::i1)
04664         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
04665 
04666       seenFloatArg |= VA.getLocVT().isFloatingPoint();
04667       // Put argument in a physical register.
04668       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
04669     } else {
04670       // Put argument in the parameter list area of the current stack frame.
04671       assert(VA.isMemLoc());
04672       unsigned LocMemOffset = VA.getLocMemOffset();
04673 
04674       if (!isTailCall) {
04675