LLVM API Documentation

PPCISelLowering.cpp
Go to the documentation of this file.
00001 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the PPCISelLowering class.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "PPCISelLowering.h"
00015 #include "MCTargetDesc/PPCPredicates.h"
00016 #include "PPCMachineFunctionInfo.h"
00017 #include "PPCPerfectShuffle.h"
00018 #include "PPCTargetMachine.h"
00019 #include "PPCTargetObjectFile.h"
00020 #include "llvm/ADT/STLExtras.h"
00021 #include "llvm/ADT/StringSwitch.h"
00022 #include "llvm/ADT/Triple.h"
00023 #include "llvm/CodeGen/CallingConvLower.h"
00024 #include "llvm/CodeGen/MachineFrameInfo.h"
00025 #include "llvm/CodeGen/MachineFunction.h"
00026 #include "llvm/CodeGen/MachineInstrBuilder.h"
00027 #include "llvm/CodeGen/MachineRegisterInfo.h"
00028 #include "llvm/CodeGen/SelectionDAG.h"
00029 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
00030 #include "llvm/IR/CallingConv.h"
00031 #include "llvm/IR/Constants.h"
00032 #include "llvm/IR/DerivedTypes.h"
00033 #include "llvm/IR/Function.h"
00034 #include "llvm/IR/Intrinsics.h"
00035 #include "llvm/Support/CommandLine.h"
00036 #include "llvm/Support/ErrorHandling.h"
00037 #include "llvm/Support/MathExtras.h"
00038 #include "llvm/Support/raw_ostream.h"
00039 #include "llvm/Target/TargetOptions.h"
00040 using namespace llvm;
00041 
00042 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
00043 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
00044 
00045 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
00046 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
00047 
00048 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
00049 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
00050 
00051 // FIXME: Remove this once the bug has been fixed!
00052 extern cl::opt<bool> ANDIGlueBug;
00053 
00054 static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
00055   // If it isn't a Mach-O file then it's going to be a linux ELF
00056   // object file.
00057   if (TT.isOSDarwin())
00058     return new TargetLoweringObjectFileMachO();
00059 
00060   return new PPC64LinuxTargetObjectFile();
00061 }
00062 
00063 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
00064     : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
00065       Subtarget(*TM.getSubtargetImpl()) {
00066   setPow2DivIsCheap();
00067 
00068   // Use _setjmp/_longjmp instead of setjmp/longjmp.
00069   setUseUnderscoreSetJmp(true);
00070   setUseUnderscoreLongJmp(true);
00071 
00072   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
00073   // arguments are at least 4/8 bytes aligned.
00074   bool isPPC64 = Subtarget.isPPC64();
00075   setMinStackArgumentAlignment(isPPC64 ? 8:4);
00076 
00077   // Set up the register classes.
00078   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
00079   addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
00080   addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
00081 
00082   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
00083   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
00084   setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
00085 
00086   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
00087 
00088   // PowerPC has pre-inc load and store's.
00089   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
00090   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
00091   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
00092   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
00093   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
00094   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
00095   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
00096   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
00097   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
00098   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
00099 
00100   if (Subtarget.useCRBits()) {
00101     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
00102 
00103     if (isPPC64 || Subtarget.hasFPCVT()) {
00104       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
00105       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
00106                          isPPC64 ? MVT::i64 : MVT::i32);
00107       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
00108       AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, 
00109                          isPPC64 ? MVT::i64 : MVT::i32);
00110     } else {
00111       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
00112       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
00113     }
00114 
00115     // PowerPC does not support direct load / store of condition registers
00116     setOperationAction(ISD::LOAD, MVT::i1, Custom);
00117     setOperationAction(ISD::STORE, MVT::i1, Custom);
00118 
00119     // FIXME: Remove this once the ANDI glue bug is fixed:
00120     if (ANDIGlueBug)
00121       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
00122 
00123     setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
00124     setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
00125     setTruncStoreAction(MVT::i64, MVT::i1, Expand);
00126     setTruncStoreAction(MVT::i32, MVT::i1, Expand);
00127     setTruncStoreAction(MVT::i16, MVT::i1, Expand);
00128     setTruncStoreAction(MVT::i8, MVT::i1, Expand);
00129 
00130     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
00131   }
00132 
00133   // This is used in the ppcf128->int sequence.  Note it has different semantics
00134   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
00135   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
00136 
00137   // We do not currently implement these libm ops for PowerPC.
00138   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
00139   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
00140   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
00141   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
00142   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
00143   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
00144 
00145   // PowerPC has no SREM/UREM instructions
00146   setOperationAction(ISD::SREM, MVT::i32, Expand);
00147   setOperationAction(ISD::UREM, MVT::i32, Expand);
00148   setOperationAction(ISD::SREM, MVT::i64, Expand);
00149   setOperationAction(ISD::UREM, MVT::i64, Expand);
00150 
00151   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
00152   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
00153   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
00154   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
00155   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
00156   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
00157   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
00158   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
00159   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
00160 
00161   // We don't support sin/cos/sqrt/fmod/pow
00162   setOperationAction(ISD::FSIN , MVT::f64, Expand);
00163   setOperationAction(ISD::FCOS , MVT::f64, Expand);
00164   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00165   setOperationAction(ISD::FREM , MVT::f64, Expand);
00166   setOperationAction(ISD::FPOW , MVT::f64, Expand);
00167   setOperationAction(ISD::FMA  , MVT::f64, Legal);
00168   setOperationAction(ISD::FSIN , MVT::f32, Expand);
00169   setOperationAction(ISD::FCOS , MVT::f32, Expand);
00170   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00171   setOperationAction(ISD::FREM , MVT::f32, Expand);
00172   setOperationAction(ISD::FPOW , MVT::f32, Expand);
00173   setOperationAction(ISD::FMA  , MVT::f32, Legal);
00174 
00175   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
00176 
00177   // If we're enabling GP optimizations, use hardware square root
00178   if (!Subtarget.hasFSQRT() &&
00179       !(TM.Options.UnsafeFPMath &&
00180         Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
00181     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
00182 
00183   if (!Subtarget.hasFSQRT() &&
00184       !(TM.Options.UnsafeFPMath &&
00185         Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
00186     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
00187 
00188   if (Subtarget.hasFCPSGN()) {
00189     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
00190     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
00191   } else {
00192     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00193     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
00194   }
00195 
00196   if (Subtarget.hasFPRND()) {
00197     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
00198     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
00199     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
00200     setOperationAction(ISD::FROUND, MVT::f64, Legal);
00201 
00202     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
00203     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
00204     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
00205     setOperationAction(ISD::FROUND, MVT::f32, Legal);
00206   }
00207 
00208   // PowerPC does not have BSWAP, CTPOP or CTTZ
00209   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
00210   setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
00211   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
00212   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
00213   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
00214   setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
00215   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
00216   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
00217 
00218   if (Subtarget.hasPOPCNTD()) {
00219     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
00220     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
00221   } else {
00222     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
00223     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
00224   }
00225 
00226   // PowerPC does not have ROTR
00227   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
00228   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
00229 
00230   if (!Subtarget.useCRBits()) {
00231     // PowerPC does not have Select
00232     setOperationAction(ISD::SELECT, MVT::i32, Expand);
00233     setOperationAction(ISD::SELECT, MVT::i64, Expand);
00234     setOperationAction(ISD::SELECT, MVT::f32, Expand);
00235     setOperationAction(ISD::SELECT, MVT::f64, Expand);
00236   }
00237 
00238   // PowerPC wants to turn select_cc of FP into fsel when possible.
00239   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
00240   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
00241 
00242   // PowerPC wants to optimize integer setcc a bit
00243   if (!Subtarget.useCRBits())
00244     setOperationAction(ISD::SETCC, MVT::i32, Custom);
00245 
00246   // PowerPC does not have BRCOND which requires SetCC
00247   if (!Subtarget.useCRBits())
00248     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
00249 
00250   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
00251 
00252   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
00253   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
00254 
00255   // PowerPC does not have [U|S]INT_TO_FP
00256   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
00257   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
00258 
00259   setOperationAction(ISD::BITCAST, MVT::f32, Expand);
00260   setOperationAction(ISD::BITCAST, MVT::i32, Expand);
00261   setOperationAction(ISD::BITCAST, MVT::i64, Expand);
00262   setOperationAction(ISD::BITCAST, MVT::f64, Expand);
00263 
00264   // We cannot sextinreg(i1).  Expand to shifts.
00265   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
00266 
00267   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
00268   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
00269   // support continuation, user-level threading, and etc.. As a result, no
00270   // other SjLj exception interfaces are implemented and please don't build
00271   // your own exception handling based on them.
00272   // LLVM/Clang supports zero-cost DWARF exception handling.
00273   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
00274   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
00275 
00276   // We want to legalize GlobalAddress and ConstantPool nodes into the
00277   // appropriate instructions to materialize the address.
00278   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
00279   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
00280   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
00281   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
00282   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
00283   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
00284   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
00285   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
00286   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
00287   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
00288 
00289   // TRAP is legal.
00290   setOperationAction(ISD::TRAP, MVT::Other, Legal);
00291 
00292   // TRAMPOLINE is custom lowered.
00293   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
00294   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
00295 
00296   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
00297   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
00298 
00299   if (Subtarget.isSVR4ABI()) {
00300     if (isPPC64) {
00301       // VAARG always uses double-word chunks, so promote anything smaller.
00302       setOperationAction(ISD::VAARG, MVT::i1, Promote);
00303       AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
00304       setOperationAction(ISD::VAARG, MVT::i8, Promote);
00305       AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
00306       setOperationAction(ISD::VAARG, MVT::i16, Promote);
00307       AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
00308       setOperationAction(ISD::VAARG, MVT::i32, Promote);
00309       AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
00310       setOperationAction(ISD::VAARG, MVT::Other, Expand);
00311     } else {
00312       // VAARG is custom lowered with the 32-bit SVR4 ABI.
00313       setOperationAction(ISD::VAARG, MVT::Other, Custom);
00314       setOperationAction(ISD::VAARG, MVT::i64, Custom);
00315     }
00316   } else
00317     setOperationAction(ISD::VAARG, MVT::Other, Expand);
00318 
00319   if (Subtarget.isSVR4ABI() && !isPPC64)
00320     // VACOPY is custom lowered with the 32-bit SVR4 ABI.
00321     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
00322   else
00323     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
00324 
00325   // Use the default implementation.
00326   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
00327   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
00328   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
00329   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
00330   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
00331 
00332   // We want to custom lower some of our intrinsics.
00333   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
00334 
00335   // To handle counter-based loop conditions.
00336   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
00337 
00338   // Comparisons that require checking two conditions.
00339   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
00340   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
00341   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
00342   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
00343   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
00344   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
00345   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
00346   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
00347   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
00348   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
00349   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
00350   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
00351 
00352   if (Subtarget.has64BitSupport()) {
00353     // They also have instructions for converting between i64 and fp.
00354     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
00355     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
00356     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
00357     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
00358     // This is just the low 32 bits of a (signed) fp->i64 conversion.
00359     // We cannot do this with Promote because i64 is not a legal type.
00360     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
00361 
00362     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
00363       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
00364   } else {
00365     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
00366     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
00367   }
00368 
00369   // With the instructions enabled under FPCVT, we can do everything.
00370   if (Subtarget.hasFPCVT()) {
00371     if (Subtarget.has64BitSupport()) {
00372       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
00373       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
00374       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
00375       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
00376     }
00377 
00378     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
00379     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
00380     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
00381     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
00382   }
00383 
00384   if (Subtarget.use64BitRegs()) {
00385     // 64-bit PowerPC implementations can support i64 types directly
00386     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
00387     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
00388     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
00389     // 64-bit PowerPC wants to expand i128 shifts itself.
00390     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
00391     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
00392     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
00393   } else {
00394     // 32-bit PowerPC wants to expand i64 shifts itself.
00395     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
00396     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
00397     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
00398   }
00399 
00400   if (Subtarget.hasAltivec()) {
00401     // First set operation action for all vector types to expand. Then we
00402     // will selectively turn on ones that can be effectively codegen'd.
00403     for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
00404          i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
00405       MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
00406 
00407       // add/sub are legal for all supported vector VT's.
00408       setOperationAction(ISD::ADD , VT, Legal);
00409       setOperationAction(ISD::SUB , VT, Legal);
00410 
00411       // We promote all shuffles to v16i8.
00412       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
00413       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
00414 
00415       // We promote all non-typed operations to v4i32.
00416       setOperationAction(ISD::AND   , VT, Promote);
00417       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
00418       setOperationAction(ISD::OR    , VT, Promote);
00419       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
00420       setOperationAction(ISD::XOR   , VT, Promote);
00421       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
00422       setOperationAction(ISD::LOAD  , VT, Promote);
00423       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
00424       setOperationAction(ISD::SELECT, VT, Promote);
00425       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
00426       setOperationAction(ISD::STORE, VT, Promote);
00427       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
00428 
00429       // No other operations are legal.
00430       setOperationAction(ISD::MUL , VT, Expand);
00431       setOperationAction(ISD::SDIV, VT, Expand);
00432       setOperationAction(ISD::SREM, VT, Expand);
00433       setOperationAction(ISD::UDIV, VT, Expand);
00434       setOperationAction(ISD::UREM, VT, Expand);
00435       setOperationAction(ISD::FDIV, VT, Expand);
00436       setOperationAction(ISD::FREM, VT, Expand);
00437       setOperationAction(ISD::FNEG, VT, Expand);
00438       setOperationAction(ISD::FSQRT, VT, Expand);
00439       setOperationAction(ISD::FLOG, VT, Expand);
00440       setOperationAction(ISD::FLOG10, VT, Expand);
00441       setOperationAction(ISD::FLOG2, VT, Expand);
00442       setOperationAction(ISD::FEXP, VT, Expand);
00443       setOperationAction(ISD::FEXP2, VT, Expand);
00444       setOperationAction(ISD::FSIN, VT, Expand);
00445       setOperationAction(ISD::FCOS, VT, Expand);
00446       setOperationAction(ISD::FABS, VT, Expand);
00447       setOperationAction(ISD::FPOWI, VT, Expand);
00448       setOperationAction(ISD::FFLOOR, VT, Expand);
00449       setOperationAction(ISD::FCEIL,  VT, Expand);
00450       setOperationAction(ISD::FTRUNC, VT, Expand);
00451       setOperationAction(ISD::FRINT,  VT, Expand);
00452       setOperationAction(ISD::FNEARBYINT, VT, Expand);
00453       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
00454       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
00455       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
00456       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
00457       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
00458       setOperationAction(ISD::UDIVREM, VT, Expand);
00459       setOperationAction(ISD::SDIVREM, VT, Expand);
00460       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
00461       setOperationAction(ISD::FPOW, VT, Expand);
00462       setOperationAction(ISD::BSWAP, VT, Expand);
00463       setOperationAction(ISD::CTPOP, VT, Expand);
00464       setOperationAction(ISD::CTLZ, VT, Expand);
00465       setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
00466       setOperationAction(ISD::CTTZ, VT, Expand);
00467       setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
00468       setOperationAction(ISD::VSELECT, VT, Expand);
00469       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
00470 
00471       for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
00472            j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
00473         MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
00474         setTruncStoreAction(VT, InnerVT, Expand);
00475       }
00476       setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
00477       setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
00478       setLoadExtAction(ISD::EXTLOAD, VT, Expand);
00479     }
00480 
00481     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
00482     // with merges, splats, etc.
00483     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
00484 
00485     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
00486     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
00487     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
00488     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
00489     setOperationAction(ISD::SELECT, MVT::v4i32,
00490                        Subtarget.useCRBits() ? Legal : Expand);
00491     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
00492     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
00493     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
00494     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
00495     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
00496     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
00497     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
00498     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
00499     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
00500 
00501     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
00502     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
00503     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
00504     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
00505 
00506     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
00507     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
00508 
00509     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
00510       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
00511       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
00512     }
00513 
00514     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
00515     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
00516     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
00517 
00518     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
00519     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
00520 
00521     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
00522     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
00523     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
00524     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
00525 
00526     // Altivec does not contain unordered floating-point compare instructions
00527     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
00528     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
00529     setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
00530     setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
00531     setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
00532     setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
00533 
00534     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
00535     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
00536 
00537     if (Subtarget.hasVSX()) {
00538       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
00539       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
00540 
00541       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
00542       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
00543       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
00544       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
00545       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
00546 
00547       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
00548 
00549       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
00550       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
00551 
00552       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
00553       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
00554 
00555       setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
00556       setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
00557       setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
00558       setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
00559       setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
00560 
00561       // Share the Altivec comparison restrictions.
00562       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
00563       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
00564       setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
00565       setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
00566       setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
00567       setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
00568 
00569       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
00570       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
00571 
00572       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
00573       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
00574 
00575       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
00576 
00577       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
00578 
00579       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
00580       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
00581 
00582       // VSX v2i64 only supports non-arithmetic operations.
00583       setOperationAction(ISD::ADD, MVT::v2i64, Expand);
00584       setOperationAction(ISD::SUB, MVT::v2i64, Expand);
00585 
00586       setOperationAction(ISD::SHL, MVT::v2i64, Expand);
00587       setOperationAction(ISD::SRA, MVT::v2i64, Expand);
00588       setOperationAction(ISD::SRL, MVT::v2i64, Expand);
00589 
00590       setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
00591 
00592       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
00593       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
00594       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
00595       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
00596 
00597       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
00598 
00599       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
00600       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
00601       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
00602       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
00603 
00604       // Vector operation legalization checks the result type of
00605       // SIGN_EXTEND_INREG, overall legalization checks the inner type.
00606       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
00607       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
00608       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
00609       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
00610 
00611       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
00612     }
00613   }
00614 
00615   if (Subtarget.has64BitSupport()) {
00616     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
00617     setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
00618   }
00619 
00620   setOperationAction(ISD::ATOMIC_LOAD,  MVT::i32, Expand);
00621   setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
00622   setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
00623   setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
00624 
00625   setBooleanContents(ZeroOrOneBooleanContent);
00626   // Altivec instructions set fields to all zeros or all ones.
00627   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
00628 
00629   if (!isPPC64) {
00630     // These libcalls are not available in 32-bit.
00631     setLibcallName(RTLIB::SHL_I128, nullptr);
00632     setLibcallName(RTLIB::SRL_I128, nullptr);
00633     setLibcallName(RTLIB::SRA_I128, nullptr);
00634   }
00635 
00636   if (isPPC64) {
00637     setStackPointerRegisterToSaveRestore(PPC::X1);
00638     setExceptionPointerRegister(PPC::X3);
00639     setExceptionSelectorRegister(PPC::X4);
00640   } else {
00641     setStackPointerRegisterToSaveRestore(PPC::R1);
00642     setExceptionPointerRegister(PPC::R3);
00643     setExceptionSelectorRegister(PPC::R4);
00644   }
00645 
00646   // We have target-specific dag combine patterns for the following nodes:
00647   setTargetDAGCombine(ISD::SINT_TO_FP);
00648   setTargetDAGCombine(ISD::LOAD);
00649   setTargetDAGCombine(ISD::STORE);
00650   setTargetDAGCombine(ISD::BR_CC);
00651   if (Subtarget.useCRBits())
00652     setTargetDAGCombine(ISD::BRCOND);
00653   setTargetDAGCombine(ISD::BSWAP);
00654   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
00655 
00656   setTargetDAGCombine(ISD::SIGN_EXTEND);
00657   setTargetDAGCombine(ISD::ZERO_EXTEND);
00658   setTargetDAGCombine(ISD::ANY_EXTEND);
00659 
00660   if (Subtarget.useCRBits()) {
00661     setTargetDAGCombine(ISD::TRUNCATE);
00662     setTargetDAGCombine(ISD::SETCC);
00663     setTargetDAGCombine(ISD::SELECT_CC);
00664   }
00665 
00666   // Use reciprocal estimates.
00667   if (TM.Options.UnsafeFPMath) {
00668     setTargetDAGCombine(ISD::FDIV);
00669     setTargetDAGCombine(ISD::FSQRT);
00670   }
00671 
00672   // Darwin long double math library functions have $LDBL128 appended.
00673   if (Subtarget.isDarwin()) {
00674     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
00675     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
00676     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
00677     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
00678     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
00679     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
00680     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
00681     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
00682     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
00683     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
00684   }
00685 
00686   // With 32 condition bits, we don't need to sink (and duplicate) compares
00687   // aggressively in CodeGenPrep.
00688   if (Subtarget.useCRBits())
00689     setHasMultipleConditionRegisters();
00690 
00691   setMinFunctionAlignment(2);
00692   if (Subtarget.isDarwin())
00693     setPrefFunctionAlignment(4);
00694 
00695   if (isPPC64 && Subtarget.isJITCodeModel())
00696     // Temporary workaround for the inability of PPC64 JIT to handle jump
00697     // tables.
00698     setSupportJumpTables(false);
00699 
00700   setInsertFencesForAtomic(true);
00701 
00702   if (Subtarget.enableMachineScheduler())
00703     setSchedulingPreference(Sched::Source);
00704   else
00705     setSchedulingPreference(Sched::Hybrid);
00706 
00707   computeRegisterProperties();
00708 
00709   // The Freescale cores does better with aggressive inlining of memcpy and
00710   // friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
00711   if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
00712       Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
00713     MaxStoresPerMemset = 32;
00714     MaxStoresPerMemsetOptSize = 16;
00715     MaxStoresPerMemcpy = 32;
00716     MaxStoresPerMemcpyOptSize = 8;
00717     MaxStoresPerMemmove = 32;
00718     MaxStoresPerMemmoveOptSize = 8;
00719 
00720     setPrefFunctionAlignment(4);
00721   }
00722 }
00723 
00724 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
00725 /// the desired ByVal argument alignment.
00726 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
00727                              unsigned MaxMaxAlign) {
00728   if (MaxAlign == MaxMaxAlign)
00729     return;
00730   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
00731     if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
00732       MaxAlign = 32;
00733     else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
00734       MaxAlign = 16;
00735   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
00736     unsigned EltAlign = 0;
00737     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
00738     if (EltAlign > MaxAlign)
00739       MaxAlign = EltAlign;
00740   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
00741     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
00742       unsigned EltAlign = 0;
00743       getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
00744       if (EltAlign > MaxAlign)
00745         MaxAlign = EltAlign;
00746       if (MaxAlign == MaxMaxAlign)
00747         break;
00748     }
00749   }
00750 }
00751 
00752 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
00753 /// function arguments in the caller parameter area.
00754 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
00755   // Darwin passes everything on 4 byte boundary.
00756   if (Subtarget.isDarwin())
00757     return 4;
00758 
00759   // 16byte and wider vectors are passed on 16byte boundary.
00760   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
00761   unsigned Align = Subtarget.isPPC64() ? 8 : 4;
00762   if (Subtarget.hasAltivec() || Subtarget.hasQPX())
00763     getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
00764   return Align;
00765 }
00766 
/// getTargetNodeName - Return a human-readable string for the given
/// target-specific PPCISD opcode, or null when the opcode is not one of
/// ours.  Used by SelectionDAG debug dumps.
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return nullptr;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::LOAD:            return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC:        return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  // TOC- and TLS-related pseudo operations.
  case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
  case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
  case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  }
}
00831 
00832 EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
00833   if (!VT.isVector())
00834     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
00835   return VT.changeVectorElementTypeToInteger();
00836 }
00837 
00838 //===----------------------------------------------------------------------===//
00839 // Node matching predicates, for use by the tblgen matching code.
00840 //===----------------------------------------------------------------------===//
00841 
00842 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
00843 static bool isFloatingPointZero(SDValue Op) {
00844   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
00845     return CFP->getValueAPF().isZero();
00846   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
00847     // Maybe this has already been legalized into the constant pool?
00848     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
00849       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
00850         return CFP->getValueAPF().isZero();
00851   }
00852   return false;
00853 }
00854 
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef shuffle-mask elements are encoded as negative values and match
  // anything.
  if (Op < 0)
    return true;
  return Op == Val;
}
00860 
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
                               SelectionDAG &DAG) {
  // vpkuhum keeps one byte of each halfword; which byte depends on
  // endianness: offset 0 within the halfword on little-endian targets,
  // offset 1 on big-endian.
  unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
  if (!isUnary) {
    // Two-input form: result byte i must select byte i*2+j of the 32-byte
    // concatenated input (undef elements match anything).
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),  i*2+j))
        return false;
  } else {
    // Unary form: both halves of the result must select the same bytes of
    // the first input.
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
        return false;
  }
  return true;
}
00878 
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
                               SelectionDAG &DAG) {
  // vpkuwum keeps one halfword of each word; j and k are the byte offsets
  // of that halfword within its 4-byte word: 0/1 on little-endian targets,
  // 2/3 on big-endian.
  unsigned j, k;
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    j = 0;
    k = 1;
  } else {
    j = 2;
    k = 3;
  }
  if (!isUnary) {
    // Two-input form: each result halfword (byte pair i, i+1) must come
    // from word i/2 of the 32-byte concatenated input.
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+k))
        return false;
  } else {
    // Unary form: both halves of the result must select the same bytes of
    // the first input.
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+k) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+k))
        return false;
  }
  return true;
}
00906 
/// isVMerge - Common function, used to match vmrg* shuffles.
///
/// A merge interleaves 8/UnitSize units from each source operand:
/// LHS unit 0, RHS unit 0, LHS unit 1, RHS unit 1, ...  LHSStart and
/// RHSStart give the byte index (into the 32-byte concatenated input) of
/// the first unit taken from each side.
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      // Even-numbered units must come from LHSStart onward, odd-numbered
      // units from RHSStart onward; undef mask elements match anything.
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
00926 
00927 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
00928 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
00929 /// The ShuffleKind distinguishes between big-endian merges with two 
00930 /// different inputs (0), either-endian merges with two identical inputs (1),
00931 /// and little-endian merges with two different inputs (2).  For the latter,
00932 /// the input operands are swapped (see PPCInstrAltivec.td).
00933 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
00934                              unsigned ShuffleKind, SelectionDAG &DAG) {
00935   if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
00936     if (ShuffleKind == 1) // unary
00937       return isVMerge(N, UnitSize, 0, 0);
00938     else if (ShuffleKind == 2) // swapped
00939       return isVMerge(N, UnitSize, 0, 16);
00940     else
00941       return false;
00942   } else {
00943     if (ShuffleKind == 1) // unary
00944       return isVMerge(N, UnitSize, 8, 8);
00945     else if (ShuffleKind == 0) // normal
00946       return isVMerge(N, UnitSize, 8, 24);
00947     else
00948       return false;
00949   }
00950 }
00951 
00952 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
00953 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
00954 /// The ShuffleKind distinguishes between big-endian merges with two 
00955 /// different inputs (0), either-endian merges with two identical inputs (1),
00956 /// and little-endian merges with two different inputs (2).  For the latter,
00957 /// the input operands are swapped (see PPCInstrAltivec.td).
00958 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
00959                              unsigned ShuffleKind, SelectionDAG &DAG) {
00960   if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
00961     if (ShuffleKind == 1) // unary
00962       return isVMerge(N, UnitSize, 8, 8);
00963     else if (ShuffleKind == 2) // swapped
00964       return isVMerge(N, UnitSize, 8, 24);
00965     else
00966       return false;
00967   } else {
00968     if (ShuffleKind == 1) // unary
00969       return isVMerge(N, UnitSize, 0, 0);
00970     else if (ShuffleKind == 0) // normal
00971       return isVMerge(N, UnitSize, 0, 16);
00972     else
00973       return false;
00974   }
00975 }
00976 
00977 
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  // The mask element is known non-negative here, so the conversion to
  // unsigned is safe.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {

    // On little-endian targets element numbering runs the other way, so
    // the expected mask values count *down* from ShiftAmt.
    ShiftAmt += i;

    if (!isUnary) {
      // Check the rest of the elements to see if they are consecutive.
      for (++i; i != 16; ++i)
        if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i))
          return -1;
    } else {
      // Check the rest of the elements to see if they are consecutive.
      // The unary form wraps around a single input, hence the mod-16 mask.
      for (++i; i != 16; ++i)
        if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15))
          return -1;
    }

  } else {  // Big Endian

    // Normalize so ShiftAmt is the index selected by element 0.
    ShiftAmt -= i;

    if (!isUnary) {
      // Check the rest of the elements to see if they are consecutive.
      for (++i; i != 16; ++i)
        if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
          return -1;
    } else {
      // Check the rest of the elements to see if they are consecutive.
      // The unary form wraps around a single input, hence the mod-16 mask.
      for (++i; i != 16; ++i)
        if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
          return -1;
    }
  }
  return ShiftAmt;
}
01032 
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  // Note: an undef first element comes back as -1, which wraps to a huge
  // unsigned value and is rejected here along with second-vector indices.
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  // Every following unit must repeat the first EltSize bytes exactly; units
  // whose first byte is undef are skipped.
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
01062 
01063 /// isAllNegativeZeroVector - Returns true if all elements of build_vector
01064 /// are -0.0.
01065 bool PPC::isAllNegativeZeroVector(SDNode *N) {
01066   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
01067 
01068   APInt APVal, APUndef;
01069   unsigned BitSize;
01070   bool HasAnyUndefs;
01071 
01072   if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
01073     if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
01074       return CFP->getValueAPF().isNegZero();
01075 
01076   return false;
01077 }
01078 
01079 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
01080 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
01081 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
01082                                 SelectionDAG &DAG) {
01083   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
01084   assert(isSplatShuffleMask(SVOp, EltSize));
01085   if (DAG.getTarget().getDataLayout()->isLittleEndian())
01086     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
01087   else
01088     return SVOp->getMaskElt(0) / EltSize;
01089 }
01090 
01091 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
01092 /// by using a vspltis[bhw] instruction of the specified element size, return
01093 /// the constant being splatted.  The ByteSize field indicates the number of
01094 /// bytes of each element [124] -> [bhw].
01095 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
01096   SDValue OpVal(nullptr, 0);
01097 
01098   // If ByteSize of the splat is bigger than the element size of the
01099   // build_vector, then we have a case where we are checking for a splat where
01100   // multiple elements of the buildvector are folded together into a single
01101   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
01102   unsigned EltSize = 16/N->getNumOperands();
01103   if (EltSize < ByteSize) {
01104     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
01105     SDValue UniquedVals[4];
01106     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
01107 
01108     // See if all of the elements in the buildvector agree across.
01109     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
01110       if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
01111       // If the element isn't a constant, bail fully out.
01112       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
01113 
01114 
01115       if (!UniquedVals[i&(Multiple-1)].getNode())
01116         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
01117       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
01118         return SDValue();  // no match.
01119     }
01120 
01121     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
01122     // either constant or undef values that are identical for each chunk.  See
01123     // if these chunks can form into a larger vspltis*.
01124 
01125     // Check to see if all of the leading entries are either 0 or -1.  If
01126     // neither, then this won't fit into the immediate field.
01127     bool LeadingZero = true;
01128     bool LeadingOnes = true;
01129     for (unsigned i = 0; i != Multiple-1; ++i) {
01130       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
01131 
01132       LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
01133       LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
01134     }
01135     // Finally, check the least significant entry.
01136     if (LeadingZero) {
01137       if (!UniquedVals[Multiple-1].getNode())
01138         return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
01139       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
01140       if (Val < 16)
01141         return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
01142     }
01143     if (LeadingOnes) {
01144       if (!UniquedVals[Multiple-1].getNode())
01145         return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
01146       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
01147       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
01148         return DAG.getTargetConstant(Val, MVT::i32);
01149     }
01150 
01151     return SDValue();
01152   }
01153 
01154   // Check to see if this buildvec has a single non-undef value in its elements.
01155   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
01156     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
01157     if (!OpVal.getNode())
01158       OpVal = N->getOperand(i);
01159     else if (OpVal != N->getOperand(i))
01160       return SDValue();
01161   }
01162 
01163   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
01164 
01165   unsigned ValSizeInBytes = EltSize;
01166   uint64_t Value = 0;
01167   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
01168     Value = CN->getZExtValue();
01169   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
01170     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
01171     Value = FloatToBits(CN->getValueAPF().convertToFloat());
01172   }
01173 
01174   // If the splat value is larger than the element value, then we can never do
01175   // this splat.  The only case that we could fit the replicated bits into our
01176   // immediate field for would be zero, and we prefer to use vxor for it.
01177   if (ValSizeInBytes < ByteSize) return SDValue();
01178 
01179   // If the element value is larger than the splat value, cut it in half and
01180   // check to see if the two halves are equal.  Continue doing this until we
01181   // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
01182   while (ValSizeInBytes > ByteSize) {
01183     ValSizeInBytes >>= 1;
01184 
01185     // If the top half equals the bottom half, we're still ok.
01186     if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
01187          (Value                        & ((1 << (8*ValSizeInBytes))-1)))
01188       return SDValue();
01189   }
01190 
01191   // Properly sign extend the value.
01192   int MaskVal = SignExtend32(Value, ByteSize * 8);
01193 
01194   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
01195   if (MaskVal == 0) return SDValue();
01196 
01197   // Finally, if this value fits in a 5 bit sext field, return it
01198   if (SignExtend32<5>(MaskVal) == MaskVal)
01199     return DAG.getTargetConstant(MaskVal, MVT::i32);
01200   return SDValue();
01201 }
01202 
01203 //===----------------------------------------------------------------------===//
01204 //  Addressing Mode Selection
01205 //===----------------------------------------------------------------------===//
01206 
01207 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
01208 /// or 64-bit immediate, and if the value can be accurately represented as a
01209 /// sign extension from a 16-bit value.  If so, this returns true and the
01210 /// immediate.
01211 static bool isIntS16Immediate(SDNode *N, short &Imm) {
01212   if (!isa<ConstantSDNode>(N))
01213     return false;
01214 
01215   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
01216   if (N->getValueType(0) == MVT::i32)
01217     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
01218   else
01219     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
01220 }
/// Convenience overload of isIntS16Immediate accepting an SDValue.
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
01224 
01225 
01226 /// SelectAddressRegReg - Given the specified addressed, check to see if it
01227 /// can be represented as an indexed [r+r] operation.  Returns false if it
01228 /// can be more efficiently represented with [r+imm].
01229 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
01230                                             SDValue &Index,
01231                                             SelectionDAG &DAG) const {
01232   short imm = 0;
01233   if (N.getOpcode() == ISD::ADD) {
01234     if (isIntS16Immediate(N.getOperand(1), imm))
01235       return false;    // r+i
01236     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
01237       return false;    // r+i
01238 
01239     Base = N.getOperand(0);
01240     Index = N.getOperand(1);
01241     return true;
01242   } else if (N.getOpcode() == ISD::OR) {
01243     if (isIntS16Immediate(N.getOperand(1), imm))
01244       return false;    // r+i can fold it if we can.
01245 
01246     // If this is an or of disjoint bitfields, we can codegen this as an add
01247     // (for better address arithmetic) if the LHS and RHS of the OR are provably
01248     // disjoint.
01249     APInt LHSKnownZero, LHSKnownOne;
01250     APInt RHSKnownZero, RHSKnownOne;
01251     DAG.computeKnownBits(N.getOperand(0),
01252                          LHSKnownZero, LHSKnownOne);
01253 
01254     if (LHSKnownZero.getBoolValue()) {
01255       DAG.computeKnownBits(N.getOperand(1),
01256                            RHSKnownZero, RHSKnownOne);
01257       // If all of the bits are known zero on the LHS or RHS, the add won't
01258       // carry.
01259       if (~(LHSKnownZero | RHSKnownZero) == 0) {
01260         Base = N.getOperand(0);
01261         Index = N.getOperand(1);
01262         return true;
01263       }
01264     }
01265   }
01266 
01267   return false;
01268 }
01269 
01270 // If we happen to be doing an i64 load or store into a stack slot that has
01271 // less than a 4-byte alignment, then the frame-index elimination may need to
01272 // use an indexed load or store instruction (because the offset may not be a
01273 // multiple of 4). The extra register needed to hold the offset comes from the
01274 // register scavenger, and it is possible that the scavenger will need to use
01275 // an emergency spill slot. As a result, we need to make sure that a spill slot
01276 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
01277 // stack slot.
01278 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
01279   // FIXME: This does not handle the LWA case.
01280   if (VT != MVT::i64)
01281     return;
01282 
01283   // NOTE: We'll exclude negative FIs here, which come from argument
01284   // lowering, because there are no known test cases triggering this problem
01285   // using packed structures (or similar). We can remove this exclusion if
01286   // we find such a test case. The reason why this is so test-case driven is
01287   // because this entire 'fixup' is only to prevent crashes (from the
01288   // register scavenger) on not-really-valid inputs. For example, if we have:
01289   //   %a = alloca i1
01290   //   %b = bitcast i1* %a to i64*
01291   //   store i64* a, i64 b
01292   // then the store should really be marked as 'align 1', but is not. If it
01293   // were marked as 'align 1' then the indexed form would have been
01294   // instruction-selected initially, and the problem this 'fixup' is preventing
01295   // won't happen regardless.
01296   if (FrameIdx < 0)
01297     return;
01298 
01299   MachineFunction &MF = DAG.getMachineFunction();
01300   MachineFrameInfo *MFI = MF.getFrameInfo();
01301 
01302   unsigned Align = MFI->getObjectAlignment(FrameIdx);
01303   if (Align >= 4)
01304     return;
01305 
01306   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
01307   FuncInfo->setHasNonRISpills();
01308 }
01309 
01310 /// Returns true if the address N can be represented by a base register plus
01311 /// a signed 16-bit displacement [r+imm], and if it is not better
01312 /// represented as reg+reg.  If Aligned is true, only accept displacements
01313 /// suitable for STD and friends, i.e. multiples of 4.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // On success, Disp receives the displacement (a target constant or a
  // PPCISD::Lo half) and Base the base register / frame index.
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    // base + signed 16-bit constant: fold the constant into the
    // displacement (when Aligned, only if it is a multiple of 4).
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        // Record this frame-index access in per-function state (see
        // fixupFuncForFI earlier in this file).
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      // The zero register supplies a 0 base for the D-form access.
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      // The high part is adjusted so that high + sext(low) == Addr.
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Default case: any base register plus a zero displacement.
  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
}
01409 
01410 /// SelectAddressRegRegOnly - Given the specified address, force it to be
01411 /// represented as an indexed [r+r] operation.
01412 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
01413                                                 SDValue &Index,
01414                                                 SelectionDAG &DAG) const {
01415   // Check to see if we can easily represent this as an [r+r] address.  This
01416   // will fail if it thinks that the address is more profitably represented as
01417   // reg+imm, e.g. where imm = 0.
01418   if (SelectAddressRegReg(N, Base, Index, DAG))
01419     return true;
01420 
01421   // If the operand is an addition, always emit this as [r+r], since this is
01422   // better (for code size, and execution, as the memop does the add for free)
01423   // than emitting an explicit add.
01424   if (N.getOpcode() == ISD::ADD) {
01425     Base = N.getOperand(0);
01426     Index = N.getOperand(1);
01427     return true;
01428   }
01429 
01430   // Otherwise, do it the hard way, using R0 as the base register.
01431   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
01432                          N.getValueType());
01433   Index = N;
01434   return true;
01435 }
01436 
01437 /// getPreIndexedAddressParts - returns true by value, base pointer and
01438 /// offset pointer and addressing mode by reference if the node's address
01439 /// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  // Extract the pointer operand, memory type, and alignment from the
  // load or store under consideration.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  // Prefer the register+register (indexed) update form when available.
  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    // Non-i64 accesses have no multiple-of-4 restriction (Aligned = false).
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    // Require a displacement that is a multiple of 4 (Aligned = true).
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
01514 
01515 //===----------------------------------------------------------------------===//
01516 //  LowerOperation implementation
01517 //===----------------------------------------------------------------------===//
01518 
01519 /// GetLabelAccessInfo - Return true if we should reference labels using a
01520 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
01521 static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
01522                                unsigned &LoOpFlags,
01523                                const GlobalValue *GV = nullptr) {
01524   HiOpFlags = PPCII::MO_HA;
01525   LoOpFlags = PPCII::MO_LO;
01526 
01527   // Don't use the pic base if not in PIC relocation model.
01528   bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
01529 
01530   if (isPIC) {
01531     HiOpFlags |= PPCII::MO_PIC_FLAG;
01532     LoOpFlags |= PPCII::MO_PIC_FLAG;
01533   }
01534 
01535   // If this is a reference to a global value that requires a non-lazy-ptr, make
01536   // sure that instruction lowering adds it.
01537   if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
01538     HiOpFlags |= PPCII::MO_NLP_FLAG;
01539     LoOpFlags |= PPCII::MO_NLP_FLAG;
01540 
01541     if (GV->hasHiddenVisibility()) {
01542       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
01543       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
01544     }
01545   }
01546 
01547   return isPIC;
01548 }
01549 
01550 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
01551                              SelectionDAG &DAG) {
01552   EVT PtrVT = HiPart.getValueType();
01553   SDValue Zero = DAG.getConstant(0, PtrVT);
01554   SDLoc DL(HiPart);
01555 
01556   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
01557   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
01558 
01559   // With PIC, the first instruction is actually "GR+hi(&G)".
01560   if (isPIC)
01561     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
01562                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
01563 
01564   // Generate non-pic code that has direct accesses to the constant pool.
01565   // The address of the global is just (hi(&g)+lo(&g)).
01566   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
01567 }
01568 
01569 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
01570                                              SelectionDAG &DAG) const {
01571   EVT PtrVT = Op.getValueType();
01572   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
01573   const Constant *C = CP->getConstVal();
01574 
01575   // 64-bit SVR4 ABI code is always position-independent.
01576   // The actual address of the GlobalValue is stored in the TOC.
01577   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
01578     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
01579     return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
01580                        DAG.getRegister(PPC::X2, MVT::i64));
01581   }
01582 
01583   unsigned MOHiFlag, MOLoFlag;
01584   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
01585 
01586   if (isPIC && Subtarget.isSVR4ABI()) {
01587     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
01588                                            PPCII::MO_PIC_FLAG);
01589     SDLoc DL(CP);
01590     return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
01591                        DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
01592   }
01593 
01594   SDValue CPIHi =
01595     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
01596   SDValue CPILo =
01597     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
01598   return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
01599 }
01600 
01601 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
01602   EVT PtrVT = Op.getValueType();
01603   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
01604 
01605   // 64-bit SVR4 ABI code is always position-independent.
01606   // The actual address of the GlobalValue is stored in the TOC.
01607   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
01608     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
01609     return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
01610                        DAG.getRegister(PPC::X2, MVT::i64));
01611   }
01612 
01613   unsigned MOHiFlag, MOLoFlag;
01614   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
01615 
01616   if (isPIC && Subtarget.isSVR4ABI()) {
01617     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
01618                                         PPCII::MO_PIC_FLAG);
01619     SDLoc DL(GA);
01620     return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
01621                        DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
01622   }
01623 
01624   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
01625   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
01626   return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
01627 }
01628 
01629 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
01630                                              SelectionDAG &DAG) const {
01631   EVT PtrVT = Op.getValueType();
01632 
01633   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
01634 
01635   unsigned MOHiFlag, MOLoFlag;
01636   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
01637   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
01638   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
01639   return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
01640 }
01641 
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                              SelectionDAG &DAG) const {

  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy();
  bool is64bit = Subtarget.isPPC64();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  // Local-exec: the offset from the thread pointer (X13 on 64-bit,
  // R2 here on 32-bit) is applied directly via TPREL_HA/TPREL_LO halves.
  if (Model == TLSModel::LocalExec) {
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                     is64bit ? MVT::i64 : MVT::i32);
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  // Initial-exec: load the thread-pointer-relative offset from the GOT
  // and add it to the thread pointer (ADD_TLS).
  if (Model == TLSModel::InitialExec) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else
      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  // General-dynamic: compute the GOT entry address, then call
  // __tls_get_addr (via GET_TLS_ADDR) with it in R3/X3.
  if (Model == TLSModel::GeneralDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                                   GOTReg, TGA);
    } else {
      GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
                                   GOTPtr, TGA);

    // We need a chain node, and don't have one handy.  The underlying
    // call has no side effects, so using the function entry node
    // suffices.
    SDValue Chain = DAG.getEntryNode();
    Chain = DAG.getCopyToReg(Chain, dl,
                             is64bit ? PPC::X3 : PPC::R3, GOTEntry);
    SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3,
                                      is64bit ? MVT::i64 : MVT::i32);
    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
                                  PtrVT, ParmReg, TGA);
    // The return value from GET_TLS_ADDR really is in X3 already, but
    // some hacks are needed here to tie everything together.  The extra
    // copies dissolve during subsequent transforms.
    Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr);
    return DAG.getCopyFromReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, PtrVT);
  }

  // Local-dynamic: like general-dynamic, but the call (GET_TLSLD_ADDR)
  // yields a module base; the variable's DTPREL offset is added afterwards.
  if (Model == TLSModel::LocalDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
                                   GOTPtr, TGA);

    // We need a chain node, and don't have one handy.  The underlying
    // call has no side effects, so using the function entry node
    // suffices.
    SDValue Chain = DAG.getEntryNode();
    Chain = DAG.getCopyToReg(Chain, dl,
                             is64bit ? PPC::X3 : PPC::R3, GOTEntry);
    SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3,
                                      is64bit ? MVT::i64 : MVT::i32);
    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
                                  PtrVT, ParmReg, TGA);
    // The return value from GET_TLSLD_ADDR really is in X3 already, but
    // some hacks are needed here to tie everything together.  The extra
    // copies dissolve during subsequent transforms.
    Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
                                      Chain, ParmReg, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
01748 
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);

  // 32-bit SVR4 PIC: the address is loaded through a GOT-style table entry
  // relative to the global base register.
  if (isPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
                       DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
  }

  // Otherwise materialize hi(&g)+lo(&g) directly.
  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do an
  // extra load to get the address of the global.
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
                      false, false, false, 0);
  return Ptr;
}
01789 
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                 DAG.getSetCC(dl, MVT::v4i32,
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
                   CC));
      }

      // Expand all other v2i64 comparisons.
      return SDValue();
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      EVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      // Sub-i32 operands are widened so ctlz/srl operate on a full register.
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      // (x == 0)  <=>  ctlz(x) >> log2(bitwidth): the shift result is 1
      // exactly when all bits of x are zero.
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDValue();
}
01853 
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                                      const PPCSubtarget &Subtarget) const {
  // Lowers va_arg for the 32-bit SVR4 va_list, whose layout (per the
  // offsets used below) is: byte 0 = gpr index, byte 1 = fpr index,
  // bytes 4-7 = overflow_arg_area pointer, bytes 8-11 = reg_save_area
  // pointer.
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, 0);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // i64 arguments occupy an even/odd GPR pair, so round the index up to
    // an even value first.
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, 0);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, MVT::i32));

  // areas
  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
                                     MachinePointerInfo(), false, false,
                                     false, 0);
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
                                    MachinePointerInfo(), false, false,
                                    false, 0);
  InChain = RegSaveArea.getValue(1);

  // CC is true while the relevant index is < 8, i.e. the argument still
  // lives in the register save area; otherwise fall back to overflow_area.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
                                                   MVT::i32));

  // Store the bumped index back into the va_list (gpr at offset 0, fpr at
  // offset 1).
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV),
                              MVT::i8, false, false, 0);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // Advance overflow_area by 4/8 only when the register area is exhausted
  // (CC false), so the next overflow argument is picked up correctly.
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                          MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
                              OverflowAreaPtr,
                              MachinePointerInfo(),
                              MVT::i32, false, false, 0);

  // Finally load the argument itself from the selected slot.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
                     false, false, false, 0);
}
01961 
01962 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
01963                                        const PPCSubtarget &Subtarget) const {
01964   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
01965 
01966   // We have to copy the entire va_list struct:
01967   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
01968   return DAG.getMemcpy(Op.getOperand(0), Op,
01969                        Op.getOperand(1), Op.getOperand(2),
01970                        DAG.getConstant(12, MVT::i32), 8, false, true,
01971                        MachinePointerInfo(), MachinePointerInfo());
01972 }
01973 
01974 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
01975                                                   SelectionDAG &DAG) const {
01976   return Op.getOperand(0);
01977 }
01978 
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  // Lowers llvm.init.trampoline by emitting a call to the runtime helper
  // __trampoline_setup, which fills in the trampoline memory.
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy =
    DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
                                                             *DAG.getContext());

  // Build the argument list: (Trmp, TrampSize, FPtr, Nest).
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain)
    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
               DAG.getExternalSymbol("__trampoline_setup", PtrVT),
               std::move(Args), 0);

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  // The helper returns void, so only the chain result is of interest.
  return CallResult.second;
}
02017 
/// LowerVASTART - Lower llvm.va_start.  For Darwin and 64-bit targets this is
/// a single pointer store; for 32-bit SVR4 it initializes the four-field
/// va_list structure described below.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
                                        const PPCSubtarget &Subtarget) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  SDLoc dl(Op);

  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV),
                        false, false, 0);
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];


  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);


  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Pointer-sized gap between the two pointer fields (4 bytes on 32-bit).
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);

  // Distance from the fpr byte (offset 1) to overflow_arg_area (offset 4).
  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);

  // The fpr byte immediately follows the gpr byte.
  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
                                         Op.getOperand(1),
                                         MachinePointerInfo(SV),
                                         MVT::i8, false, false, 0);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  // Each store is chained on the previous one so they execute in order.
  SDValue secondStore =
    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                      MachinePointerInfo(SV, nextOffset), MVT::i8,
                      false, false, 0);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack (overflow_arg_area).
  SDValue thirdStore =
    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                 MachinePointerInfo(SV, nextOffset),
                 false, false, 0);
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers (reg_save_area).
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset),
                      false, false, 0);

}
02114 
02115 #include "PPCGenCallingConv.inc"
02116 
02117 // Function whose sole purpose is to kill compiler warnings 
02118 // stemming from unused functions included from PPCGenCallingConv.inc.
02119 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
02120   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
02121 }
02122 
/// CC_PPC32_SVR4_Custom_Dummy - Custom calling-convention hook that performs
/// no assignment work and unconditionally returns true (which, by the
/// CCCustomFn convention, reports the argument as fully handled — confirm
/// against CallingConvLower.h).
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  return true;
}
02129 
02130 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
02131                                              MVT &LocVT,
02132                                              CCValAssign::LocInfo &LocInfo,
02133                                              ISD::ArgFlagsTy &ArgFlags,
02134                                              CCState &State) {
02135   static const MCPhysReg ArgRegs[] = {
02136     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
02137     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
02138   };
02139   const unsigned NumArgRegs = array_lengthof(ArgRegs);
02140 
02141   unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
02142 
02143   // Skip one register if the first unallocated register has an even register
02144   // number and there are still argument registers available which have not been
02145   // allocated yet. RegNum is actually an index into ArgRegs, which means we
02146   // need to skip a register if RegNum is odd.
02147   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
02148     State.AllocateReg(ArgRegs[RegNum]);
02149   }
02150 
02151   // Always return false here, as this function only makes sure that the first
02152   // unallocated register has an odd register number and does not actually
02153   // allocate a register for the current argument.
02154   return false;
02155 }
02156 
02157 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
02158                                                MVT &LocVT,
02159                                                CCValAssign::LocInfo &LocInfo,
02160                                                ISD::ArgFlagsTy &ArgFlags,
02161                                                CCState &State) {
02162   static const MCPhysReg ArgRegs[] = {
02163     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
02164     PPC::F8
02165   };
02166 
02167   const unsigned NumArgRegs = array_lengthof(ArgRegs);
02168 
02169   unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
02170 
02171   // If there is only one Floating-point register left we need to put both f64
02172   // values of a split ppc_fp128 value on the stack.
02173   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
02174     State.AllocateReg(ArgRegs[RegNum]);
02175   }
02176 
02177   // Always return false here, as this function only makes sure that the two f64
02178   // values a ppc_fp128 value is split into are both passed in registers or both
02179   // passed on the stack and does not actually allocate a register for the
02180   // current argument.
02181   return false;
02182 }
02183 
02184 /// GetFPR - Get the set of FP registers that should be allocated for arguments,
02185 /// on Darwin.
02186 static const MCPhysReg *GetFPR() {
02187   static const MCPhysReg FPR[] = {
02188     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
02189     PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
02190   };
02191 
02192   return FPR;
02193 }
02194 
02195 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
02196 /// the stack.
02197 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
02198                                        unsigned PtrByteSize) {
02199   unsigned ArgSize = ArgVT.getStoreSize();
02200   if (Flags.isByVal())
02201     ArgSize = Flags.getByValSize();
02202 
02203   // Round up to multiples of the pointer size, except for array members,
02204   // which are always packed.
02205   if (!Flags.isInConsecutiveRegs())
02206     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02207 
02208   return ArgSize;
02209 }
02210 
02211 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
02212 /// on the stack.
02213 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
02214                                             ISD::ArgFlagsTy Flags,
02215                                             unsigned PtrByteSize) {
02216   unsigned Align = PtrByteSize;
02217 
02218   // Altivec parameters are padded to a 16 byte boundary.
02219   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
02220       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
02221       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
02222     Align = 16;
02223 
02224   // ByVal parameters are aligned as requested.
02225   if (Flags.isByVal()) {
02226     unsigned BVAlign = Flags.getByValAlign();
02227     if (BVAlign > PtrByteSize) {
02228       if (BVAlign % PtrByteSize != 0)
02229           llvm_unreachable(
02230             "ByVal alignment is not a multiple of the pointer size");
02231 
02232       Align = BVAlign;
02233     }
02234   }
02235 
02236   // Array members are always packed to their original alignment.
02237   if (Flags.isInConsecutiveRegs()) {
02238     // If the array member was split into multiple registers, the first
02239     // needs to be aligned to the size of the full type.  (Except for
02240     // ppcf128, which is only aligned as its f64 components.)
02241     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
02242       Align = OrigVT.getStoreSize();
02243     else
02244       Align = ArgVT.getStoreSize();
02245   }
02246 
02247   return Align;
02248 }
02249 
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers).  ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
                                   ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize,
                                   unsigned LinkageSize,
                                   unsigned ParamAreaSize,
                                   unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  unsigned Align =
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  // A split argument's last piece is padded back out to a full pointer slot.
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.  Note that ArgOffset has still been
  // advanced above: the shadow stack slot is consumed either way.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}
02301 
02302 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
02303 /// ensure minimum alignment required for target.
02304 static unsigned EnsureStackAlignment(const TargetMachine &Target,
02305                                      unsigned NumBytes) {
02306   unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
02307   unsigned AlignMask = TargetAlign - 1;
02308   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
02309   return NumBytes;
02310 }
02311 
02312 SDValue
02313 PPCTargetLowering::LowerFormalArguments(SDValue Chain,
02314                                         CallingConv::ID CallConv, bool isVarArg,
02315                                         const SmallVectorImpl<ISD::InputArg>
02316                                           &Ins,
02317                                         SDLoc dl, SelectionDAG &DAG,
02318                                         SmallVectorImpl<SDValue> &InVals)
02319                                           const {
02320   if (Subtarget.isSVR4ABI()) {
02321     if (Subtarget.isPPC64())
02322       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
02323                                          dl, DAG, InVals);
02324     else
02325       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
02326                                          dl, DAG, InVals);
02327   } else {
02328     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
02329                                        dl, DAG, InVals);
02330   }
02331 }
02332 
SDValue
PPCTargetLowering::LowerFormalArguments_32SVR4(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      // Select the register class that matches the value type.
      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VSHRCRegClass;
          break;
      }

      // Transform the arguments stored in physical registers into virtual ones.
      // i1 values arrive widened in a GPR; copy them out as i32 and truncate
      // back to i1 below.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);

      if (ValVT == MVT::i1)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      unsigned ArgSize = VA.getLocVT().getStoreSize();
      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
                                      isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                   MachinePointerInfo(),
                                   false, false, false, 0));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      getTargetMachine(), ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    // Record how many argument registers the fixed arguments consumed; these
    // become the gpr/fpr fields of the va_list (see LowerVASTART).
    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
                                                          NumGPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
                                                          NumFPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(
      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                             CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
                                         PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Merge the register-save stores (if any) into the chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
02553 
02554 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
02555 // value to MVT::i64 and then truncate to the correct register size.
02556 SDValue
02557 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
02558                                      SelectionDAG &DAG, SDValue ArgVal,
02559                                      SDLoc dl) const {
02560   if (Flags.isSExt())
02561     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
02562                          DAG.getValueType(ObjectVT));
02563   else if (Flags.isZExt())
02564     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
02565                          DAG.getValueType(ObjectVT));
02566 
02567   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
02568 }
02569 
02570 SDValue
02571 PPCTargetLowering::LowerFormalArguments_64SVR4(
02572                                       SDValue Chain,
02573                                       CallingConv::ID CallConv, bool isVarArg,
02574                                       const SmallVectorImpl<ISD::InputArg>
02575                                         &Ins,
02576                                       SDLoc dl, SelectionDAG &DAG,
02577                                       SmallVectorImpl<SDValue> &InVals) const {
02578   // TODO: add description of PPC stack frame format, or at least some docs.
02579   //
02580   bool isELFv2ABI = Subtarget.isELFv2ABI();
02581   bool isLittleEndian = Subtarget.isLittleEndian();
02582   MachineFunction &MF = DAG.getMachineFunction();
02583   MachineFrameInfo *MFI = MF.getFrameInfo();
02584   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
02585 
02586   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02587   // Potential tail calls could cause overwriting of argument stack slots.
02588   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
02589                        (CallConv == CallingConv::Fast));
02590   unsigned PtrByteSize = 8;
02591 
02592   unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
02593                                                           isELFv2ABI);
02594 
02595   static const MCPhysReg GPR[] = {
02596     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
02597     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
02598   };
02599 
02600   static const MCPhysReg *FPR = GetFPR();
02601 
02602   static const MCPhysReg VR[] = {
02603     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
02604     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
02605   };
02606   static const MCPhysReg VSRH[] = {
02607     PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
02608     PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
02609   };
02610 
02611   const unsigned Num_GPR_Regs = array_lengthof(GPR);
02612   const unsigned Num_FPR_Regs = 13;
02613   const unsigned Num_VR_Regs  = array_lengthof(VR);
02614 
02615   // Do a first pass over the arguments to determine whether the ABI
02616   // guarantees that our caller has allocated the parameter save area
02617   // on its stack frame.  In the ELFv1 ABI, this is always the case;
02618   // in the ELFv2 ABI, it is true if this is a vararg function or if
02619   // any parameter is located in a stack slot.
02620 
02621   bool HasParameterArea = !isELFv2ABI || isVarArg;
02622   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
02623   unsigned NumBytes = LinkageSize;
02624   unsigned AvailableFPRs = Num_FPR_Regs;
02625   unsigned AvailableVRs = Num_VR_Regs;
02626   for (unsigned i = 0, e = Ins.size(); i != e; ++i)
02627     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
02628                                PtrByteSize, LinkageSize, ParamAreaSize,
02629                                NumBytes, AvailableFPRs, AvailableVRs))
02630       HasParameterArea = true;
02631 
02632   // Add DAG nodes to load the arguments or copy them out of registers.  On
02633   // entry to a function on PPC, the arguments start after the linkage area,
02634   // although the first ones are often in registers.
02635 
02636   unsigned ArgOffset = LinkageSize;
02637   unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
02638   SmallVector<SDValue, 8> MemOps;
02639   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
02640   unsigned CurArgIdx = 0;
02641   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
02642     SDValue ArgVal;
02643     bool needsLoad = false;
02644     EVT ObjectVT = Ins[ArgNo].VT;
02645     EVT OrigVT = Ins[ArgNo].ArgVT;
02646     unsigned ObjSize = ObjectVT.getStoreSize();
02647     unsigned ArgSize = ObjSize;
02648     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
02649     std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
02650     CurArgIdx = Ins[ArgNo].OrigArgIndex;
02651 
02652     /* Respect alignment of argument on the stack.  */
02653     unsigned Align =
02654       CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
02655     ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
02656     unsigned CurArgOffset = ArgOffset;
02657 
02658     /* Compute GPR index associated with argument offset.  */
02659     GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
02660     GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
02661 
02662     // FIXME the codegen can be much improved in some cases.
02663     // We do not have to keep everything in memory.
02664     if (Flags.isByVal()) {
02665       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
02666       ObjSize = Flags.getByValSize();
02667       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02668       // Empty aggregate parameters do not take up registers.  Examples:
02669       //   struct { } a;
02670       //   union  { } b;
02671       //   int c[0];
02672       // etc.  However, we have to provide a place-holder in InVals, so
02673       // pretend we have an 8-byte item at the current address for that
02674       // purpose.
02675       if (!ObjSize) {
02676         int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
02677         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
02678         InVals.push_back(FIN);
02679         continue;
02680       }
02681 
02682       // Create a stack object covering all stack doublewords occupied
02683       // by the argument.  If the argument is (fully or partially) on
02684       // the stack, or if the argument is fully in registers but the
02685       // caller has allocated the parameter save anyway, we can refer
02686       // directly to the caller's stack frame.  Otherwise, create a
02687       // local copy in our own frame.
02688       int FI;
02689       if (HasParameterArea ||
02690           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
02691         FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
02692       else
02693         FI = MFI->CreateStackObject(ArgSize, Align, false);
02694       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
02695 
02696       // Handle aggregates smaller than 8 bytes.
02697       if (ObjSize < PtrByteSize) {
02698         // The value of the object is its address, which differs from the
02699         // address of the enclosing doubleword on big-endian systems.
02700         SDValue Arg = FIN;
02701         if (!isLittleEndian) {
02702           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
02703           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
02704         }
02705         InVals.push_back(Arg);
02706 
02707         if (GPR_idx != Num_GPR_Regs) {
02708           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
02709           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
02710           SDValue Store;
02711 
02712           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
02713             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
02714                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
02715             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
02716                                       MachinePointerInfo(FuncArg),
02717                                       ObjType, false, false, 0);
02718           } else {
02719             // For sizes that don't fit a truncating store (3, 5, 6, 7),
02720             // store the whole register as-is to the parameter save area
02721             // slot.
02722             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
02723                                  MachinePointerInfo(FuncArg),
02724                                  false, false, 0);
02725           }
02726 
02727           MemOps.push_back(Store);
02728         }
02729         // Whether we copied from a register or not, advance the offset
02730         // into the parameter save area by a full doubleword.
02731         ArgOffset += PtrByteSize;
02732         continue;
02733       }
02734 
02735       // The value of the object is its address, which is the address of
02736       // its first stack doubleword.
02737       InVals.push_back(FIN);
02738 
02739       // Store whatever pieces of the object are in registers to memory.
02740       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
02741         if (GPR_idx == Num_GPR_Regs)
02742           break;
02743 
02744         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
02745         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
02746         SDValue Addr = FIN;
02747         if (j) {
02748           SDValue Off = DAG.getConstant(j, PtrVT);
02749           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
02750         }
02751         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
02752                                      MachinePointerInfo(FuncArg, j),
02753                                      false, false, 0);
02754         MemOps.push_back(Store);
02755         ++GPR_idx;
02756       }
02757       ArgOffset += ArgSize;
02758       continue;
02759     }
02760 
02761     switch (ObjectVT.getSimpleVT().SimpleTy) {
02762     default: llvm_unreachable("Unhandled argument type!");
02763     case MVT::i1:
02764     case MVT::i32:
02765     case MVT::i64:
02766       // These can be scalar arguments or elements of an integer array type
02767       // passed directly.  Clang may use those instead of "byval" aggregate
02768       // types to avoid forcing arguments to memory unnecessarily.
02769       if (GPR_idx != Num_GPR_Regs) {
02770         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
02771         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
02772 
02773         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
02774           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
02775           // value to MVT::i64 and then truncate to the correct register size.
02776           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
02777       } else {
02778         needsLoad = true;
02779         ArgSize = PtrByteSize;
02780       }
02781       ArgOffset += 8;
02782       break;
02783 
02784     case MVT::f32:
02785     case MVT::f64:
02786       // These can be scalar arguments or elements of a float array type
02787       // passed directly.  The latter are used to implement ELFv2 homogenous
02788       // float aggregates.
02789       if (FPR_idx != Num_FPR_Regs) {
02790         unsigned VReg;
02791 
02792         if (ObjectVT == MVT::f32)
02793           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
02794         else
02795           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
02796                                             &PPC::VSFRCRegClass :
02797                                             &PPC::F8RCRegClass);
02798 
02799         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
02800         ++FPR_idx;
02801       } else if (GPR_idx != Num_GPR_Regs) {
02802         // This can only ever happen in the presence of f32 array types,
02803         // since otherwise we never run out of FPRs before running out
02804         // of GPRs.
02805         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
02806         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
02807 
02808         if (ObjectVT == MVT::f32) {
02809           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
02810             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
02811                                  DAG.getConstant(32, MVT::i32));
02812           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
02813         }
02814 
02815         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
02816       } else {
02817         needsLoad = true;
02818       }
02819 
02820       // When passing an array of floats, the array occupies consecutive
02821       // space in the argument area; only round up to the next doubleword
02822       // at the end of the array.  Otherwise, each float takes 8 bytes.
02823       ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
02824       ArgOffset += ArgSize;
02825       if (Flags.isInConsecutiveRegsLast())
02826         ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02827       break;
02828     case MVT::v4f32:
02829     case MVT::v4i32:
02830     case MVT::v8i16:
02831     case MVT::v16i8:
02832     case MVT::v2f64:
02833     case MVT::v2i64:
02834       // These can be scalar arguments or elements of a vector array type
02835       // passed directly.  The latter are used to implement ELFv2 homogenous
02836       // vector aggregates.
02837       if (VR_idx != Num_VR_Regs) {
02838         unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
02839                         MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
02840                         MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
02841         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
02842         ++VR_idx;
02843       } else {
02844         needsLoad = true;
02845       }
02846       ArgOffset += 16;
02847       break;
02848     }
02849 
02850     // We need to load the argument to a virtual register if we determined
02851     // above that we ran out of physical registers of the appropriate type.
02852     if (needsLoad) {
02853       if (ObjSize < ArgSize && !isLittleEndian)
02854         CurArgOffset += ArgSize - ObjSize;
02855       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
02856       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
02857       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
02858                            false, false, false, 0);
02859     }
02860 
02861     InVals.push_back(ArgVal);
02862   }
02863 
02864   // Area that is at least reserved in the caller of this function.
02865   unsigned MinReservedArea;
02866   if (HasParameterArea)
02867     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
02868   else
02869     MinReservedArea = LinkageSize;
02870 
02871   // Set the size that is at least reserved in caller of this function.  Tail
02872   // call optimized functions' reserved stack space needs to be aligned so that
02873   // taking the difference between two stack areas will result in an aligned
02874   // stack.
02875   MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
02876   FuncInfo->setMinReservedArea(MinReservedArea);
02877 
02878   // If the function takes variable number of arguments, make a frame index for
02879   // the start of the first vararg value... for expansion of llvm.va_start.
02880   if (isVarArg) {
02881     int Depth = ArgOffset;
02882 
02883     FuncInfo->setVarArgsFrameIndex(
02884       MFI->CreateFixedObject(PtrByteSize, Depth, true));
02885     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
02886 
02887     // If this function is vararg, store any remaining integer argument regs
02888     // to their spots on the stack so that they may be loaded by deferencing the
02889     // result of va_next.
02890     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
02891          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
02892       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
02893       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
02894       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
02895                                    MachinePointerInfo(), false, false, 0);
02896       MemOps.push_back(Store);
02897       // Increment the address by four for the next argument to store
02898       SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
02899       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
02900     }
02901   }
02902 
02903   if (!MemOps.empty())
02904     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02905 
02906   return Chain;
02907 }
02908 
/// LowerFormalArguments_Darwin - Lower incoming formal arguments for the
/// Darwin ABI: copy values that arrived in physical registers into virtual
/// registers, create loads for values that arrived on the stack, and push
/// one SDValue per incoming argument onto InVals.  Also computes the
/// minimum reserved parameter area and the vararg frame index.
SDValue
PPCTargetLowering::LowerFormalArguments_Darwin(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // Arguments live in the caller's frame just past the linkage area.
  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
                                                          false);
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  static const MCPhysReg *FPR = GetFPR();

  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  // FuncArg tracks the original IR argument, used only to annotate stores
  // with a MachinePointerInfo.
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
    CurArgIdx = Ins[ArgNo].OrigArgIndex;

    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        // Small byval in a register: spill it with a truncating store so the
        // in-memory image is right-justified as the ABI requires.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                            MachinePointerInfo(FuncArg),
                                            ObjType, false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(FuncArg, j),
                                       false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the rest of the aggregate is already in memory.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      // FALLTHROUGH
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          // Varargs vectors do occupy (16-byte-aligned) stack space, and
          // each padding/vector word also burns a GPR.
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize),
                                      isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                             Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
03275 
03276 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
03277 /// adjusted to accommodate the arguments for the tailcall.
03278 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
03279                                    unsigned ParamSize) {
03280 
03281   if (!isTailCall) return 0;
03282 
03283   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
03284   unsigned CallerMinReservedArea = FI->getMinReservedArea();
03285   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
03286   // Remember only if the new adjustement is bigger.
03287   if (SPDiff < FI->getTailCallSPDelta())
03288     FI->setTailCallSPDelta(SPDiff);
03289 
03290   return SPDiff;
03291 }
03292 
03293 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
03294 /// for tail call optimization. Targets which want to do tail call
03295 /// optimization should implement this function.
03296 bool
03297 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
03298                                                      CallingConv::ID CalleeCC,
03299                                                      bool isVarArg,
03300                                       const SmallVectorImpl<ISD::InputArg> &Ins,
03301                                                      SelectionDAG& DAG) const {
03302   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
03303     return false;
03304 
03305   // Variable argument functions are not supported.
03306   if (isVarArg)
03307     return false;
03308 
03309   MachineFunction &MF = DAG.getMachineFunction();
03310   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
03311   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
03312     // Functions containing by val parameters are not supported.
03313     for (unsigned i = 0; i != Ins.size(); i++) {
03314        ISD::ArgFlagsTy Flags = Ins[i].Flags;
03315        if (Flags.isByVal()) return false;
03316     }
03317 
03318     // Non-PIC/GOT tail calls are supported.
03319     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
03320       return true;
03321 
03322     // At the moment we can only do local tail calls (in same module, hidden
03323     // or protected) if we are generating PIC.
03324     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
03325       return G->getGlobal()->hasHiddenVisibility()
03326           || G->getGlobal()->hasProtectedVisibility();
03327   }
03328 
03329   return false;
03330 }
03331 
03332 /// isCallCompatibleAddress - Return the immediate to use if the specified
03333 /// 32-bit value is representable in the immediate field of a BxA instruction.
03334 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
03335   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
03336   if (!C) return nullptr;
03337 
03338   int Addr = C->getZExtValue();
03339   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
03340       SignExtend32<26>(Addr) != Addr)
03341     return nullptr;  // Top 6 bits have to be sext of immediate.
03342 
03343   return DAG.getConstant((int)C->getZExtValue() >> 2,
03344                          DAG.getTargetLoweringInfo().getPointerTy()).getNode();
03345 }
03346 
namespace {

// Records one outgoing tail-call argument together with the stack slot it
// must be stored to before the jump (see CalculateTailCallArgDest and
// StoreTailCallArgumentsToStackSlot below).
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to store.
  SDValue FrameIdxOp; // Frame-index node addressing the destination slot.
  int       FrameIdx; // Raw frame index of the destination slot.

  TailCallArgumentInfo() : FrameIdx(0) {}
};

}
03358 
03359 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
03360 static void
03361 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
03362                                            SDValue Chain,
03363                    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
03364                    SmallVectorImpl<SDValue> &MemOpChains,
03365                    SDLoc dl) {
03366   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
03367     SDValue Arg = TailCallArgs[i].Arg;
03368     SDValue FIN = TailCallArgs[i].FrameIdxOp;
03369     int FI = TailCallArgs[i].FrameIdx;
03370     // Store relative to framepointer.
03371     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
03372                                        MachinePointerInfo::getFixedStack(FI),
03373                                        false, false, 0));
03374   }
03375 }
03376 
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
///
/// \param Chain      Incoming DAG chain; each store is threaded onto it.
/// \param OldRetAddr Value of the caller's return address (already loaded).
/// \param OldFP      Value of the caller's frame pointer (Darwin ABI only).
/// \param SPDiff     Stack pointer adjustment for the tail call; 0 means the
///                   slots do not move and nothing needs to be stored.
/// \returns the updated chain with the store(s) appended.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                               MachineFunction &MF,
                                               SDValue Chain,
                                               SDValue OldRetAddr,
                                               SDValue OldFP,
                                               int SPDiff,
                                               bool isPPC64,
                                               bool isDarwinABI,
                                               SDLoc dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    int SlotSize = isPPC64 ? 8 : 4;
    // The return-address save offset is ABI-defined; shift it by SPDiff to
    // address the slot in the adjusted (callee-view) frame.
    int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
                                                                   isDarwinABI);
    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                          NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(NewRetAddr),
                         false, false, 0);

    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
    // slot as the FP is never overwritten.
    if (isDarwinABI) {
      int NewFPLoc =
        SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
      int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
                                                          true);
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
                           MachinePointerInfo::getFixedStack(NewFPIdx),
                           false, false, 0);
    }
  }
  return Chain;
}
03416 
03417 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
03418 /// the position of the argument.
03419 static void
03420 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
03421                          SDValue Arg, int SPDiff, unsigned ArgOffset,
03422                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
03423   int Offset = ArgOffset + SPDiff;
03424   uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
03425   int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
03426   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
03427   SDValue FIN = DAG.getFrameIndex(FI, VT);
03428   TailCallArgumentInfo Info;
03429   Info.Arg = Arg;
03430   Info.FrameIdxOp = FIN;
03431   Info.FrameIdx = FI;
03432   TailCallArguments.push_back(Info);
03433 }
03434 
03435 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
03436 /// stack slot. Returns the chain as result and the loaded frame pointers in
03437 /// LROpOut/FPOpout. Used when tail calling.
03438 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
03439                                                         int SPDiff,
03440                                                         SDValue Chain,
03441                                                         SDValue &LROpOut,
03442                                                         SDValue &FPOpOut,
03443                                                         bool isDarwinABI,
03444                                                         SDLoc dl) const {
03445   if (SPDiff) {
03446     // Load the LR and FP stack slot for later adjusting.
03447     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
03448     LROpOut = getReturnAddrFrameIndex(DAG);
03449     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
03450                           false, false, false, 0);
03451     Chain = SDValue(LROpOut.getNode(), 1);
03452 
03453     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
03454     // slot as the FP is never overwritten.
03455     if (isDarwinABI) {
03456       FPOpOut = getFramePointerFrameIndex(DAG);
03457       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
03458                             false, false, false, 0);
03459       Chain = SDValue(FPOpOut.getNode(), 1);
03460     }
03461   }
03462   return Chain;
03463 }
03464 
03465 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
03466 /// by "Src" to address "Dst" of size "Size".  Alignment information is
03467 /// specified by the specific parameter attribute. The copy will be passed as
03468 /// a byval function parameter.
03469 /// Sometimes what we are copying is the end of a larger object, the part that
03470 /// does not fit in registers.
03471 static SDValue
03472 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
03473                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
03474                           SDLoc dl) {
03475   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
03476   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
03477                        false, false, MachinePointerInfo(),
03478                        MachinePointerInfo());
03479 }
03480 
03481 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
03482 /// tail calls.
03483 static void
03484 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
03485                  SDValue Arg, SDValue PtrOff, int SPDiff,
03486                  unsigned ArgOffset, bool isPPC64, bool isTailCall,
03487                  bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
03488                  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
03489                  SDLoc dl) {
03490   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
03491   if (!isTailCall) {
03492     if (isVector) {
03493       SDValue StackPtr;
03494       if (isPPC64)
03495         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
03496       else
03497         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
03498       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
03499                            DAG.getConstant(ArgOffset, PtrVT));
03500     }
03501     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
03502                                        MachinePointerInfo(), false, false, 0));
03503   // Calculate and remember argument location.
03504   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
03505                                   TailCallArguments);
03506 }
03507 
/// PrepareTailCall - Emit the deferred argument stores recorded by
/// CalculateTailCallArgDest, move the return address (and, on Darwin, the
/// frame pointer) to their new slots, and close the call sequence. On return,
/// \p Chain and \p InFlag are updated for the caller to build the tail-call
/// node.
static
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                     SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
                     SDValue LROp, SDValue FPOp, bool isDarwinABI,
                     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  MachineFunction &MF = DAG.getMachineFunction();

  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
                                        isPPC64, isDarwinABI, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}
03534 
/// PrepareCall - Resolve \p Callee to its final form (absolute address,
/// target global/symbol, or CTR-based indirect call), build the operand list
/// \p Ops and value types \p NodeTys for the call node, and return the call
/// opcode to use (PPCISD::CALL or PPCISD::BCTRL). \p Chain and \p InFlag are
/// updated in place as glue-threaded nodes are created for indirect calls.
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
                     SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
                     SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
                     SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
                     const PPCSubtarget &Subtarget) {

  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  unsigned CallOpc = PPCISD::CALL;

  // Assume an indirect call until Callee is proven to be a direct target.
  bool needIndirectCall = true;
  if (!isSVR4ABI || !isPPC64)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
      // If this is an absolute destination address, use the munged value.
      Callee = SDValue(Dest, 0);
      needIndirectCall = false;
    }

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
    // Use indirect calls for ALL functions calls in JIT mode, since the
    // far-call stubs may be outside relocation limits for a BL instruction.
    if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
      unsigned OpFlags = 0;
      if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
          (Subtarget.getTargetTriple().isMacOSX() &&
           Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
          (G->getGlobal()->isDeclaration() ||
           G->getGlobal()->isWeakForLinker())) ||
          (Subtarget.isTargetELF() && !isPPC64 &&
           !G->getGlobal()->hasLocalLinkage() &&
           DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
        // PC-relative references to external symbols should go through $stub,
        // unless we're building with the leopard linker or later, which
        // automatically synthesizes these stubs.
        OpFlags = PPCII::MO_PLT_OR_STUB;
      }

      // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
      // every direct call is) turn it into a TargetGlobalAddress /
      // TargetExternalSymbol node so that legalize doesn't hack it.
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                          Callee.getValueType(),
                                          0, OpFlags);
      needIndirectCall = false;
    }
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;

    if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
         (Subtarget.getTargetTriple().isMacOSX() &&
          Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
        (Subtarget.isTargetELF() && !isPPC64 &&
         DAG.getTarget().getRelocationModel() == Reloc::PIC_) ) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = PPCII::MO_PLT_OR_STUB;
    }

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
                                         OpFlags);
    needIndirectCall = false;
  }

  if (needIndirectCall) {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call, we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};

    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
      // entry point, but to the function descriptor (the function entry point
      // address is part of the function descriptor though).
      // The function descriptor is a three doubleword structure with the
      // following fields: function entry point, TOC base address and
      // environment pointer.
      // Thus for a call through a function pointer, the following actions need
      // to be performed:
      //   1. Save the TOC of the caller in the TOC save area of its stack
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
      //   2. Load the address of the function entry point from the function
      //      descriptor.
      //   3. Load the TOC of the callee from the function descriptor into r2.
      //   4. Load the environment pointer from the function descriptor into
      //      r11.
      //   5. Branch to the function entry point address.
      //   6. On return of the callee, the TOC of the caller needs to be
      //      restored (this is done in FinishCall()).
      //
      // All those operations are flagged together to ensure that no other
      // operations can be scheduled in between. E.g. without flagging the
      // operations together, a TOC access in the caller could be scheduled
      // between the load of the callee TOC and the branch to the callee, which
      // results in the TOC access going through the TOC of the callee instead
      // of going through the TOC of the caller, which leads to incorrect code.

      // Load the address of the function entry point from the function
      // descriptor.
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
      SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
                              makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
      Chain = LoadFuncPtr.getValue(1);
      InFlag = LoadFuncPtr.getValue(2);

      // Load environment pointer into r11.
      // Offset of the environment pointer within the function descriptor.
      SDValue PtrOff = DAG.getIntPtrConstant(16);

      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
                                       InFlag);
      Chain = LoadEnvPtr.getValue(1);
      InFlag = LoadEnvPtr.getValue(2);

      SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                        InFlag);
      Chain = EnvVal.getValue(0);
      InFlag = EnvVal.getValue(1);

      // Load TOC of the callee into r2. We are using a target-specific load
      // with r2 hard coded, because the result of a target-independent load
      // would never go directly into r2, since r2 is a reserved register (which
      // prevents the register allocator from allocating it), resulting in an
      // additional register being allocated and an unnecessary move instruction
      // being generated.
      VTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue TOCOff = DAG.getIntPtrConstant(8);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
                                       AddTOC, InFlag);
      Chain = LoadTOCPtr.getValue(0);
      InFlag = LoadTOCPtr.getValue(1);

      // The entry point loaded from the descriptor is what actually goes
      // into the CTR.
      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    // Only pass the glue operand if there is one to thread.
    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    // Clear Callee: the branch target is now held in the CTR, not an operand.
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer)
    if (isSVR4ABI && isPPC64 && !isELFv2ABI)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Direct calls in the ELFv2 ABI need the TOC register live into the call.
  if (Callee.getNode() && isELFv2ABI)
    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));

  return CallOpc;
}
03722 
03723 static
03724 bool isLocalCall(const SDValue &Callee)
03725 {
03726   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
03727     return !G->getGlobal()->isDeclaration() &&
03728            !G->getGlobal()->isWeakForLinker();
03729   return false;
03730 }
03731 
03732 SDValue
03733 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
03734                                    CallingConv::ID CallConv, bool isVarArg,
03735                                    const SmallVectorImpl<ISD::InputArg> &Ins,
03736                                    SDLoc dl, SelectionDAG &DAG,
03737                                    SmallVectorImpl<SDValue> &InVals) const {
03738 
03739   SmallVector<CCValAssign, 16> RVLocs;
03740   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
03741                     getTargetMachine(), RVLocs, *DAG.getContext());
03742   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
03743 
03744   // Copy all of the result registers out of their specified physreg.
03745   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
03746     CCValAssign &VA = RVLocs[i];
03747     assert(VA.isRegLoc() && "Can only return in registers!");
03748 
03749     SDValue Val = DAG.getCopyFromReg(Chain, dl,
03750                                      VA.getLocReg(), VA.getLocVT(), InFlag);
03751     Chain = Val.getValue(1);
03752     InFlag = Val.getValue(2);
03753 
03754     switch (VA.getLocInfo()) {
03755     default: llvm_unreachable("Unknown loc info!");
03756     case CCValAssign::Full: break;
03757     case CCValAssign::AExt:
03758       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
03759       break;
03760     case CCValAssign::ZExt:
03761       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
03762                         DAG.getValueType(VA.getValVT()));
03763       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
03764       break;
03765     case CCValAssign::SExt:
03766       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
03767                         DAG.getValueType(VA.getValVT()));
03768       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
03769       break;
03770     }
03771 
03772     InVals.push_back(Val);
03773   }
03774 
03775   return Chain;
03776 }
03777 
03778 SDValue
03779 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
03780                               bool isTailCall, bool isVarArg,
03781                               SelectionDAG &DAG,
03782                               SmallVector<std::pair<unsigned, SDValue>, 8>
03783                                 &RegsToPass,
03784                               SDValue InFlag, SDValue Chain,
03785                               SDValue &Callee,
03786                               int SPDiff, unsigned NumBytes,
03787                               const SmallVectorImpl<ISD::InputArg> &Ins,
03788                               SmallVectorImpl<SDValue> &InVals) const {
03789 
03790   bool isELFv2ABI = Subtarget.isELFv2ABI();
03791   std::vector<EVT> NodeTys;
03792   SmallVector<SDValue, 8> Ops;
03793   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
03794                                  isTailCall, RegsToPass, Ops, NodeTys,
03795                                  Subtarget);
03796 
03797   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
03798   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
03799     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
03800 
03801   // When performing tail call optimization the callee pops its arguments off
03802   // the stack. Account for this here so these bytes can be pushed back on in
03803   // PPCFrameLowering::eliminateCallFramePseudoInstr.
03804   int BytesCalleePops =
03805     (CallConv == CallingConv::Fast &&
03806      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
03807 
03808   // Add a register mask operand representing the call-preserved registers.
03809   const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
03810   const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
03811   assert(Mask && "Missing call preserved mask for calling convention");
03812   Ops.push_back(DAG.getRegisterMask(Mask));
03813 
03814   if (InFlag.getNode())
03815     Ops.push_back(InFlag);
03816 
03817   // Emit tail call.
03818   if (isTailCall) {
03819     assert(((Callee.getOpcode() == ISD::Register &&
03820              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
03821             Callee.getOpcode() == ISD::TargetExternalSymbol ||
03822             Callee.getOpcode() == ISD::TargetGlobalAddress ||
03823             isa<ConstantSDNode>(Callee)) &&
03824     "Expecting an global address, external symbol, absolute value or register");
03825 
03826     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
03827   }
03828 
03829   // Add a NOP immediately after the branch instruction when using the 64-bit
03830   // SVR4 ABI. At link time, if caller and callee are in a different module and
03831   // thus have a different TOC, the call will be replaced with a call to a stub
03832   // function which saves the current TOC, loads the TOC of the callee and
03833   // branches to the callee. The NOP will be replaced with a load instruction
03834   // which restores the TOC of the caller from the TOC save slot of the current
03835   // stack frame. If caller and callee belong to the same module (and have the
03836   // same TOC), the NOP will remain unchanged.
03837 
03838   bool needsTOCRestore = false;
03839   if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) {
03840     if (CallOpc == PPCISD::BCTRL) {
03841       // This is a call through a function pointer.
03842       // Restore the caller TOC from the save area into R2.
03843       // See PrepareCall() for more information about calls through function
03844       // pointers in the 64-bit SVR4 ABI.
03845       // We are using a target-specific load with r2 hard coded, because the
03846       // result of a target-independent load would never go directly into r2,
03847       // since r2 is a reserved register (which prevents the register allocator
03848       // from allocating it), resulting in an additional register being
03849       // allocated and an unnecessary move instruction being generated.
03850       needsTOCRestore = true;
03851     } else if ((CallOpc == PPCISD::CALL) &&
03852                (!isLocalCall(Callee) ||
03853                 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03854       // Otherwise insert NOP for non-local calls.
03855       CallOpc = PPCISD::CALL_NOP;
03856     }
03857   }
03858 
03859   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
03860   InFlag = Chain.getValue(1);
03861 
03862   if (needsTOCRestore) {
03863     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
03864     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
03865     SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
03866     unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
03867     SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
03868     SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
03869     Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
03870     InFlag = Chain.getValue(1);
03871   }
03872 
03873   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
03874                              DAG.getIntPtrConstant(BytesCalleePops, true),
03875                              InFlag, dl);
03876   if (!Ins.empty())
03877     InFlag = Chain.getValue(1);
03878 
03879   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
03880                          Ins, dl, DAG, InVals);
03881 }
03882 
03883 SDValue
03884 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
03885                              SmallVectorImpl<SDValue> &InVals) const {
03886   SelectionDAG &DAG                     = CLI.DAG;
03887   SDLoc &dl                             = CLI.DL;
03888   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
03889   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
03890   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
03891   SDValue Chain                         = CLI.Chain;
03892   SDValue Callee                        = CLI.Callee;
03893   bool &isTailCall                      = CLI.IsTailCall;
03894   CallingConv::ID CallConv              = CLI.CallConv;
03895   bool isVarArg                         = CLI.IsVarArg;
03896 
03897   if (isTailCall)
03898     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
03899                                                    Ins, DAG);
03900 
03901   if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
03902     report_fatal_error("failed to perform tail call elimination on a call "
03903                        "site marked musttail");
03904 
03905   if (Subtarget.isSVR4ABI()) {
03906     if (Subtarget.isPPC64())
03907       return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
03908                               isTailCall, Outs, OutVals, Ins,
03909                               dl, DAG, InVals);
03910     else
03911       return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
03912                               isTailCall, Outs, OutVals, Ins,
03913                               dl, DAG, InVals);
03914   }
03915 
03916   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
03917                           isTailCall, Outs, OutVals, Ins,
03918                           dl, DAG, InVals);
03919 }
03920 
03921 SDValue
03922 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
03923                                     CallingConv::ID CallConv, bool isVarArg,
03924                                     bool isTailCall,
03925                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
03926                                     const SmallVectorImpl<SDValue> &OutVals,
03927                                     const SmallVectorImpl<ISD::InputArg> &Ins,
03928                                     SDLoc dl, SelectionDAG &DAG,
03929                                     SmallVectorImpl<SDValue> &InVals) const {
03930   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
03931   // of the 32-bit SVR4 ABI stack frame layout.
03932 
03933   assert((CallConv == CallingConv::C ||
03934           CallConv == CallingConv::Fast) && "Unknown calling convention!");
03935 
03936   unsigned PtrByteSize = 4;
03937 
03938   MachineFunction &MF = DAG.getMachineFunction();
03939 
03940   // Mark this function as potentially containing a function that contains a
03941   // tail call. As a consequence the frame pointer will be used for dynamicalloc
03942   // and restoring the callers stack pointer in this functions epilog. This is
03943   // done because by tail calling the called function might overwrite the value
03944   // in this function's (MF) stack pointer stack slot 0(SP).
03945   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
03946       CallConv == CallingConv::Fast)
03947     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
03948 
03949   // Count how many bytes are to be pushed on the stack, including the linkage
03950   // area, parameter list area and the part of the local variable space which
03951   // contains copies of aggregates which are passed by value.
03952 
03953   // Assign locations to all of the outgoing arguments.
03954   SmallVector<CCValAssign, 16> ArgLocs;
03955   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
03956                  getTargetMachine(), ArgLocs, *DAG.getContext());
03957 
03958   // Reserve space for the linkage area on the stack.
03959   CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
03960                        PtrByteSize);
03961 
03962   if (isVarArg) {
03963     // Handle fixed and variable vector arguments differently.
03964     // Fixed vector arguments go into registers as long as registers are
03965     // available. Variable vector arguments always go into memory.
03966     unsigned NumArgs = Outs.size();
03967 
03968     for (unsigned i = 0; i != NumArgs; ++i) {
03969       MVT ArgVT = Outs[i].VT;
03970       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
03971       bool Result;
03972 
03973       if (Outs[i].IsFixed) {
03974         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
03975                                CCInfo);
03976       } else {
03977         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
03978                                       ArgFlags, CCInfo);
03979       }
03980 
03981       if (Result) {
03982 #ifndef NDEBUG
03983         errs() << "Call operand #" << i << " has unhandled type "
03984              << EVT(ArgVT).getEVTString() << "\n";
03985 #endif
03986         llvm_unreachable(nullptr);
03987       }
03988     }
03989   } else {
03990     // All arguments are treated the same.
03991     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
03992   }
03993 
03994   // Assign locations to all of the outgoing aggregate by value arguments.
03995   SmallVector<CCValAssign, 16> ByValArgLocs;
03996   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
03997                       getTargetMachine(), ByValArgLocs, *DAG.getContext());
03998 
03999   // Reserve stack space for the allocations in CCInfo.
04000   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
04001 
04002   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
04003 
04004   // Size of the linkage area, parameter list area and the part of the local
04005   // space variable where copies of aggregates which are passed by value are
04006   // stored.
04007   unsigned NumBytes = CCByValInfo.getNextStackOffset();
04008 
04009   // Calculate by how many bytes the stack has to be adjusted in case of tail
04010   // call optimization.
04011   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
04012 
04013   // Adjust the stack pointer for the new arguments...
04014   // These operations are automatically eliminated by the prolog/epilog pass
04015   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
04016                                dl);
04017   SDValue CallSeqStart = Chain;
04018 
04019   // Load the return address and frame pointer so it can be moved somewhere else
04020   // later.
04021   SDValue LROp, FPOp;
04022   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
04023                                        dl);
04024 
04025   // Set up a copy of the stack pointer for use loading and storing any
04026   // arguments that may not fit in the registers available for argument
04027   // passing.
04028   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
04029 
04030   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
04031   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
04032   SmallVector<SDValue, 8> MemOpChains;
04033 
04034   bool seenFloatArg = false;
04035   // Walk the register/memloc assignments, inserting copies/loads.
04036   for (unsigned i = 0, j = 0, e = ArgLocs.size();
04037        i != e;
04038        ++i) {
04039     CCValAssign &VA = ArgLocs[i];
04040     SDValue Arg = OutVals[i];
04041     ISD::ArgFlagsTy Flags = Outs[i].Flags;
04042 
04043     if (Flags.isByVal()) {
04044       // Argument is an aggregate which is passed by value, thus we need to
04045       // create a copy of it in the local variable space of the current stack
04046       // frame (which is the stack frame of the caller) and pass the address of
04047       // this copy to the callee.
04048       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
04049       CCValAssign &ByValVA = ByValArgLocs[j++];
04050       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
04051 
04052       // Memory reserved in the local variable space of the callers stack frame.
04053       unsigned LocMemOffset = ByValVA.getLocMemOffset();
04054 
04055       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
04056       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
04057 
04058       // Create a copy of the argument in the local area of the current
04059       // stack frame.
04060       SDValue MemcpyCall =
04061         CreateCopyOfByValArgument(Arg, PtrOff,
04062                                   CallSeqStart.getNode()->getOperand(0),
04063                                   Flags, DAG, dl);
04064 
04065       // This must go outside the CALLSEQ_START..END.
04066       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
04067                            CallSeqStart.getNode()->getOperand(1),
04068                            SDLoc(MemcpyCall));
04069       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
04070                              NewCallSeqStart.getNode());
04071       Chain = CallSeqStart = NewCallSeqStart;
04072 
04073       // Pass the address of the aggregate copy on the stack either in a
04074       // physical register or in the parameter list area of the current stack
04075       // frame to the callee.
04076       Arg = PtrOff;
04077     }
04078 
04079     if (VA.isRegLoc()) {
04080       if (Arg.getValueType() == MVT::i1)
04081         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
04082 
04083       seenFloatArg |= VA.getLocVT().isFloatingPoint();
04084       // Put argument in a physical register.
04085       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
04086     } else {
04087       // Put argument in the parameter list area of the current stack frame.
04088       assert(VA.isMemLoc());
04089       unsigned LocMemOffset = VA.getLocMemOffset();
04090 
04091       if (!isTailCall) {
04092         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
04093         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
04094 
04095         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
04096                                            MachinePointerInfo(),
04097                                            false, false, 0));
04098       } else {
04099         // Calculate and remember argument location.
04100         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
04101                                  TailCallArguments);
04102       }
04103     }
04104   }
04105 
04106   if (!MemOpChains.empty())
04107     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
04108 
04109   // Build a sequence of copy-to-reg nodes chained together with token chain
04110   // and flag operands which copy the outgoing args into the appropriate regs.
04111   SDValue InFlag;
04112   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
04113     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
04114                              RegsToPass[i].second, InFlag);
04115     InFlag = Chain.getValue(1);
04116   }
04117 
04118   // Set CR bit 6 to true if this is a vararg call with floating args passed in
04119   // registers.
04120   if (isVarArg) {
04121     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
04122     SDValue Ops[] = { Chain, InFlag };
04123 
04124     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
04125                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
04126 
04127     InFlag = Chain.getValue(1);
04128   }
04129 
04130   if (isTailCall)
04131     PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
04132                     false, TailCallArguments);
04133 
04134   return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
04135                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
04136                     Ins, InVals);
04137 }
04138 
04139 // Copy an argument into memory, being careful to do this outside the
04140 // call sequence for the call to which the argument belongs.
04141 SDValue
04142 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
04143                                               SDValue CallSeqStart,
04144                                               ISD::ArgFlagsTy Flags,
04145                                               SelectionDAG &DAG,
04146                                               SDLoc dl) const {
04147   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
04148                         CallSeqStart.getNode()->getOperand(0),
04149                         Flags, DAG, dl);
04150   // The MEMCPY must go outside the CALLSEQ_START..END.
04151   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
04152                              CallSeqStart.getNode()->getOperand(1),
04153                              SDLoc(MemcpyCall));
04154   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
04155                          NewCallSeqStart.getNode());
04156   return NewCallSeqStart;
04157 }
04158 
// Lower an outgoing call under the 64-bit SVR4 ABI (both ELFv1 and ELFv2).
// Computes the parameter save area layout, assigns arguments to GPRs, FPRs,
// VRs (or VSX high registers) and stack slots, stages byval aggregates
// through memory, handles the TOC save for indirect calls, and finally
// emits the call via FinishCall.
SDValue
PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {

  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // 64-bit ABI: pointers and parameter-save-area slots are doublewords.
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // alloc and restoring the callers stack pointer in this functions epilog.
  // This is done because by tail calling the called function might overwrite
  // the value in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
                                                          isELFv2ABI);
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // NOTE: This loop must mirror the ArgOffset bookkeeping in the argument
  // assignment loop below exactly; the assert on NumBytesActuallyUsed at the
  // end of that loop checks the two stayed in sync.
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    /* Respect alignment of argument on the stack.  */
    unsigned Align =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = ((NumBytes + Align - 1) / Align) * Align;

    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
    // Members of in-register float arrays take only 4 bytes each; round up
    // to a full doubleword after the last member.
    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  }

  unsigned NumBytesActuallyUsed = NumBytes;

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
                               dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
                                       dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  // GPR_idx is not advanced independently; it is recomputed from ArgOffset
  // at the top of each loop iteration (see below), so alignment padding in
  // the parameter area also consumes the corresponding GPRs.
  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg *FPR = GetFPR();

  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  // VSX "high" registers aliasing V2-V13; used for v2f64/v2i64 arguments so
  // the value lives in the VSX register file.
  static const MCPhysReg VSRH[] = {
    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    /* Respect alignment of argument on the stack.  */
    unsigned Align =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;

    /* Compute GPR index associated with argument offset.  */
    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
    GPR_idx = std::min(GPR_idx, NumGPRs);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          // Load the (power-of-two-sized) aggregate directly from the
          // original memory; the extending load right-justifies it in the
          // GPR without a staging copy.
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT,
                                        false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      if (GPR_idx == NumGPRs && Size < 8) {
        // No register available: copy into the stack slot, right-justified
        // on big-endian targets.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
                                   MachinePointerInfo(),
                                   false, false, false, 0);
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the remainder stays in the (already copied)
          // parameter save area; account for it and stop.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs) {
        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            // Big-endian: shift into the most-significant (first) word.
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together the with subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
      } else {
        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      ArgOffset += (Arg.getValueType() == MVT::f32 &&
                    Flags.isInConsecutiveRegs()) ? 4 : 8;
      if (Flags.isInConsecutiveRegsLast())
        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range.  For now, we always put the value in both
      // locations (or even all three).
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));

          // 2-element vectors go in VSX high registers; others in VRs.
          unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                           Arg.getSimpleValueType() == MVT::v2i64) ?
                          VSRH[VR_idx] : VR[VR_idx];
          ++VR_idx;

          RegsToPass.push_back(std::make_pair(VReg, Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(i, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                         Arg.getSimpleValueType() == MVT::v2i64) ?
                        VSRH[VR_idx] : VR[VR_idx];
        ++VR_idx;

        RegsToPass.push_back(std::make_pair(VReg, Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += 16;
      break;
    }
  }

  // The size-computation loop at the top of this function and the ArgOffset
  // bookkeeping above must agree exactly.
  assert(NumBytesActuallyUsed == ArgOffset);
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See PrepareCall() for more information about calls through function
  // pointers in the 64-bit SVR4 ABI.
  if (!isTailCall &&
      !dyn_cast<GlobalAddressSDNode>(Callee) &&
      !dyn_cast<ExternalSymbolSDNode>(Callee)) {
    // Load r2 into a virtual register and store it to the TOC save area.
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
    // TOC save area offset.
    unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
                         false, false, 0);
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
                    FPOp, true, TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                    Ins, InVals);
}
04633 
04634 SDValue
04635 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
04636                                     CallingConv::ID CallConv, bool isVarArg,
04637                                     bool isTailCall,
04638                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
04639                                     const SmallVectorImpl<SDValue> &OutVals,
04640                                     const SmallVectorImpl<ISD::InputArg> &Ins,
04641                                     SDLoc dl, SelectionDAG &DAG,
04642                                     SmallVectorImpl<SDValue> &InVals) const {
04643 
04644   unsigned NumOps = Outs.size();
04645 
04646   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
04647   bool isPPC64 = PtrVT == MVT::i64;
04648   unsigned PtrByteSize = isPPC64 ? 8 : 4;
04649 
04650   MachineFunction &MF = DAG.getMachineFunction();
04651 
04652   // Mark this function as potentially containing a function that contains a
04653   // tail call. As a consequence the frame pointer will be used for dynamicalloc
04654   // and restoring the callers stack pointer in this functions epilog. This is
04655   // done because by tail calling the called function might overwrite the value
04656   // in this function's (MF) stack pointer stack slot 0(SP).
04657   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
04658       CallConv == CallingConv::Fast)
04659     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
04660 
04661   // Count how many bytes are to be pushed on the stack, including the linkage
04662   // area, and parameter passing area.  We start with 24/48 bytes, which is
04663   // prereserved space for [SP][CR][LR][3 x unused].
04664   unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
04665                                                           false);
04666   unsigned NumBytes = LinkageSize;
04667 
04668   // Add up all the space actually used.
04669   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
04670   // they all go in registers, but we must reserve stack space for them for
04671   // possible use by the caller.  In varargs or 64-bit calls, parameters are
04672   // assigned stack space in order, with padding so Altivec parameters are
04673   // 16-byte aligned.
04674   unsigned nAltivecParamsAtEnd = 0;
04675   for (unsigned i = 0; i != NumOps; ++i) {
04676     ISD::ArgFlagsTy Flags = Outs[i].Flags;
04677     EVT ArgVT = Outs[i].VT;
04678     // Varargs Altivec parameters are padded to a 16 byte boundary.
04679     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
04680         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
04681         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
04682       if (!isVarArg && !isPPC64) {
04683         // Non-varargs Altivec parameters go after all the non-Altivec
04684         // parameters; handle those later so we know how much padding we need.
04685         nAltivecParamsAtEnd++;
04686         continue;
04687       }
04688       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
04689       NumBytes = ((NumBytes+15)/16)*16;
04690     }
04691     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
04692   }
04693 
04694   // Allow for Altivec parameters at the end, if needed.
04695   if (nAltivecParamsAtEnd) {
04696     NumBytes = ((NumBytes+15)/16)*16;
04697     NumBytes += 16*nAltivecParamsAtEnd;
04698   }
04699 
04700   // The prolog code of the callee may store up to 8 GPR argument registers to
04701   // the stack, allowing va_start to index over them in memory if its varargs.
04702   // Because we cannot tell if this is needed on the caller side, we have to
04703   // conservatively assume that it is needed.  As such, make sure we have at
04704   // least enough stack space for the caller to store the 8 GPRs.
04705   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
04706 
04707   // Tail call needs the stack to be aligned.
04708   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
04709       CallConv == CallingConv::Fast)
04710     NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
04711 
04712   // Calculate by how many bytes the stack has to be adjusted in case of tail
04713   // call optimization.
04714   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
04715 
04716   // To protect arguments on the stack from being clobbered in a tail call,
04717   // force all the loads to happen before doing any other lowering.
04718   if (isTailCall)
04719     Chain = DAG.getStackArgumentTokenFactor(Chain);
04720 
04721   // Adjust the stack pointer for the new arguments...
04722   // These operations are automatically eliminated by the prolog/epilog pass
04723   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
04724                                dl);
04725   SDValue CallSeqStart = Chain;
04726 
04727   // Load the return address and frame pointer so it can be move somewhere else
04728   // later.
04729   SDValue LROp, FPOp;
04730   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
04731                                        dl);
04732 
04733   // Set up a copy of the stack pointer for use loading and storing any
04734   // arguments that may not fit in the registers available for argument
04735   // passing.
04736   SDValue StackPtr;
04737   if (isPPC64)
04738     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
04739   else
04740     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
04741 
04742   // Figure out which arguments are going to go in registers, and which in
04743   // memory.  Also, if this is a vararg function, floating point operations
04744   // must be stored to our stack, and loaded into integer regs as well, if
04745   // any integer regs are available for argument passing.
04746   unsigned ArgOffset = LinkageSize;
04747   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
04748 
04749   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
04750     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
04751     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
04752   };
04753   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
04754     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
04755     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
04756   };
04757   static const MCPhysReg *FPR = GetFPR();
04758 
04759   static const MCPhysReg VR[] = {
04760     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
04761     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
04762   };
04763   const unsigned NumGPRs = array_lengthof(GPR_32);
04764   const unsigned NumFPRs = 13;
04765   const unsigned NumVRs  = array_lengthof(VR);
04766 
04767   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
04768 
04769   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
04770   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
04771 
04772   SmallVector<SDValue, 8> MemOpChains;
04773   for (unsigned i = 0; i != NumOps; ++i) {
04774     SDValue Arg = OutVals[i];
04775     ISD::ArgFlagsTy Flags = Outs[i].Flags;
04776 
04777     // PtrOff will be used to store the current argument to the stack if a
04778     // register cannot be found for it.
04779     SDValue PtrOff;
04780 
04781     PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
04782 
04783     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
04784 
04785     // On PPC64, promote integers to 64-bit values.
04786     if (isPPC64 && Arg.getValueType() == MVT::i32) {
04787       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
04788       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
04789       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
04790     }
04791 
04792     // FIXME memcpy is used way more than necessary.  Correctness first.
04793     // Note: "by value" is code for passing a structure by value, not
04794     // basic types.
04795     if (Flags.isByVal()) {
04796       unsigned Size = Flags.getByValSize();
04797       // Very small objects are passed right-justified.  Everything else is
04798       // passed left-justified.
04799       if (Size==1 || Size==2) {
04800         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
04801         if (GPR_idx != NumGPRs) {
04802           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
04803                                         MachinePointerInfo(), VT,
04804                                         false, false, 0);
04805           MemOpChains.push_back(Load.getValue(1));
04806           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04807 
04808           ArgOffset += PtrByteSize;
04809         } else {
04810           SDValue Const = DAG.getConstant(PtrByteSize - Size,
04811                                           PtrOff.getValueType());
04812           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
04813           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
04814                                                             CallSeqStart,
04815                                                             Flags, DAG, dl);
04816           ArgOffset += PtrByteSize;
04817         }
04818         continue;
04819       }
04820       // Copy entire object into memory.  There are cases where gcc-generated
04821       // code assumes it is there, even if it could be put entirely into
04822       // registers.  (This is not what the doc says.)
04823       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
04824                                                         CallSeqStart,
04825                                                         Flags, DAG, dl);
04826 
04827       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
04828       // copy the pieces of the object that fit into registers from the
04829       // parameter save area.
04830       for (unsigned j=0; j<Size; j+=PtrByteSize) {
04831         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
04832         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
04833         if (GPR_idx != NumGPRs) {
04834           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
04835                                      MachinePointerInfo(),
04836                                      false, false, false, 0);
04837           MemOpChains.push_back(Load.getValue(1));
04838           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04839           ArgOffset += PtrByteSize;
04840         } else {
04841           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
04842           break;
04843         }
04844       }
04845       continue;
04846     }
04847 
04848     switch (Arg.getSimpleValueType().SimpleTy) {
04849     default: llvm_unreachable("Unexpected ValueType for argument!");
04850     case MVT::i1:
04851     case MVT::i32:
04852     case MVT::i64:
04853       if (GPR_idx != NumGPRs) {
04854         if (Arg.getValueType() == MVT::i1)
04855           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
04856 
04857         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
04858       } else {
04859         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
04860                          isPPC64, isTailCall, false, MemOpChains,
04861                          TailCallArguments, dl);
04862       }
04863       ArgOffset += PtrByteSize;
04864       break;
04865     case MVT::f32:
04866     case MVT::f64:
04867       if (FPR_idx != NumFPRs) {
04868         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
04869 
04870         if (isVarArg) {
04871           SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
04872                                        MachinePointerInfo(), false, false, 0);
04873           MemOpChains.push_back(Store);
04874 
04875           // Float varargs are always shadowed in available integer registers
04876           if (GPR_idx != NumGPRs) {
04877             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
04878                                        MachinePointerInfo(), false, false,
04879                                        false, 0);
04880             MemOpChains.push_back(Load.getValue(1));
04881             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04882           }
04883           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
04884             SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
04885             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
04886             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
04887                                        MachinePointerInfo(),
04888                                        false, false, false, 0);
04889             MemOpChains.push_back(Load.getValue(1));
04890             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04891           }
04892         } else {
04893           // If we have any FPRs remaining, we may also have GPRs remaining.
04894           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
04895           // GPRs.
04896           if (GPR_idx != NumGPRs)
04897             ++GPR_idx;
04898           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
04899               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
04900             ++GPR_idx;
04901         }
04902       } else
04903         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
04904                          isPPC64, isTailCall, false, MemOpChains,
04905                          TailCallArguments, dl);
04906       if (isPPC64)
04907         ArgOffset += 8;
04908       else
04909         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
04910       break;
04911     case MVT::v4f32:
04912     case MVT::v4i32:
04913     case MVT::v8i16:
04914     case MVT::v16i8:
04915       if (isVarArg) {
04916         // These go aligned on the stack, or in the corresponding R registers
04917         // when within range.  The Darwin PPC ABI doc claims they also go in
04918         // V registers; in fact gcc does this only for arguments that are
04919         // prototyped, not for those that match the ...  We do it for all
04920         // arguments, seems to work.
04921         while (ArgOffset % 16 !=0) {
04922           ArgOffset += PtrByteSize;
04923           if (GPR_idx != NumGPRs)
04924             GPR_idx++;
04925         }
04926         // We could elide this store in the case where the object fits
04927         // entirely in R registers.  Maybe later.
04928         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
04929                             DAG.getConstant(ArgOffset, PtrVT));
04930         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
04931                                      MachinePointerInfo(), false, false, 0);
04932         MemOpChains.push_back(Store);
04933         if (VR_idx != NumVRs) {
04934           SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
04935                                      MachinePointerInfo(),
04936                                      false, false, false, 0);
04937           MemOpChains.push_back(Load.getValue(1));
04938           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
04939         }
04940         ArgOffset += 16;
04941         for (unsigned i=0; i<16; i+=PtrByteSize) {
04942           if (GPR_idx == NumGPRs)
04943             break;
04944           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
04945                                   DAG.getConstant(i, PtrVT));
04946           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
04947                                      false, false, false, 0);
04948           MemOpChains.push_back(Load.getValue(1));
04949           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04950         }
04951         break;
04952       }
04953 
04954       // Non-varargs Altivec params generally go in registers, but have
04955       // stack space allocated at the end.
04956       if (VR_idx != NumVRs) {
04957         // Doesn't have GPR space allocated.
04958         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
04959       } else if (nAltivecParamsAtEnd==0) {
04960         // We are emitting Altivec params in order.
04961         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
04962                          isPPC64, isTailCall, true, MemOpChains,
04963                          TailCallArguments, dl);
04964         ArgOffset += 16;
04965       }
04966       break;
04967     }
04968   }
04969   // If all Altivec parameters fit in registers, as they usually do,
04970   // they get stack space following the non-Altivec parameters.  We
04971   // don't track this here because nobody below needs it.
04972   // If there are more Altivec parameters than fit in registers emit
04973   // the stores here.
04974   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
04975     unsigned j = 0;
04976     // Offset is aligned; skip 1st 12 params which go in V registers.
04977     ArgOffset = ((ArgOffset+15)/16)*16;
04978     ArgOffset += 12*16;
04979     for (unsigned i = 0; i != NumOps; ++i) {
04980       SDValue Arg = OutVals[i];
04981       EVT ArgType = Outs[i].VT;
04982       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
04983           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
04984         if (++j > NumVRs) {
04985           SDValue PtrOff;
04986           // We are emitting Altivec params in order.
04987           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
04988                            isPPC64, isTailCall, true, MemOpChains,
04989                            TailCallArguments, dl);
04990           ArgOffset += 16;
04991         }
04992       }
04993     }
04994   }
04995 
04996   if (!MemOpChains.empty())
04997     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
04998 
04999   // On Darwin, R12 must contain the address of an indirect callee.  This does
05000   // not mean the MTCTR instruction must use R12; it's easier to model this as
05001   // an extra parameter, so do that.
05002   if (!isTailCall &&
05003       !dyn_cast<GlobalAddressSDNode>(Callee) &&
05004       !dyn_cast<ExternalSymbolSDNode>(Callee) &&
05005       !isBLACompatibleAddress(Callee, DAG))
05006     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
05007                                                    PPC::R12), Callee));
05008 
05009   // Build a sequence of copy-to-reg nodes chained together with token chain
05010   // and flag operands which copy the outgoing args into the appropriate regs.
05011   SDValue InFlag;
05012   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
05013     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
05014                              RegsToPass[i].second, InFlag);
05015     InFlag = Chain.getValue(1);
05016   }
05017 
05018   if (isTailCall)
05019     PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
05020                     FPOp, true, TailCallArguments);
05021 
05022   return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
05023                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
05024                     Ins, InVals);
05025 }
05026 
05027 bool
05028 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
05029                                   MachineFunction &MF, bool isVarArg,
05030                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
05031                                   LLVMContext &Context) const {
05032   SmallVector<CCValAssign, 16> RVLocs;
05033   CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
05034                  RVLocs, Context);
05035   return CCInfo.CheckReturn(Outs, RetCC_PPC);
05036 }
05037 
05038 SDValue
05039 PPCTargetLowering::LowerReturn(SDValue Chain,
05040                                CallingConv::ID CallConv, bool isVarArg,
05041                                const SmallVectorImpl<ISD::OutputArg> &Outs,
05042                                const SmallVectorImpl<SDValue> &OutVals,
05043                                SDLoc dl, SelectionDAG &DAG) const {
05044 
05045   SmallVector<CCValAssign, 16> RVLocs;
05046   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
05047                  getTargetMachine(), RVLocs, *DAG.getContext());
05048   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
05049 
05050   SDValue Flag;
05051   SmallVector<SDValue, 4> RetOps(1, Chain);
05052 
05053   // Copy the result values into the output registers.
05054   for (unsigned i = 0; i != RVLocs.size(); ++i) {
05055     CCValAssign &VA = RVLocs[i];
05056     assert(VA.isRegLoc() && "Can only return in registers!");
05057 
05058     SDValue Arg = OutVals[i];
05059 
05060     switch (VA.getLocInfo()) {
05061     default: llvm_unreachable("Unknown loc info!");
05062     case CCValAssign::Full: break;
05063     case CCValAssign::AExt:
05064       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
05065       break;
05066     case CCValAssign::ZExt:
05067       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
05068       break;
05069     case CCValAssign::SExt:
05070       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
05071       break;
05072     }
05073 
05074     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
05075     Flag = Chain.getValue(1);
05076     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
05077   }
05078 
05079   RetOps[0] = Chain;  // Update chain.
05080 
05081   // Add the flag if we have it.
05082   if (Flag.getNode())
05083     RetOps.push_back(Flag);
05084 
05085   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
05086 }
05087 
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                                   const PPCSubtarget &Subtarget) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Construct the stack pointer operand (X1 on 64-bit, R1 on 32-bit).
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE: the incoming chain and the saved
  // stack-pointer value to restore.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP (the word at 0(SP)) before SP is moved.
  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
                                   MachinePointerInfo(),
                                   false, false, false, 0);

  // Restore the stack pointer; chained after the load so the link word is
  // read from the old stack location first.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP at 0(new SP) so the SP link stays intact.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
                      false, false, 0);
}
05117 
05118 
05119 
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get the current return-address save index, creating it lazily below on
  // first use.  (The original comments here referred to the frame pointer;
  // this function handles the LR/return-address slot.)
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return-address save index hasn't been defined yet.
  if (!RASI) {
    // Find the fixed ABI offset of the return-address (LR) save area.
    int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
    // Allocate one pointer-sized fixed object at that offset.
    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
    // Cache the result so subsequent queries reuse the same slot.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}
05143 
05144 SDValue
05145 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
05146   MachineFunction &MF = DAG.getMachineFunction();
05147   bool isPPC64 = Subtarget.isPPC64();
05148   bool isDarwinABI = Subtarget.isDarwinABI();
05149   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05150 
05151   // Get current frame pointer save index.  The users of this index will be
05152   // primarily DYNALLOC instructions.
05153   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
05154   int FPSI = FI->getFramePointerSaveIndex();
05155 
05156   // If the frame pointer save index hasn't been defined yet.
05157   if (!FPSI) {
05158     // Find out what the fix offset of the frame pointer save area.
05159     int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
05160                                                            isDarwinABI);
05161 
05162     // Allocate the frame index for frame pointer save area.
05163     FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
05164     // Save the result.
05165     FI->setFramePointerSaveIndex(FPSI);
05166   }
05167   return DAG.getFrameIndex(FPSI, PtrVT);
05168 }
05169 
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                         SelectionDAG &DAG,
                                         const PPCSubtarget &Subtarget) const {
  // Get the inputs: incoming chain and requested allocation size.
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Negate the size: the stack grows downward, so DYNALLOC adjusts the stack
  // pointer by the negated amount.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                  DAG.getConstant(0, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node producing the allocated address plus a chain.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
05190 
05191 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
05192                                                SelectionDAG &DAG) const {
05193   SDLoc DL(Op);
05194   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
05195                      DAG.getVTList(MVT::i32, MVT::Other),
05196                      Op.getOperand(0), Op.getOperand(1));
05197 }
05198 
05199 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
05200                                                 SelectionDAG &DAG) const {
05201   SDLoc DL(Op);
05202   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
05203                      Op.getOperand(0), Op.getOperand(1));
05204 }
05205 
05206 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
05207   assert(Op.getValueType() == MVT::i1 &&
05208          "Custom lowering only for i1 loads");
05209 
05210   // First, load 8 bits into 32 bits, then truncate to 1 bit.
05211 
05212   SDLoc dl(Op);
05213   LoadSDNode *LD = cast<LoadSDNode>(Op);
05214 
05215   SDValue Chain = LD->getChain();
05216   SDValue BasePtr = LD->getBasePtr();
05217   MachineMemOperand *MMO = LD->getMemOperand();
05218 
05219   SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
05220                                  BasePtr, MVT::i8, MMO);
05221   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
05222 
05223   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
05224   return DAG.getMergeValues(Ops, dl);
05225 }
05226 
05227 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
05228   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
05229          "Custom lowering only for i1 stores");
05230 
05231   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
05232 
05233   SDLoc dl(Op);
05234   StoreSDNode *ST = cast<StoreSDNode>(Op);
05235 
05236   SDValue Chain = ST->getChain();
05237   SDValue BasePtr = ST->getBasePtr();
05238   SDValue Value = ST->getValue();
05239   MachineMemOperand *MMO = ST->getMemOperand();
05240 
05241   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
05242   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
05243 }
05244 
05245 // FIXME: Remove this once the ANDI glue bug is fixed:
05246 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
05247   assert(Op.getValueType() == MVT::i1 &&
05248          "Custom lowering only for i1 results");
05249 
05250   SDLoc DL(Op);
05251   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
05252                      Op.getOperand(0));
05253 }
05254 
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      // FALLTHROUGH: NE is EQ with the select arms swapped.
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // Sel1 = (LHS >= 0) ? TV : FV; combining with the (-LHS >= 0) select
      // below yields (LHS == 0) ? TV : FV.
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      // FALLTHROUGH
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      // FALLTHROUGH
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // (LHS <= 0) is implemented as (-LHS >= 0).
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: materialize LHS-RHS (or RHS-LHS) and select on the sign of
  // the difference.
  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    // FALLTHROUGH: NE is EQ with the select arms swapped.
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    // Select on both (Cmp >= 0) and (-Cmp >= 0) to get equality.
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    // LHS < RHS  ==>  !(LHS-RHS >= 0), so swap the arms.
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    // LHS > RHS  ==>  !(RHS-LHS >= 0), so swap the arms.
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}
05354 
// FIXME: Split this code up when LegalizeDAGTypes lands.
//
// Lower FP_TO_SINT/FP_TO_UINT: convert the source with the appropriate
// fcti*z instruction (result lives in an FP register as an f64 bit
// pattern), spill it to a stack temporary, and reload the integer result.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                           SDLoc dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  // The conversion instructions operate on f64, so widen f32 sources first.
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // For unsigned i32 without FPCVT there is no fctiwuz; fall back to
    // fctidz, whose 64-bit result is narrowed by the i32 load below.
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
                        (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
                                                   PPCISD::FCTIDZ),
                      dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.  When stfiwx is
  // available (and the conversion produced a valid integer word), store
  // only the low 32 bits; otherwise store the whole f64.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    // stfiwx is not a normal store, so it needs an explicit memory operand.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
                         MPI, false, false, 0);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias: the i32 result is the low word of the stored 8-byte
  // f64 slot, which sits at offset 4 in big-endian layout.
  // NOTE(review): the unconditional +4 bias assumes big-endian word order
  // -- confirm behavior on little-endian subtargets.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, FIPtr.getValueType()));
    // The biased pointer no longer points at the slot's start, so drop the
    // precise fixed-stack pointer info.
    MPI = MachinePointerInfo();
  }

  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
                     false, false, false, 0);
}
05412 
// Lower [SU]INT_TO_FP for f32/f64 results.  i64 sources are bitcast into an
// FP register and converted with fcfid[u][s]; i32 sources go through a stack
// slot and are reloaded into an FP register (lfiwax/lfiwzx when available,
// otherwise sign-extend + std + lfd, which requires PPC64).
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // An i1 source converts to exactly 1.0 or 0.0; emit a select directly.
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, Op.getValueType()),
                       DAG.getConstantFP(0.0, Op.getValueType()));

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
                   (Op.getOpcode() == ISD::UINT_TO_FP ?
                    PPCISD::FCFIDUS : PPCISD::FCFIDS) :
                   (Op.getOpcode() == ISD::UINT_TO_FP ?
                    PPCISD::FCFIDU : PPCISD::FCFID);
  MVT      FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
                   MVT::f32 : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, MVT::i32));
      // After the arithmetic shift, sign-bit-only values are 0 or -1;
      // adding 1 maps them to 0 or 1, so "unsigned > 1" detects any value
      // whose top 11 bits are NOT pure sign copies.
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    // Move the i64 bits into an FP register and convert.
    SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    // Without FCFIDS the conversion produced f64; round to f32 here.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    // lfiwax/lfiwzx load a 4-byte integer word straight into an FP
    // register, so a 4-byte slot suffices.
    int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                                 MachinePointerInfo::getFixedStack(FrameIdx),
                                 false, false, 0);

    assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
           "Expected an i32 store");
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
                              MachineMemOperand::MOLoad, 4, 4);
    SDValue Ops[] = { Store, FIdx };
    // lfiwzx zero-extends (for unsigned), lfiwax sign-extends (for signed).
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
                                 MachinePointerInfo::getFixedStack(FrameIdx),
                                 false, false, 0);

    // Load the value as a double.
    Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
                     MachinePointerInfo::getFixedStack(FrameIdx),
                     false, false, false, 0);
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
  return FP;
}
05551 
// Lower FLT_ROUNDS_: read the FPSCR with mffs, spill it to a stack slot,
// reload the control word, and remap the PPC rounding-mode encoding to the
// C FLT_ROUNDS encoding with bit arithmetic.
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Save FP Control Word to register.  MFFS produces the FPSCR contents as
  // an f64 value (result 0); despite the name, "Chain" below is that value.
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
                               StackSlot, MachinePointerInfo(), false, false,0);

  // Load FP Control Word from low 32 bits of stack slot.
  // NOTE(review): the +4 offset picks the low word of the stored f64
  // assuming big-endian word order -- confirm for little-endian targets.
  SDValue Four = DAG.getConstant(4, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
                            false, false, false, 0);

  // Transform as necessary: compute (FPSCR & 3) and ((~FPSCR & 3) >> 1),
  // then XOR them per the formula in the header comment above.
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, MVT::i32)),
                            DAG.getConstant(3, MVT::i32)),
                DAG.getConstant(1, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // Adjust the computed i32 to the requested result type.
  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
05615 
05616 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
05617   EVT VT = Op.getValueType();
05618   unsigned BitWidth = VT.getSizeInBits();
05619   SDLoc dl(Op);
05620   assert(Op.getNumOperands() == 3 &&
05621          VT == Op.getOperand(1).getValueType() &&
05622          "Unexpected SHL!");
05623 
05624   // Expand into a bunch of logical ops.  Note that these ops
05625   // depend on the PPC behavior for oversized shift amounts.
05626   SDValue Lo = Op.getOperand(0);
05627   SDValue Hi = Op.getOperand(1);
05628   SDValue Amt = Op.getOperand(2);
05629   EVT AmtVT = Amt.getValueType();
05630 
05631   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
05632                              DAG.getConstant(BitWidth, AmtVT), Amt);
05633   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
05634   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
05635   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
05636   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
05637                              DAG.getConstant(-BitWidth, AmtVT));
05638   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
05639   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
05640   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
05641   SDValue OutOps[] = { OutLo, OutHi };
05642   return DAG.getMergeValues(OutOps, dl);
05643 }
05644 
05645 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
05646   EVT VT = Op.getValueType();
05647   SDLoc dl(Op);
05648   unsigned BitWidth = VT.getSizeInBits();
05649   assert(Op.getNumOperands() == 3 &&
05650          VT == Op.getOperand(1).getValueType() &&
05651          "Unexpected SRL!");
05652 
05653   // Expand into a bunch of logical ops.  Note that these ops
05654   // depend on the PPC behavior for oversized shift amounts.
05655   SDValue Lo = Op.getOperand(0);
05656   SDValue Hi = Op.getOperand(1);
05657   SDValue Amt = Op.getOperand(2);
05658   EVT AmtVT = Amt.getValueType();
05659 
05660   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
05661                              DAG.getConstant(BitWidth, AmtVT), Amt);
05662   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
05663   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
05664   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
05665   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
05666                              DAG.getConstant(-BitWidth, AmtVT));
05667   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
05668   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
05669   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
05670   SDValue OutOps[] = { OutLo, OutHi };
05671   return DAG.getMergeValues(OutOps, dl);
05672 }
05673 
05674 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
05675   SDLoc dl(Op);
05676   EVT VT = Op.getValueType();
05677   unsigned BitWidth = VT.getSizeInBits();
05678   assert(Op.getNumOperands() == 3 &&
05679          VT == Op.getOperand(1).getValueType() &&
05680          "Unexpected SRA!");
05681 
05682   // Expand into a bunch of logical ops, followed by a select_cc.
05683   SDValue Lo = Op.getOperand(0);
05684   SDValue Hi = Op.getOperand(1);
05685   SDValue Amt = Op.getOperand(2);
05686   EVT AmtVT = Amt.getValueType();
05687 
05688   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
05689                              DAG.getConstant(BitWidth, AmtVT), Amt);
05690   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
05691   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
05692   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
05693   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
05694                              DAG.getConstant(-BitWidth, AmtVT));
05695   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
05696   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
05697   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
05698                                   Tmp4, Tmp6, ISD::SETLE);
05699   SDValue OutOps[] = { OutLo, OutHi };
05700   return DAG.getMergeValues(OutOps, dl);
05701 }
05702 
05703 //===----------------------------------------------------------------------===//
05704 // Vector related lowering.
05705 //
05706 
05707 /// BuildSplatI - Build a canonical splati of Val with an element size of
05708 /// SplatSize.  Cast the result to VT.
05709 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
05710                              SelectionDAG &DAG, SDLoc dl) {
05711   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
05712 
05713   static const EVT VTys[] = { // canonical VT to use for each size.
05714     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
05715   };
05716 
05717   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
05718 
05719   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
05720   if (Val == -1)
05721     SplatSize = 1;
05722 
05723   EVT CanonicalVT = VTys[SplatSize-1];
05724 
05725   // Build a canonical splat for this value.
05726   SDValue Elt = DAG.getConstant(Val, MVT::i32);
05727   SmallVector<SDValue, 8> Ops;
05728   Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
05729   SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
05730   return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
05731 }
05732 
05733 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
05734 /// specified intrinsic ID.
05735 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
05736                                 SelectionDAG &DAG, SDLoc dl,
05737                                 EVT DestVT = MVT::Other) {
05738   if (DestVT == MVT::Other) DestVT = Op.getValueType();
05739   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
05740                      DAG.getConstant(IID, MVT::i32), Op);
05741 }
05742 
05743 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
05744 /// specified intrinsic ID.
05745 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
05746                                 SelectionDAG &DAG, SDLoc dl,
05747                                 EVT DestVT = MVT::Other) {
05748   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
05749   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
05750                      DAG.getConstant(IID, MVT::i32), LHS, RHS);
05751 }
05752 
05753 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
05754 /// specified intrinsic ID.
05755 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
05756                                 SDValue Op2, SelectionDAG &DAG,
05757                                 SDLoc dl, EVT DestVT = MVT::Other) {
05758   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
05759   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
05760                      DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
05761 }
05762 
05763 
05764 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
05765 /// amount.  The result has the specified value type.
05766 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
05767                              EVT VT, SelectionDAG &DAG, SDLoc dl) {
05768   // Force LHS/RHS to be the right type.
05769   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
05770   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
05771 
05772   int Ops[16];
05773   for (unsigned i = 0; i != 16; ++i)
05774     Ops[i] = i + Amt;
05775   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
05776   return DAG.getNode(ISD::BITCAST, dl, VT, T);
05777 }
05778 
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  // Check if this is a splat of a constant value.  Only splats of at most
  // 32 bits are handled below.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, true) || SplatBitSize > 32)
    return SDValue();

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;   // splat element size in bytes

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, MVT::i32);
      Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
                    (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);


  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // The remaining cases assume either big endian element order or
  // a splat-size that equates to the element size of the vector
  // to be built.  An example that doesn't work for little endian is
  // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
  // and a vector element size of 16 bits.  The code below will
  // produce the vector in big endian element order, which for little
  // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.

  // For now, just avoid these optimizations in that case.
  // FIXME: Develop correct optimizations for LE with mismatched
  // splat and element sizes.

  if (Subtarget.isLittleEndian() &&
      SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
    return SDValue();

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    // NOTE(review): this condition is textually identical to the srl case
    // above, so this branch can never be taken (the srl case returns
    // first).  The sra form presumably intended an arithmetic shift of i
    // -- confirm against upstream history before changing.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
    }
  }

  // No profitable lowering found; fall back to default expansion.
  return SDValue();
}
05957 
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
///
/// PFEntry packs three fields: bits [26,30) the operation, bits [13,26) the
/// LHS sub-entry id, bits [0,13) the RHS sub-entry id.  Sub-entries are
/// expanded recursively through PerfectShuffleTable.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      SDLoc dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  // Operation codes stored in the table; must match the table generator.
  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    // IDs encode element indices as base-9 digits: (1*9+2)*9+3 is <0,1,2,3>
    // (the LHS identity), ((4*9+5)*9+6)*9+7 is <4,5,6,7> (the RHS identity).
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively materialize both sub-shuffles first.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // Byte-level shuffle mask implementing the selected word operation.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    // Splat word 0 (bytes 0-3) across all four words; similarly below.
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  // Emit the mask as a v16i8 shuffle and cast back to the operand type.
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
06034 
06035 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
06036 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
06037 /// return the code it can be lowered into.  Worst case, it can always be
06038 /// lowered into a vperm.
///
/// Strategy, in order of preference:
///   1. Leave the node as a VECTOR_SHUFFLE when the mask matches an Altivec
///      permute-immediate instruction (vsplt*, vpku*um, vsldoi, vmrg[lh]*)
///      so the instruction selector emits that single instruction.
///   2. If the mask is really a shuffle of whole 4-byte elements, consult
///      the perfect-shuffle table and emit a short discrete sequence when
///      it costs fewer than 3 operations (big-endian only for now).
///   3. Otherwise fall back to VPERM with a constant byte-permute vector.
06039 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
06040                                                SelectionDAG &DAG) const {
06041   SDLoc dl(Op);
06042   SDValue V1 = Op.getOperand(0);
06043   SDValue V2 = Op.getOperand(1);
06044   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
06045   EVT VT = Op.getValueType();
06046   bool isLittleEndian = Subtarget.isLittleEndian();
06047 
06048   // Cases that are handled by instructions that take permute immediates
06049   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
06050   // selected by the instruction selector.
        // With V2 undef the unary forms of the predicates apply (second
        // argument of the splat checks is the element size in bytes:
        // byte/halfword/word splats).
06051   if (V2.getOpcode() == ISD::UNDEF) {
06052     if (PPC::isSplatShuffleMask(SVOp, 1) ||
06053         PPC::isSplatShuffleMask(SVOp, 2) ||
06054         PPC::isSplatShuffleMask(SVOp, 4) ||
06055         PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) ||
06056         PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) ||
06057         PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 ||
06058         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
06059         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
06060         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
06061         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
06062         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
06063         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
06064       return Op;
06065     }
06066   }
06067 
06068   // Altivec has a variety of "shuffle immediates" that take two vector inputs
06069   // and produce a fixed permutation.  If any of these match, do not lower to
06070   // VPERM.
        // ShuffleKind selects which variant of each mask predicate to check:
        // 0 is the natural big-endian two-input form, 2 the form used on
        // little-endian targets where the instruction's operands end up
        // swapped.  NOTE(review): kind encoding inferred from usage here —
        // confirm against the PPC::isVMRG*/isVPKU* declarations.
06071   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
06072   if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) ||
06073       PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) ||
06074       PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 ||
06075       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
06076       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
06077       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
06078       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
06079       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
06080       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
06081     return Op;
06082 
06083   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
06084   // perfect shuffle table to emit an optimal matching sequence.
06085   ArrayRef<int> PermMask = SVOp->getMask();
06086 
        // PFIndexes[i] holds the 4-byte source element (0-7 across the two
        // concatenated 16-byte inputs) that feeds result element i, or 8 if
        // the result element is entirely undef.
06087   unsigned PFIndexes[4];
06088   bool isFourElementShuffle = true;
06089   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
06090     unsigned EltNo = 8;   // Start out undef.
06091     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
06092       if (PermMask[i*4+j] < 0)
06093         continue;   // Undef, ignore it.
06094 
06095       unsigned ByteSource = PermMask[i*4+j];
          // The byte must sit at the same offset within its 4-byte source
          // element as within the destination element; otherwise this is
          // not a whole-element shuffle.
06096       if ((ByteSource & 3) != j) {
06097         isFourElementShuffle = false;
06098         break;
06099       }
06100 
          // All defined bytes of this destination element must come from
          // the same source element.
06101       if (EltNo == 8) {
06102         EltNo = ByteSource/4;
06103       } else if (EltNo != ByteSource/4) {
06104         isFourElementShuffle = false;
06105         break;
06106       }
06107     }
06108     PFIndexes[i] = EltNo;
06109   }
06110 
06111   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
06112   // perfect shuffle vector to determine if it is cost effective to do this as
06113   // discrete instructions, or whether we should use a vperm.
06114   // For now, we skip this for little endian until such time as we have a
06115   // little-endian perfect shuffle table.
06116   if (isFourElementShuffle && !isLittleEndian) {
06117     // Compute the index in the perfect shuffle table.
        // Each index ranges over 9 values (8 source elements + undef), so
        // the four indexes form a base-9 number.
06118     unsigned PFTableIndex =
06119       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
06120 
06121     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
        // Bits 31:30 of a table entry encode the number of instructions
        // needed to materialize the shuffle.
06122     unsigned Cost  = (PFEntry >> 30);
06123 
06124     // Determining when to avoid vperm is tricky.  Many things affect the cost
06125     // of vperm, particularly how many times the perm mask needs to be computed.
06126     // For example, if the perm mask can be hoisted out of a loop or is already
06127     // used (perhaps because there are multiple permutes with the same shuffle
06128     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
06129     // the loop requires an extra register.
06130     //
06131     // As a compromise, we only emit discrete instructions if the shuffle can be
06132     // generated in 3 or fewer operations.  When we have loop information
06133     // available, if this block is within a loop, we should avoid using vperm
06134     // for 3-operation perms and use a constant pool load instead.
06135     if (Cost < 3)
06136       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
06137   }
06138 
06139   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
06140   // vector that will get spilled to the constant pool.
06141   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
06142 
06143   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
06144   // that it is in input element units, not in bytes.  Convert now.
06145 
06146   // For little endian, the order of the input vectors is reversed, and
06147   // the permutation mask is complemented with respect to 31.  This is
06148   // necessary to produce proper semantics with the big-endian-biased vperm
06149   // instruction.
06150   EVT EltVT = V1.getValueType().getVectorElementType();
06151   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
06152 
06153   SmallVector<SDValue, 16> ResultMask;
06154   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
        // Undef mask entries (< 0) are arbitrarily mapped to element 0;
        // any byte works since the result lane is undefined.
06155     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
06156 
        // Expand the element-granularity mask into one i32 per byte of the
        // 16-byte vperm control vector.
06157     for (unsigned j = 0; j != BytesPerElement; ++j)
06158       if (isLittleEndian)
06159         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
06160                                              MVT::i32));
06161       else
06162         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
06163                                              MVT::i32));
06164   }
06165 
06166   SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
06167                                   ResultMask);
        // vperm selects bytes from the concatenation of its two vector
        // operands, numbered in big-endian order; on little-endian targets
        // the operands are therefore swapped (mask was complemented above).
06168   if (isLittleEndian)
06169     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
06170                        V2, V1, VPermMask);
06171   else
06172     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
06173                        V1, V2, VPermMask);
06174 }
06175 
06176 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
06177 /// altivec comparison.  If it is, return true and fill in Opc/isDot with
06178 /// information about the intrinsic.
06179 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
06180                                   bool &isDot) {
06181   unsigned IntrinsicID =
06182     cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
06183   CompareOpc = -1;
06184   isDot = false;
06185   switch (IntrinsicID) {
06186   default: return false;
06187     // Comparison predicates.
06188   case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
06189   case In