LLVM API Documentation

X86ISelLowering.cpp
00001 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines the interfaces that X86 uses to lower LLVM code into a
00011 // selection DAG.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "X86ISelLowering.h"
00016 #include "Utils/X86ShuffleDecode.h"
00017 #include "X86CallingConv.h"
00018 #include "X86InstrBuilder.h"
00019 #include "X86MachineFunctionInfo.h"
00020 #include "X86TargetMachine.h"
00021 #include "X86TargetObjectFile.h"
00022 #include "llvm/ADT/SmallSet.h"
00023 #include "llvm/ADT/Statistic.h"
00024 #include "llvm/ADT/StringExtras.h"
00025 #include "llvm/ADT/StringSwitch.h"
00026 #include "llvm/ADT/VariadicFunction.h"
00027 #include "llvm/CodeGen/IntrinsicLowering.h"
00028 #include "llvm/CodeGen/MachineFrameInfo.h"
00029 #include "llvm/CodeGen/MachineFunction.h"
00030 #include "llvm/CodeGen/MachineInstrBuilder.h"
00031 #include "llvm/CodeGen/MachineJumpTableInfo.h"
00032 #include "llvm/CodeGen/MachineModuleInfo.h"
00033 #include "llvm/CodeGen/MachineRegisterInfo.h"
00034 #include "llvm/IR/CallSite.h"
00035 #include "llvm/IR/CallingConv.h"
00036 #include "llvm/IR/Constants.h"
00037 #include "llvm/IR/DerivedTypes.h"
00038 #include "llvm/IR/Function.h"
00039 #include "llvm/IR/GlobalAlias.h"
00040 #include "llvm/IR/GlobalVariable.h"
00041 #include "llvm/IR/Instructions.h"
00042 #include "llvm/IR/Intrinsics.h"
00043 #include "llvm/MC/MCAsmInfo.h"
00044 #include "llvm/MC/MCContext.h"
00045 #include "llvm/MC/MCExpr.h"
00046 #include "llvm/MC/MCSymbol.h"
00047 #include "llvm/Support/CommandLine.h"
00048 #include "llvm/Support/Debug.h"
00049 #include "llvm/Support/ErrorHandling.h"
00050 #include "llvm/Support/MathExtras.h"
00051 #include "llvm/Target/TargetOptions.h"
00052 #include <bitset>
00053 #include <numeric>
00054 #include <cctype>
00055 using namespace llvm;
00056 
00057 #define DEBUG_TYPE "x86-isel"
00058 
00059 STATISTIC(NumTailCalls, "Number of tail calls");
00060 
00061 static cl::opt<bool> ExperimentalVectorWideningLegalization(
00062     "x86-experimental-vector-widening-legalization", cl::init(false),
00063     cl::desc("Enable an experimental vector type legalization through widening "
00064              "rather than promotion."),
00065     cl::Hidden);
00066 
00067 static cl::opt<bool> ExperimentalVectorShuffleLowering(
00068     "x86-experimental-vector-shuffle-lowering", cl::init(false),
00069     cl::desc("Enable an experimental vector shuffle lowering code path."),
00070     cl::Hidden);
00071 
00072 // Forward declarations.
00073 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
00074                        SDValue V2);
00075 
00076 static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
00077                                 SelectionDAG &DAG, SDLoc dl,
00078                                 unsigned vectorWidth) {
00079   assert((vectorWidth == 128 || vectorWidth == 256) &&
00080          "Unsupported vector width");
00081   EVT VT = Vec.getValueType();
00082   EVT ElVT = VT.getVectorElementType();
00083   unsigned Factor = VT.getSizeInBits()/vectorWidth;
00084   EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
00085                                   VT.getVectorNumElements()/Factor);
00086 
00087   // Extract from UNDEF is UNDEF.
00088   if (Vec.getOpcode() == ISD::UNDEF)
00089     return DAG.getUNDEF(ResultVT);
00090 
00091   // Extract the relevant vectorWidth bits.  Generate an EXTRACT_SUBVECTOR
00092   unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
00093 
00094   // This is the index of the first element of the vectorWidth-bit chunk
00095   // we want.
00096   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
00097                                * ElemsPerChunk);
00098 
00099   // If the input is a buildvector just emit a smaller one.
00100   if (Vec.getOpcode() == ISD::BUILD_VECTOR)
00101     return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
00102                        makeArrayRef(Vec->op_begin()+NormalizedIdxVal,
00103                                     ElemsPerChunk));
00104 
00105   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
00106   SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
00107                                VecIdx);
00108 
00109   return Result;
00110 
00111 }
00112 /// Generate a DAG to grab 128-bits from a vector > 128 bits.  This
00113 /// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
00114 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
00115 /// instructions or a simple subregister reference. Idx is an index in the
00116 /// 128 bits we want.  It need not be aligned to a 128-bit boundary.  That makes
00117 /// lowering EXTRACT_VECTOR_ELT operations easier.
00118 static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
00119                                    SelectionDAG &DAG, SDLoc dl) {
00120   assert((Vec.getValueType().is256BitVector() ||
00121           Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
00122   return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
00123 }
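
// Illustrative note (editor's sketch, not part of the upstream file): a worked
// example of the index normalization in ExtractSubVector. Extracting around
// element index 5 of a v8i32 with vectorWidth = 128 gives
//   ElVT = i32 (32 bits), ElemsPerChunk = 128 / 32 = 4,
//   NormalizedIdxVal = ((5 * 32) / 128) * 4 = 1 * 4 = 4,
// so the generated EXTRACT_SUBVECTOR starts at element 4 (the upper 128-bit
// lane) and yields a v4i32. An unaligned index is snapped down to the chunk
// containing it, which is what makes the unaligned-Idx contract of
// Extract128BitVector above work.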
00124 
00125 /// Generate a DAG to grab 256-bits from a 512-bit vector.
00126 static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
00127                                    SelectionDAG &DAG, SDLoc dl) {
00128   assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
00129   return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
00130 }
00131 
00132 static SDValue InsertSubVector(SDValue Result, SDValue Vec,
00133                                unsigned IdxVal, SelectionDAG &DAG,
00134                                SDLoc dl, unsigned vectorWidth) {
00135   assert((vectorWidth == 128 || vectorWidth == 256) &&
00136          "Unsupported vector width");
00137   // Inserting UNDEF is Result
00138   if (Vec.getOpcode() == ISD::UNDEF)
00139     return Result;
00140   EVT VT = Vec.getValueType();
00141   EVT ElVT = VT.getVectorElementType();
00142   EVT ResultVT = Result.getValueType();
00143 
00144   // Insert the relevant vectorWidth bits.
00145   unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
00146 
00147   // This is the index of the first element of the vectorWidth-bit chunk
00148   // we want.
00149   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
00150                                * ElemsPerChunk);
00151 
00152   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
00153   return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
00154                      VecIdx);
00155 }
00156 /// Generate a DAG to put 128-bits into a vector > 128 bits.  This
00157 /// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
00158 /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
00159 /// simple super-register reference.  Idx is an index in the 128 bits
00160 /// we want.  It need not be aligned to a 128-bit boundary.  That makes
00161 /// lowering INSERT_VECTOR_ELT operations easier.
00162 static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
00163                                   unsigned IdxVal, SelectionDAG &DAG,
00164                                   SDLoc dl) {
00165   assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
00166   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
00167 }
00168 
00169 static SDValue Insert256BitVector(SDValue Result, SDValue Vec,
00170                                   unsigned IdxVal, SelectionDAG &DAG,
00171                                   SDLoc dl) {
00172   assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
00173   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
00174 }
00175 
00176 /// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
00177 /// instructions. This is used because creating CONCAT_VECTORS nodes of
00178 /// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
00179 /// large BUILD_VECTORS.
00180 static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
00181                                    unsigned NumElems, SelectionDAG &DAG,
00182                                    SDLoc dl) {
00183   SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
00184   return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
00185 }
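
// Illustrative note (editor's sketch, not part of the upstream file): for a
// v8i32 result, Concat128BitVectors inserts V1 at element 0 of an UNDEF v8i32
// and V2 at element NumElems/2 = 4 (the upper 128-bit lane). Each insert maps
// to a VINSERTF128, so no CONCAT_VECTORS or oversized BUILD_VECTOR node is
// ever created while lowering large BUILD_VECTORs.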
00186 
00187 static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
00188                                    unsigned NumElems, SelectionDAG &DAG,
00189                                    SDLoc dl) {
00190   SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
00191   return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
00192 }
00193 
00194 static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
00195   if (TT.isOSBinFormatMachO()) {
00196     if (TT.getArch() == Triple::x86_64)
00197       return new X86_64MachoTargetObjectFile();
00198     return new TargetLoweringObjectFileMachO();
00199   }
00200 
00201   if (TT.isOSLinux())
00202     return new X86LinuxTargetObjectFile();
00203   if (TT.isOSBinFormatELF())
00204     return new TargetLoweringObjectFileELF();
00205   if (TT.isKnownWindowsMSVCEnvironment())
00206     return new X86WindowsTargetObjectFile();
00207   if (TT.isOSBinFormatCOFF())
00208     return new TargetLoweringObjectFileCOFF();
00209   llvm_unreachable("unknown subtarget type");
00210 }
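
// Illustrative note (editor's sketch; triples are examples only): how the
// dispatch above resolves for some common targets:
//   x86_64-apple-darwin      -> X86_64MachoTargetObjectFile
//   i386-apple-darwin        -> TargetLoweringObjectFileMachO
//   x86_64-unknown-linux-gnu -> X86LinuxTargetObjectFile (Linux checked first)
//   x86_64-pc-windows-msvc   -> X86WindowsTargetObjectFile
//   x86_64-w64-windows-gnu   -> TargetLoweringObjectFileCOFF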
00211 
00212 // FIXME: This should stop caching the target machine as soon as
00213 // we can remove resetOperationActions et al.
00214 X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
00215   : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
00216   Subtarget = &TM.getSubtarget<X86Subtarget>();
00217   X86ScalarSSEf64 = Subtarget->hasSSE2();
00218   X86ScalarSSEf32 = Subtarget->hasSSE1();
00219   TD = getDataLayout();
00220 
00221   resetOperationActions();
00222 }
00223 
00224 void X86TargetLowering::resetOperationActions() {
00225   const TargetMachine &TM = getTargetMachine();
00226   static bool FirstTimeThrough = true;
00227 
00228   // If none of the target options have changed, then we don't need to reset the
00229   // operation actions.
00230   if (!FirstTimeThrough && TO == TM.Options) return;
00231 
00232   if (!FirstTimeThrough) {
00233     // Reinitialize the actions.
00234     initActions();
00235     FirstTimeThrough = false;
00236   }
00237 
00238   TO = TM.Options;
00239 
00240   // Set up the TargetLowering object.
00241   static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
00242 
00243   // X86 is weird; it always uses i8 for shift amounts and setcc results.
00244   setBooleanContents(ZeroOrOneBooleanContent);
00245   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
00246   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
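
// Illustrative note (editor's sketch, not part of the upstream file): with
// these settings a scalar SETCC yields 0 or 1 in an i8 (what the SETcc
// instructions write), while a vector compare such as PCMPGTD yields per-lane
// masks of all-ones (-1) or 0, which is what the SSE and/andn/or blend idioms
// expect.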
00247 
00248   // For 64-bit, since we have so many registers, use the ILP scheduler; for
00249   // 32-bit code, use register-pressure-specific scheduling.
00250   // For Atom, always use ILP scheduling.
00251   if (Subtarget->isAtom())
00252     setSchedulingPreference(Sched::ILP);
00253   else if (Subtarget->is64Bit())
00254     setSchedulingPreference(Sched::ILP);
00255   else
00256     setSchedulingPreference(Sched::RegPressure);
00257   const X86RegisterInfo *RegInfo =
00258       TM.getSubtarget<X86Subtarget>().getRegisterInfo();
00259   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
00260 
00261   // Bypass expensive divides on Atom when compiling with O2
00262   if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
00263     addBypassSlowDiv(32, 8);
00264     if (Subtarget->is64Bit())
00265       addBypassSlowDiv(64, 16);
00266   }
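
// Illustrative note (editor's sketch; pass details assumed): addBypassSlowDiv(32, 8)
// asks the generic slow-division bypass to guard each 32-bit divide with a
// runtime check; when both operands happen to fit in 8 bits, the much cheaper
// 8-bit divide is used instead of the full-width IDIV, which is slow on Atom.
// The 64-bit entry applies the same trick to 64-bit divides.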
00267 
00268   if (Subtarget->isTargetKnownWindowsMSVC()) {
00269     // Setup Windows compiler runtime calls.
00270     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
00271     setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
00272     setLibcallName(RTLIB::SREM_I64, "_allrem");
00273     setLibcallName(RTLIB::UREM_I64, "_aullrem");
00274     setLibcallName(RTLIB::MUL_I64, "_allmul");
00275     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
00276     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
00277     setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
00278     setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
00279     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
00280 
00281     // The _ftol2 runtime function has an unusual calling conv, which
00282     // is modeled by a special pseudo-instruction.
00283     setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
00284     setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
00285     setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
00286     setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
00287   }
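
// Illustrative note (editor's sketch, not part of the upstream file): with the
// names above, IR such as
//   %q = sdiv i64 %a, %b
// on a 32-bit MSVC target becomes a call to _alldiv under the stdcall-style
// convention, since 32-bit mode has no 64-bit divide instruction. The
// FPTOUINT entries are cleared because _ftol2 is reached through a dedicated
// pseudo-instruction instead of the normal libcall path.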
00288 
00289   if (Subtarget->isTargetDarwin()) {
00290     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
00291     setUseUnderscoreSetJmp(false);
00292     setUseUnderscoreLongJmp(false);
00293   } else if (Subtarget->isTargetWindowsGNU()) {
00294     // MS runtime is weird: it exports _setjmp, but plain longjmp (no underscore)!
00295     setUseUnderscoreSetJmp(true);
00296     setUseUnderscoreLongJmp(false);
00297   } else {
00298     setUseUnderscoreSetJmp(true);
00299     setUseUnderscoreLongJmp(true);
00300   }
00301 
00302   // Set up the register classes.
00303   addRegisterClass(MVT::i8, &X86::GR8RegClass);
00304   addRegisterClass(MVT::i16, &X86::GR16RegClass);
00305   addRegisterClass(MVT::i32, &X86::GR32RegClass);
00306   if (Subtarget->is64Bit())
00307     addRegisterClass(MVT::i64, &X86::GR64RegClass);
00308 
00309   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
00310 
00311   // We don't accept any truncstore of integer registers.
00312   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
00313   setTruncStoreAction(MVT::i64, MVT::i16, Expand);
00314   setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
00315   setTruncStoreAction(MVT::i32, MVT::i16, Expand);
00316   setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
00317   setTruncStoreAction(MVT::i16, MVT::i8,  Expand);
00318 
00319   // SETOEQ and SETUNE require checking two conditions.
00320   setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
00321   setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
00322   setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
00323   setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
00324   setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
00325   setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
00326 
00327   // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
00328   // operation.
00329   setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
00330   setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
00331   setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
00332 
00333   if (Subtarget->is64Bit()) {
00334     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
00335     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
00336   } else if (!TM.Options.UseSoftFloat) {
00337     // We have an algorithm for SSE2->double, and we turn this into a
00338     // 64-bit FILD followed by conditional FADD for other targets.
00339     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
00340     // We have an algorithm for SSE2, and we turn this into a 64-bit
00341     // FILD for other targets.
00342     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
00343   }
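
// Illustrative note (editor's sketch, not part of the upstream file): Promote
// here means the unsigned operand is widened and converted through the signed
// path, e.g. u16 -> f64 becomes a zero-extend to i32 followed by a signed
// i32 -> f64 convert, which is exact because every u16 value is non-negative
// and fits in a signed i32. i64 (and i32 when no wider legal integer type
// exists) must instead be handled by the Custom lowering.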
00344 
00345   // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
00346   // this operation.
00347   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
00348   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
00349 
00350   if (!TM.Options.UseSoftFloat) {
00351     // SSE has no i16 to fp conversion, only i32
00352     if (X86ScalarSSEf32) {
00353       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00354       // f32 and f64 cases are Legal, f80 case is not
00355       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00356     } else {
00357       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
00358       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00359     }
00360   } else {
00361     setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00362     setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Promote);
00363   }
00364 
00365   // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
00366   // are Legal, f80 is custom lowered.
00367   setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
00368   setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
00369 
00370   // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
00371   // this operation.
00372   setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
00373   setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
00374 
00375   if (X86ScalarSSEf32) {
00376     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
00377     // f32 and f64 cases are Legal, f80 case is not
00378     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00379   } else {
00380     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
00381     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00382   }
00383 
00384   // Handle FP_TO_UINT by promoting the destination to a larger signed
00385   // conversion.
00386   setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
00387   setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
00388   setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
00389 
00390   if (Subtarget->is64Bit()) {
00391     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
00392     setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
00393   } else if (!TM.Options.UseSoftFloat) {
00394     // Since AVX is a superset of SSE3, only check for SSE here.
00395     if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
00396       // Expand FP_TO_UINT into a select.
00397       // FIXME: We would like to use a Custom expander here eventually to do
00398       // the optimal thing for SSE vs. the default expansion in the legalizer.
00399       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
00400     else
00401       // With SSE3 we can use fisttpll to convert to a signed i64; without
00402       // SSE, we're stuck with a fistpll.
00403       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
00404   }
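
// Illustrative note (editor's sketch, not part of the upstream file): for the
// promoted cases, fp -> u16 is done as fp -> s32 followed by truncation, which
// is safe because the u16 range [0, 65535] lies inside the signed i32 range.
// The widest unsigned result on each target cannot be widened this way, which
// is why it gets the Expand/Custom treatment above.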
00405 
00406   if (isTargetFTOL()) {
00407     // Use the _ftol2 runtime function, which has a pseudo-instruction
00408     // to handle its weird calling convention.
00409     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Custom);
00410   }
00411 
00412   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
00413   if (!X86ScalarSSEf64) {
00414     setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
00415     setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
00416     if (Subtarget->is64Bit()) {
00417       setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
00418       // Without SSE, i64->f64 goes through memory.
00419       setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
00420     }
00421   }
00422 
00423   // Scalar integer divide and remainder are lowered to use operations that
00424   // produce two results, to match the available instructions. This exposes
00425   // the two-result form to trivial CSE, which is able to combine x/y and x%y
00426   // into a single instruction.
00427   //
00428   // Scalar integer multiply-high is also lowered to use two-result
00429   // operations, to match the available instructions. However, plain multiply
00430   // (low) operations are left as Legal, as there are single-result
00431   // instructions for this in x86. Using the two-result multiply instructions
00432   // when both high and low results are needed must be arranged by dagcombine.
00433   for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
00434     MVT VT = IntVTs[i];
00435     setOperationAction(ISD::MULHS, VT, Expand);
00436     setOperationAction(ISD::MULHU, VT, Expand);
00437     setOperationAction(ISD::SDIV, VT, Expand);
00438     setOperationAction(ISD::UDIV, VT, Expand);
00439     setOperationAction(ISD::SREM, VT, Expand);
00440     setOperationAction(ISD::UREM, VT, Expand);
00441 
00442     // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
00443     setOperationAction(ISD::ADDC, VT, Custom);
00444     setOperationAction(ISD::ADDE, VT, Custom);
00445     setOperationAction(ISD::SUBC, VT, Custom);
00446     setOperationAction(ISD::SUBE, VT, Custom);
00447   }
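
// Illustrative note (editor's sketch, not part of the upstream file): with
// SDIV/SREM marked Expand, IR such as
//   %q = sdiv i32 %x, %y
//   %r = srem i32 %x, %y
// is legalized to the two-result ISD::SDIVREM form; the identical nodes are
// CSE'd, and the single surviving SDIVREM matches one IDIV that leaves the
// quotient in EAX and the remainder in EDX.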
00448 
00449   setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
00450   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
00451   setOperationAction(ISD::BR_CC            , MVT::f32,   Expand);
00452   setOperationAction(ISD::BR_CC            , MVT::f64,   Expand);
00453   setOperationAction(ISD::BR_CC            , MVT::f80,   Expand);
00454   setOperationAction(ISD::BR_CC            , MVT::i8,    Expand);
00455   setOperationAction(ISD::BR_CC            , MVT::i16,   Expand);
00456   setOperationAction(ISD::BR_CC            , MVT::i32,   Expand);
00457   setOperationAction(ISD::BR_CC            , MVT::i64,   Expand);
00458   setOperationAction(ISD::SELECT_CC        , MVT::f32,   Expand);
00459   setOperationAction(ISD::SELECT_CC        , MVT::f64,   Expand);
00460   setOperationAction(ISD::SELECT_CC        , MVT::f80,   Expand);
00461   setOperationAction(ISD::SELECT_CC        , MVT::i8,    Expand);
00462   setOperationAction(ISD::SELECT_CC        , MVT::i16,   Expand);
00463   setOperationAction(ISD::SELECT_CC        , MVT::i32,   Expand);
00464   setOperationAction(ISD::SELECT_CC        , MVT::i64,   Expand);
00465   if (Subtarget->is64Bit())
00466     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
00467   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
00468   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
00469   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
00470   setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
00471   setOperationAction(ISD::FREM             , MVT::f32  , Expand);
00472   setOperationAction(ISD::FREM             , MVT::f64  , Expand);
00473   setOperationAction(ISD::FREM             , MVT::f80  , Expand);
00474   setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
00475 
00476   // Promote the i8 variants and force them on up to i32 which has a shorter
00477   // encoding.
00478   setOperationAction(ISD::CTTZ             , MVT::i8   , Promote);
00479   AddPromotedToType (ISD::CTTZ             , MVT::i8   , MVT::i32);
00480   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , Promote);
00481   AddPromotedToType (ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , MVT::i32);
00482   if (Subtarget->hasBMI()) {
00483     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , Expand);
00484     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Expand);
00485     if (Subtarget->is64Bit())
00486       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
00487   } else {
00488     setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
00489     setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
00490     if (Subtarget->is64Bit())
00491       setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
00492   }
00493 
00494   if (Subtarget->hasLZCNT()) {
00495     // When promoting the i8 variants, force them to i32 for a shorter
00496     // encoding.
00497     setOperationAction(ISD::CTLZ           , MVT::i8   , Promote);
00498     AddPromotedToType (ISD::CTLZ           , MVT::i8   , MVT::i32);
00499     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Promote);
00500     AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
00501     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Expand);
00502     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Expand);
00503     if (Subtarget->is64Bit())
00504       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
00505   } else {
00506     setOperationAction(ISD::CTLZ           , MVT::i8   , Custom);
00507     setOperationAction(ISD::CTLZ           , MVT::i16  , Custom);
00508     setOperationAction(ISD::CTLZ           , MVT::i32  , Custom);
00509     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Custom);
00510     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Custom);
00511     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Custom);
00512     if (Subtarget->is64Bit()) {
00513       setOperationAction(ISD::CTLZ         , MVT::i64  , Custom);
00514       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
00515     }
00516   }
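
// Illustrative note (editor's sketch, not part of the upstream file): the
// asymmetry above exists because BSF/BSR leave their destination undefined for
// a zero source. Without BMI/LZCNT, plain CTTZ/CTLZ need Custom lowering to
// patch up the zero case, while the _ZERO_UNDEF variants can use BSF/BSR
// directly. TZCNT/LZCNT define the zero case (they return the operand width),
// so there the _ZERO_UNDEF forms simply Expand to the now-cheap CTTZ/CTLZ.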
00517 
00518   // Special handling for half-precision floating point conversions.
00519   // If we don't have F16C support, then lower half float conversions
00520   // into library calls.
00521   if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
00522     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
00523     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
00524   }
00525 
00526   // There's never any support for operations beyond MVT::f32.
00527   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
00528   setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
00529   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
00530   setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
00531 
00532   setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
00533   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
00534   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
00535   setTruncStoreAction(MVT::f80, MVT::f16, Expand);
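
// Illustrative note (editor's sketch; library-call names assumed): without
// F16C, Expand here turns fp16 conversions into runtime calls (typically the
// __gnu_h2f_ieee / __gnu_f2h_ieee helpers), whereas with F16C the f32<->f16
// conversions can use the VCVTPH2PS / VCVTPS2PH instructions directly.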
00536 
00537   if (Subtarget->hasPOPCNT()) {
00538     setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
00539   } else {
00540     setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
00541     setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
00542     setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
00543     if (Subtarget->is64Bit())
00544       setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
00545   }
00546 
00547   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
00548 
00549   if (!Subtarget->hasMOVBE())
00550     setOperationAction(ISD::BSWAP          , MVT::i16  , Expand);
00551 
00552   // These should be promoted to a larger select which is supported.
00553   setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
00554   // X86 wants to expand cmov itself.
00555   setOperationAction(ISD::SELECT          , MVT::i8   , Custom);
00556   setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
00557   setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
00558   setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
00559   setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
00560   setOperationAction(ISD::SELECT          , MVT::f80  , Custom);
00561   setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
00562   setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
00563   setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
00564   setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
00565   setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
00566   setOperationAction(ISD::SETCC           , MVT::f80  , Custom);
00567   if (Subtarget->is64Bit()) {
00568     setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
00569     setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
00570   }
00571   setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
00572   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
00573   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
00574   // support continuation, user-level threading, etc.  As a result, no
00575   // other SjLj exception interfaces are implemented and please don't build
00576   // your own exception handling based on them.
00577   // LLVM/Clang supports zero-cost DWARF exception handling.
00578   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
00579   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
00580 
00581   // Darwin ABI issue.
00582   setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
00583   setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
00584   setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
00585   setOperationAction(ISD::GlobalTLSAddress, MVT::i32  , Custom);
00586   if (Subtarget->is64Bit())
00587     setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
00588   setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
00589   setOperationAction(ISD::BlockAddress    , MVT::i32  , Custom);
00590   if (Subtarget->is64Bit()) {
00591     setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
00592     setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
00593     setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
00594     setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
00595     setOperationAction(ISD::BlockAddress  , MVT::i64  , Custom);
00596   }
00597   // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
00598   setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
00599   setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
00600   setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
00601   if (Subtarget->is64Bit()) {
00602     setOperationAction(ISD::SHL_PARTS     , MVT::i64  , Custom);
00603     setOperationAction(ISD::SRA_PARTS     , MVT::i64  , Custom);
00604     setOperationAction(ISD::SRL_PARTS     , MVT::i64  , Custom);
00605   }
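
// Illustrative note (editor's sketch; lowering details assumed): on 32-bit x86
// a 64-bit value lives in a register pair, so 64-bit shifts are expanded to
// the *_PARTS nodes above. The custom lowering emits an SHLD/SHRD pair for the
// in-range case plus a test of the "shift amount >= 32" bit with a conditional
// move, rather than falling back to a library shift routine.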
00606 
00607   if (Subtarget->hasSSE1())
00608     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
00609 
00610   setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);
00611 
00612   // Expand certain atomics
00613   for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
00614     MVT VT = IntVTs[i];
00615     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
00616     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
00617     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
00618   }
00619 
00620   if (Subtarget->hasCmpxchg16b()) {
00621     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
00622   }
00623 
00624   // FIXME - use subtarget debug flags
00625   if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
00626       !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
00627     setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
00628   }
00629 
00630   if (Subtarget->is64Bit()) {
00631     setExceptionPointerRegister(X86::RAX);
00632     setExceptionSelectorRegister(X86::RDX);
00633   } else {
00634     setExceptionPointerRegister(X86::EAX);
00635     setExceptionSelectorRegister(X86::EDX);
00636   }
00637   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
00638   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
00639 
00640   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
00641   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
00642 
00643   setOperationAction(ISD::TRAP, MVT::Other, Legal);
00644   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
00645 
00646   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
00647   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
00648   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
00649   if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
00650     // TargetInfo::X86_64ABIBuiltinVaList
00651     setOperationAction(ISD::VAARG           , MVT::Other, Custom);
00652     setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
00653   } else {
00654     // TargetInfo::CharPtrBuiltinVaList
00655     setOperationAction(ISD::VAARG           , MVT::Other, Expand);
00656     setOperationAction(ISD::VACOPY          , MVT::Other, Expand);
00657   }
00658 
00659   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
00660   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
00661 
00662   setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
00663 
00664   if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
00665     // f32 and f64 use SSE.
00666     // Set up the FP register classes.
00667     addRegisterClass(MVT::f32, &X86::FR32RegClass);
00668     addRegisterClass(MVT::f64, &X86::FR64RegClass);
00669 
00670     // Use ANDPD to simulate FABS.
00671     setOperationAction(ISD::FABS , MVT::f64, Custom);
00672     setOperationAction(ISD::FABS , MVT::f32, Custom);
00673 
00674     // Use XORP to simulate FNEG.
00675     setOperationAction(ISD::FNEG , MVT::f64, Custom);
00676     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00677 
00678     // Use ANDPD and ORPD to simulate FCOPYSIGN.
00679     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
00680     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00681 
00682     // Lower this to FGETSIGNx86 plus an AND.
00683     setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
00684     setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
00685 
00686     // We don't support sin/cos/fmod
00687     setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00688     setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00689     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00690     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00691     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00692     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00693 
00694     // Expand FP immediates into loads from the stack, except for the special
00695     // cases we handle.
00696     addLegalFPImmediate(APFloat(+0.0)); // xorpd
00697     addLegalFPImmediate(APFloat(+0.0f)); // xorps
00698   } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
00699     // Use SSE for f32, x87 for f64.
00700     // Set up the FP register classes.
00701     addRegisterClass(MVT::f32, &X86::FR32RegClass);
00702     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
00703 
00704     // Use ANDPS to simulate FABS.
00705     setOperationAction(ISD::FABS , MVT::f32, Custom);
00706 
00707     // Use XORP to simulate FNEG.
00708     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00709 
00710     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
00711 
00712     // Use ANDPS and ORPS to simulate FCOPYSIGN.
00713     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00714     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00715 
00716     // We don't support sin/cos/fmod
00717     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00718     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00719     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00720 
00721     // Special cases we handle for FP constants.
00722     addLegalFPImmediate(APFloat(+0.0f)); // xorps
00723     addLegalFPImmediate(APFloat(+0.0)); // FLD0
00724     addLegalFPImmediate(APFloat(+1.0)); // FLD1
00725     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
00726     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
00727 
00728     if (!TM.Options.UnsafeFPMath) {
00729       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00730       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00731       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00732     }
00733   } else if (!TM.Options.UseSoftFloat) {
00734     // f32 and f64 in x87.
00735     // Set up the FP register classes.
00736     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
00737     addRegisterClass(MVT::f32, &X86::RFP32RegClass);
00738 
00739     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
00740     setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
00741     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00742     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
00743 
00744     if (!TM.Options.UnsafeFPMath) {
00745       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00746       setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00747       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00748       setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00749       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00750       setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00751     }
00752     addLegalFPImmediate(APFloat(+0.0)); // FLD0
00753     addLegalFPImmediate(APFloat(+1.0)); // FLD1
00754     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
00755     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
00756     addLegalFPImmediate(APFloat(+0.0f)); // FLD0
00757     addLegalFPImmediate(APFloat(+1.0f)); // FLD1
00758     addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
00759     addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
00760   }
00761 
00762   // We don't support FMA.
00763   setOperationAction(ISD::FMA, MVT::f64, Expand);
00764   setOperationAction(ISD::FMA, MVT::f32, Expand);
00765 
00766   // Long double always uses X87.
00767   if (!TM.Options.UseSoftFloat) {
00768     addRegisterClass(MVT::f80, &X86::RFP80RegClass);
00769     setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
00770     setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
00771     {
00772       APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
00773       addLegalFPImmediate(TmpFlt);  // FLD0
00774       TmpFlt.changeSign();
00775       addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
00776 
00777       bool ignored;
00778       APFloat TmpFlt2(+1.0);
00779       TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
00780                       &ignored);
00781       addLegalFPImmediate(TmpFlt2);  // FLD1
00782       TmpFlt2.changeSign();
00783       addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
00784     }
00785 
00786     if (!TM.Options.UnsafeFPMath) {
00787       setOperationAction(ISD::FSIN   , MVT::f80, Expand);
00788       setOperationAction(ISD::FCOS   , MVT::f80, Expand);
00789       setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
00790     }
00791 
00792     setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
00793     setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
00794     setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
00795     setOperationAction(ISD::FRINT,  MVT::f80, Expand);
00796     setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
00797     setOperationAction(ISD::FMA, MVT::f80, Expand);
00798   }
00799 
00800   // Always use a library call for pow.
00801   setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
00802   setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
00803   setOperationAction(ISD::FPOW             , MVT::f80  , Expand);
00804 
00805   setOperationAction(ISD::FLOG, MVT::f80, Expand);
00806   setOperationAction(ISD::FLOG2, MVT::f80, Expand);
00807   setOperationAction(ISD::FLOG10, MVT::f80, Expand);
00808   setOperationAction(ISD::FEXP, MVT::f80, Expand);
00809   setOperationAction(ISD::FEXP2, MVT::f80, Expand);
00810 
00811   // First set operation action for all vector types to either promote
00812   // (for widening) or expand (for scalarization). Then we will selectively
00813   // turn on ones that can be effectively codegen'd.
00814   for (int i = MVT::FIRST_VECTOR_VALUETYPE;
00815            i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
00816     MVT VT = (MVT::SimpleValueType)i;
00817     setOperationAction(ISD::ADD , VT, Expand);
00818     setOperationAction(ISD::SUB , VT, Expand);
00819     setOperationAction(ISD::FADD, VT, Expand);
00820     setOperationAction(ISD::FNEG, VT, Expand);
00821     setOperationAction(ISD::FSUB, VT, Expand);
00822     setOperationAction(ISD::MUL , VT, Expand);
00823     setOperationAction(ISD::FMUL, VT, Expand);
00824     setOperationAction(ISD::SDIV, VT, Expand);
00825     setOperationAction(ISD::UDIV, VT, Expand);
00826     setOperationAction(ISD::FDIV, VT, Expand);
00827     setOperationAction(ISD::SREM, VT, Expand);
00828     setOperationAction(ISD::UREM, VT, Expand);
00829     setOperationAction(ISD::LOAD, VT, Expand);
00830     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
00831     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
00832     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
00833     setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
00834     setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
00835     setOperationAction(ISD::FABS, VT, Expand);
00836     setOperationAction(ISD::FSIN, VT, Expand);
00837     setOperationAction(ISD::FSINCOS, VT, Expand);
00838     setOperationAction(ISD::FCOS, VT, Expand);
00839     setOperationAction(ISD::FSINCOS, VT, Expand);
00840     setOperationAction(ISD::FREM, VT, Expand);
00841     setOperationAction(ISD::FMA,  VT, Expand);
00842     setOperationAction(ISD::FPOWI, VT, Expand);
00843     setOperationAction(ISD::FSQRT, VT, Expand);
00844     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
00845     setOperationAction(ISD::FFLOOR, VT, Expand);
00846     setOperationAction(ISD::FCEIL, VT, Expand);
00847     setOperationAction(ISD::FTRUNC, VT, Expand);
00848     setOperationAction(ISD::FRINT, VT, Expand);
00849     setOperationAction(ISD::FNEARBYINT, VT, Expand);
00850     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
00851     setOperationAction(ISD::MULHS, VT, Expand);
00852     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
00853     setOperationAction(ISD::MULHU, VT, Expand);
00854     setOperationAction(ISD::SDIVREM, VT, Expand);
00855     setOperationAction(ISD::UDIVREM, VT, Expand);
00856     setOperationAction(ISD::FPOW, VT, Expand);
00857     setOperationAction(ISD::CTPOP, VT, Expand);
00858     setOperationAction(ISD::CTTZ, VT, Expand);
00859     setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
00860     setOperationAction(ISD::CTLZ, VT, Expand);
00861     setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
00862     setOperationAction(ISD::SHL, VT, Expand);
00863     setOperationAction(ISD::SRA, VT, Expand);
00864     setOperationAction(ISD::SRL, VT, Expand);
00865     setOperationAction(ISD::ROTL, VT, Expand);
00866     setOperationAction(ISD::ROTR, VT, Expand);
00867     setOperationAction(ISD::BSWAP, VT, Expand);
00868     setOperationAction(ISD::SETCC, VT, Expand);
00869     setOperationAction(ISD::FLOG, VT, Expand);
00870     setOperationAction(ISD::FLOG2, VT, Expand);
00871     setOperationAction(ISD::FLOG10, VT, Expand);
00872     setOperationAction(ISD::FEXP, VT, Expand);
00873     setOperationAction(ISD::FEXP2, VT, Expand);
00874     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
00875     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
00876     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
00877     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
00878     setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
00879     setOperationAction(ISD::TRUNCATE, VT, Expand);
00880     setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
00881     setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
00882     setOperationAction(ISD::ANY_EXTEND, VT, Expand);
00883     setOperationAction(ISD::VSELECT, VT, Expand);
00884     setOperationAction(ISD::SELECT_CC, VT, Expand);
00885     for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
00886              InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
00887       setTruncStoreAction(VT,
00888                           (MVT::SimpleValueType)InnerVT, Expand);
00889     setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
00890     setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
00891 
00892     // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like types;
00893     // we have to deal with them whether we ask for Expansion or not. Setting
00894     // Expand causes its own optimisation problems though, so leave them legal.
00895     if (VT.getVectorElementType() == MVT::i1)
00896       setLoadExtAction(ISD::EXTLOAD, VT, Expand);
00897   }
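
// Illustrative note (editor's sketch, not part of the upstream file): the loop
// above deliberately pessimizes every vector operation on every vector type to
// Expand; the feature-gated blocks that follow (hasSSE1, hasSSE2, hasSSE41,
// and so on) then flip individual operations back to Legal or Custom. For
// example, ISD::ADD on v4i32 is re-marked Legal in the hasSSE2() block below,
// so only combinations the subtarget can really codegen stay enabled.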
00898 
00899   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
00900   // with -msoft-float, disable use of MMX as well.
00901   if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
00902     addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
00903     // No operations on x86mmx supported, everything uses intrinsics.
00904   }
00905 
00906   // MMX-sized vectors (other than x86mmx) are expected to be expanded
00907   // into smaller operations.
00908   setOperationAction(ISD::MULHS,              MVT::v8i8,  Expand);
00909   setOperationAction(ISD::MULHS,              MVT::v4i16, Expand);
00910   setOperationAction(ISD::MULHS,              MVT::v2i32, Expand);
00911   setOperationAction(ISD::MULHS,              MVT::v1i64, Expand);
00912   setOperationAction(ISD::AND,                MVT::v8i8,  Expand);
00913   setOperationAction(ISD::AND,                MVT::v4i16, Expand);
00914   setOperationAction(ISD::AND,                MVT::v2i32, Expand);
00915   setOperationAction(ISD::AND,                MVT::v1i64, Expand);
00916   setOperationAction(ISD::OR,                 MVT::v8i8,  Expand);
00917   setOperationAction(ISD::OR,                 MVT::v4i16, Expand);
00918   setOperationAction(ISD::OR,                 MVT::v2i32, Expand);
00919   setOperationAction(ISD::OR,                 MVT::v1i64, Expand);
00920   setOperationAction(ISD::XOR,                MVT::v8i8,  Expand);
00921   setOperationAction(ISD::XOR,                MVT::v4i16, Expand);
00922   setOperationAction(ISD::XOR,                MVT::v2i32, Expand);
00923   setOperationAction(ISD::XOR,                MVT::v1i64, Expand);
00924   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i8,  Expand);
00925   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i16, Expand);
00926   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v2i32, Expand);
00927   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v1i64, Expand);
00928   setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v1i64, Expand);
00929   setOperationAction(ISD::SELECT,             MVT::v8i8,  Expand);
00930   setOperationAction(ISD::SELECT,             MVT::v4i16, Expand);
00931   setOperationAction(ISD::SELECT,             MVT::v2i32, Expand);
00932   setOperationAction(ISD::SELECT,             MVT::v1i64, Expand);
00933   setOperationAction(ISD::BITCAST,            MVT::v8i8,  Expand);
00934   setOperationAction(ISD::BITCAST,            MVT::v4i16, Expand);
00935   setOperationAction(ISD::BITCAST,            MVT::v2i32, Expand);
00936   setOperationAction(ISD::BITCAST,            MVT::v1i64, Expand);
00937 
00938   if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
00939     addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
00940 
00941     setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
00942     setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
00943     setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
00944     setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
00945     setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
00946     setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
00947     setOperationAction(ISD::FABS,               MVT::v4f32, Custom);
00948     setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
00949     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
00950     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
00951     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
00952     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
00953   }
00954 
00955   if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
00956     addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
00957 
00958     // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
00959     // registers cannot be used even for integer operations.
00960     addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
00961     addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
00962     addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
00963     addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
00964 
00965     setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
00966     setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
00967     setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
00968     setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
00969     setOperationAction(ISD::MUL,                MVT::v4i32, Custom);
00970     setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
00971     setOperationAction(ISD::UMUL_LOHI,          MVT::v4i32, Custom);
00972     setOperationAction(ISD::SMUL_LOHI,          MVT::v4i32, Custom);
00973     setOperationAction(ISD::MULHU,              MVT::v8i16, Legal);
00974     setOperationAction(ISD::MULHS,              MVT::v8i16, Legal);
00975     setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
00976     setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
00977     setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
00978     setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
00979     setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
00980     setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
00981     setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
00982     setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
00983     setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
00984     setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
00985     setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
00986     setOperationAction(ISD::FABS,               MVT::v2f64, Custom);
00987 
00988     setOperationAction(ISD::SETCC,              MVT::v2i64, Custom);
00989     setOperationAction(ISD::SETCC,              MVT::v16i8, Custom);
00990     setOperationAction(ISD::SETCC,              MVT::v8i16, Custom);
00991     setOperationAction(ISD::SETCC,              MVT::v4i32, Custom);
00992 
00993     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
00994     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
00995     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
00996     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
00997     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
00998 
00999     // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
01000     for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
01001       MVT VT = (MVT::SimpleValueType)i;
01002       // Do not attempt to custom lower non-power-of-2 vectors
01003       if (!isPowerOf2_32(VT.getVectorNumElements()))
01004         continue;
01005       // Do not attempt to custom lower non-128-bit vectors
01006       if (!VT.is128BitVector())
01007         continue;
01008       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
01009       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
01010       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
01011     }
01012 
01013     // We support custom legalizing of sext and anyext loads for specific
01014     // memory vector types which we can load as a scalar (or sequence of
01015     // scalars) and extend in-register to a legal 128-bit vector type. For sext
01016     // loads these must work with a single scalar load.
01017     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Custom);
01018     if (Subtarget->is64Bit()) {
01019       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Custom);
01020       setLoadExtAction(ISD::SEXTLOAD, MVT::v8i8, Custom);
01021     }
01022     setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Custom);
01023     setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Custom);
01024     setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, Custom);
01025     setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
01026     setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Custom);
01027     setLoadExtAction(ISD::EXTLOAD, MVT::v8i8, Custom);
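
// Illustrative note (editor's sketch; instruction choices assumed): marking
// e.g. SEXTLOAD of v4i8 as Custom lets a load of four i8s be done as a single
// 32-bit scalar load whose lanes are then sign-extended in-register (PMOVSXBD
// with SSE4.1, or an unpack-and-arithmetic-shift sequence otherwise), instead
// of the generic expansion into separate element loads.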
01028 
01029     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
01030     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
01031     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
01032     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
01033     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
01034     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
01035 
01036     if (Subtarget->is64Bit()) {
01037       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
01038       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
01039     }
01040 
01041     // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
01042     for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
01043       MVT VT = (MVT::SimpleValueType)i;
01044 
01045       // Do not attempt to promote non-128-bit vectors
01046       if (!VT.is128BitVector())
01047         continue;
01048 
01049       setOperationAction(ISD::AND,    VT, Promote);
01050       AddPromotedToType (ISD::AND,    VT, MVT::v2i64);
01051       setOperationAction(ISD::OR,     VT, Promote);
01052       AddPromotedToType (ISD::OR,     VT, MVT::v2i64);
01053       setOperationAction(ISD::XOR,    VT, Promote);
01054       AddPromotedToType (ISD::XOR,    VT, MVT::v2i64);
01055       setOperationAction(ISD::LOAD,   VT, Promote);
01056       AddPromotedToType (ISD::LOAD,   VT, MVT::v2i64);
01057       setOperationAction(ISD::SELECT, VT, Promote);
01058       AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
01059     }
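
// Illustrative note (editor's sketch, not part of the upstream file): Promote
// plus AddPromotedToType(v2i64) means these operations are performed by
// bitcasting to v2i64, operating there, and bitcasting back. That is sound for
// AND/OR/XOR, full 128-bit loads, and whole-vector selects because none of
// them depend on how the 128 bits are sliced into elements, so one
// PAND/POR/PXOR/MOVDQA pattern covers v16i8, v8i16, and v4i32 alike.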
01060 
01061     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
01062 
01063     // Custom lower v2i64 and v2f64 selects.
01064     setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
01065     setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
01066     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
01067     setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
01068 
01069     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
01070     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
01071 
01072     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i8,  Custom);
01073     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i16, Custom);
01074     // As there is no 64-bit GPR available, we need to build a special custom
01075     // sequence to convert from v2i32 to v2f32.
01076     if (!Subtarget->is64Bit())
01077       setOperationAction(ISD::UINT_TO_FP,       MVT::v2f32, Custom);
01078 
01079     setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
01080     setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
01081 
01082     setLoadExtAction(ISD::EXTLOAD,              MVT::v2f32, Legal);
01083 
01084     setOperationAction(ISD::BITCAST,            MVT::v2i32, Custom);
01085     setOperationAction(ISD::BITCAST,            MVT::v4i16, Custom);
01086     setOperationAction(ISD::BITCAST,            MVT::v8i8,  Custom);
01087   }
01088 
01089   if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
01090     setOperationAction(ISD::FFLOOR,             MVT::f32,   Legal);
01091     setOperationAction(ISD::FCEIL,              MVT::f32,   Legal);
01092     setOperationAction(ISD::FTRUNC,             MVT::f32,   Legal);
01093     setOperationAction(ISD::FRINT,              MVT::f32,   Legal);
01094     setOperationAction(ISD::FNEARBYINT,         MVT::f32,   Legal);
01095     setOperationAction(ISD::FFLOOR,             MVT::f64,   Legal);
01096     setOperationAction(ISD::FCEIL,              MVT::f64,   Legal);
01097     setOperationAction(ISD::FTRUNC,             MVT::f64,   Legal);
01098     setOperationAction(ISD::FRINT,              MVT::f64,   Legal);
01099     setOperationAction(ISD::FNEARBYINT,         MVT::f64,   Legal);
01100 
01101     setOperationAction(ISD::FFLOOR,             MVT::v4f32, Legal);
01102     setOperationAction(ISD::FCEIL,              MVT::v4f32, Legal);
01103     setOperationAction(ISD::FTRUNC,             MVT::v4f32, Legal);
01104     setOperationAction(ISD::FRINT,              MVT::v4f32, Legal);
01105     setOperationAction(ISD::FNEARBYINT,         MVT::v4f32, Legal);
01106     setOperationAction(ISD::FFLOOR,             MVT::v2f64, Legal);
01107     setOperationAction(ISD::FCEIL,              MVT::v2f64, Legal);
01108     setOperationAction(ISD::FTRUNC,             MVT::v2f64, Legal);
01109     setOperationAction(ISD::FRINT,              MVT::v2f64, Legal);
01110     setOperationAction(ISD::FNEARBYINT,         MVT::v2f64, Legal);
01111 
01112     // FIXME: Do we need to handle scalar-to-vector here?
01113     setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
01114 
01115     setOperationAction(ISD::VSELECT,            MVT::v2f64, Custom);
01116     setOperationAction(ISD::VSELECT,            MVT::v2i64, Custom);
01117     setOperationAction(ISD::VSELECT,            MVT::v4i32, Custom);
01118     setOperationAction(ISD::VSELECT,            MVT::v4f32, Custom);
01119     setOperationAction(ISD::VSELECT,            MVT::v8i16, Custom);
01120     // There is no BLENDI for byte vectors. We don't need to custom lower
01121     // some vselects for now.
01122     setOperationAction(ISD::VSELECT,            MVT::v16i8, Legal);
01123 
01124     // SSE41 brings specific instructions for doing vector sign extend even in
01125     // cases where we don't have SRA.
01126     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Custom);
01127     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Custom);
01128     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, Custom);
01129 
01130     // i8 and i16 vectors are custom because the source register and source
01131     // memory operand types are not the same width.  f32 vectors are
01132     // custom since the immediate controlling the insert encodes additional
01133     // information.
01134     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
01135     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
01136     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
01137     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
01138 
01139     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
01140     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
01141     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
01142     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
01143 
01144     // FIXME: these should be Legal, but that's only for the case where
01145     // the index is constant.  For now custom expand to deal with that.
01146     if (Subtarget->is64Bit()) {
01147       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
01148       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
01149     }
01150   }
01151 
01152   if (Subtarget->hasSSE2()) {
01153     setOperationAction(ISD::SRL,               MVT::v8i16, Custom);
01154     setOperationAction(ISD::SRL,               MVT::v16i8, Custom);
01155 
01156     setOperationAction(ISD::SHL,               MVT::v8i16, Custom);
01157     setOperationAction(ISD::SHL,               MVT::v16i8, Custom);
01158 
01159     setOperationAction(ISD::SRA,               MVT::v8i16, Custom);
01160     setOperationAction(ISD::SRA,               MVT::v16i8, Custom);
01161 
01162     // In the customized shift lowering, the legal cases in AVX2 will be
01163     // recognized.
01164     setOperationAction(ISD::SRL,               MVT::v2i64, Custom);
01165     setOperationAction(ISD::SRL,               MVT::v4i32, Custom);
01166 
01167     setOperationAction(ISD::SHL,               MVT::v2i64, Custom);
01168     setOperationAction(ISD::SHL,               MVT::v4i32, Custom);
01169 
01170     setOperationAction(ISD::SRA,               MVT::v4i32, Custom);
01171   }
01172 
01173   if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
01174     addRegisterClass(MVT::v32i8,  &X86::VR256RegClass);
01175     addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
01176     addRegisterClass(MVT::v8i32,  &X86::VR256RegClass);
01177     addRegisterClass(MVT::v8f32,  &X86::VR256RegClass);
01178     addRegisterClass(MVT::v4i64,  &X86::VR256RegClass);
01179     addRegisterClass(MVT::v4f64,  &X86::VR256RegClass);
01180 
01181     setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
01182     setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
01183     setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);
01184 
01185     setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
01186     setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
01187     setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
01188     setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
01189     setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
01190     setOperationAction(ISD::FFLOOR,             MVT::v8f32, Legal);
01191     setOperationAction(ISD::FCEIL,              MVT::v8f32, Legal);
01192     setOperationAction(ISD::FTRUNC,             MVT::v8f32, Legal);
01193     setOperationAction(ISD::FRINT,              MVT::v8f32, Legal);
01194     setOperationAction(ISD::FNEARBYINT,         MVT::v8f32, Legal);
01195     setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
01196     setOperationAction(ISD::FABS,               MVT::v8f32, Custom);
01197 
01198     setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
01199     setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
01200     setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
01201     setOperationAction(ISD::FDIV,               MVT::v4f64, Legal);
01202     setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
01203     setOperationAction(ISD::FFLOOR,             MVT::v4f64, Legal);
01204     setOperationAction(ISD::FCEIL,              MVT::v4f64, Legal);
01205     setOperationAction(ISD::FTRUNC,             MVT::v4f64, Legal);
01206     setOperationAction(ISD::FRINT,              MVT::v4f64, Legal);
01207     setOperationAction(ISD::FNEARBYINT,         MVT::v4f64, Legal);
01208     setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
01209     setOperationAction(ISD::FABS,               MVT::v4f64, Custom);
01210 
01211     // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
01212     // even though v8i16 is a legal type.
01213     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Promote);
01214     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i16, Promote);
01215     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);
01216 
01217     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i16, Promote);
01218     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
01219     setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);
01220 
01221     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i8,  Custom);
01222     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i16, Custom);
01223 
01224     setLoadExtAction(ISD::EXTLOAD,              MVT::v4f32, Legal);
01225 
01226     setOperationAction(ISD::SRL,               MVT::v16i16, Custom);
01227     setOperationAction(ISD::SRL,               MVT::v32i8, Custom);
01228 
01229     setOperationAction(ISD::SHL,               MVT::v16i16, Custom);
01230     setOperationAction(ISD::SHL,               MVT::v32i8, Custom);
01231 
01232     setOperationAction(ISD::SRA,               MVT::v16i16, Custom);
01233     setOperationAction(ISD::SRA,               MVT::v32i8, Custom);
01234 
01235     setOperationAction(ISD::SETCC,             MVT::v32i8, Custom);
01236     setOperationAction(ISD::SETCC,             MVT::v16i16, Custom);
01237     setOperationAction(ISD::SETCC,             MVT::v8i32, Custom);
01238     setOperationAction(ISD::SETCC,             MVT::v4i64, Custom);
01239 
01240     setOperationAction(ISD::SELECT,            MVT::v4f64, Custom);
01241     setOperationAction(ISD::SELECT,            MVT::v4i64, Custom);
01242     setOperationAction(ISD::SELECT,            MVT::v8f32, Custom);
01243 
01244     setOperationAction(ISD::VSELECT,           MVT::v4f64, Custom);
01245     setOperationAction(ISD::VSELECT,           MVT::v4i64, Custom);
01246     setOperationAction(ISD::VSELECT,           MVT::v8i32, Custom);
01247     setOperationAction(ISD::VSELECT,           MVT::v8f32, Custom);
01248 
01249     setOperationAction(ISD::SIGN_EXTEND,       MVT::v4i64, Custom);
01250     setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i32, Custom);
01251     setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i16, Custom);
01252     setOperationAction(ISD::ZERO_EXTEND,       MVT::v4i64, Custom);
01253     setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i32, Custom);
01254     setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i16, Custom);
01255     setOperationAction(ISD::ANY_EXTEND,        MVT::v4i64, Custom);
01256     setOperationAction(ISD::ANY_EXTEND,        MVT::v8i32, Custom);
01257     setOperationAction(ISD::ANY_EXTEND,        MVT::v16i16, Custom);
01258     setOperationAction(ISD::TRUNCATE,          MVT::v16i8, Custom);
01259     setOperationAction(ISD::TRUNCATE,          MVT::v8i16, Custom);
01260     setOperationAction(ISD::TRUNCATE,          MVT::v4i32, Custom);
01261 
01262     if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
01263       setOperationAction(ISD::FMA,             MVT::v8f32, Legal);
01264       setOperationAction(ISD::FMA,             MVT::v4f64, Legal);
01265       setOperationAction(ISD::FMA,             MVT::v4f32, Legal);
01266       setOperationAction(ISD::FMA,             MVT::v2f64, Legal);
01267       setOperationAction(ISD::FMA,             MVT::f32, Legal);
01268       setOperationAction(ISD::FMA,             MVT::f64, Legal);
01269     }
01270 
01271     if (Subtarget->hasInt256()) {
01272       setOperationAction(ISD::ADD,             MVT::v4i64, Legal);
01273       setOperationAction(ISD::ADD,             MVT::v8i32, Legal);
01274       setOperationAction(ISD::ADD,             MVT::v16i16, Legal);
01275       setOperationAction(ISD::ADD,             MVT::v32i8, Legal);
01276 
01277       setOperationAction(ISD::SUB,             MVT::v4i64, Legal);
01278       setOperationAction(ISD::SUB,             MVT::v8i32, Legal);
01279       setOperationAction(ISD::SUB,             MVT::v16i16, Legal);
01280       setOperationAction(ISD::SUB,             MVT::v32i8, Legal);
01281 
01282       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01283       setOperationAction(ISD::MUL,             MVT::v8i32, Legal);
01284       setOperationAction(ISD::MUL,             MVT::v16i16, Legal);
01285       // Don't lower v32i8 because there is no 128-bit byte mul
01286 
01287       setOperationAction(ISD::UMUL_LOHI,       MVT::v8i32, Custom);
01288       setOperationAction(ISD::SMUL_LOHI,       MVT::v8i32, Custom);
01289       setOperationAction(ISD::MULHU,           MVT::v16i16, Legal);
01290       setOperationAction(ISD::MULHS,           MVT::v16i16, Legal);
01291 
01292       setOperationAction(ISD::VSELECT,         MVT::v16i16, Custom);
01293       setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
01294     } else {
01295       setOperationAction(ISD::ADD,             MVT::v4i64, Custom);
01296       setOperationAction(ISD::ADD,             MVT::v8i32, Custom);
01297       setOperationAction(ISD::ADD,             MVT::v16i16, Custom);
01298       setOperationAction(ISD::ADD,             MVT::v32i8, Custom);
01299 
01300       setOperationAction(ISD::SUB,             MVT::v4i64, Custom);
01301       setOperationAction(ISD::SUB,             MVT::v8i32, Custom);
01302       setOperationAction(ISD::SUB,             MVT::v16i16, Custom);
01303       setOperationAction(ISD::SUB,             MVT::v32i8, Custom);
01304 
01305       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01306       setOperationAction(ISD::MUL,             MVT::v8i32, Custom);
01307       setOperationAction(ISD::MUL,             MVT::v16i16, Custom);
01308       // Don't lower v32i8 because there is no 128-bit byte mul
01309     }
01310 
01311     // In the customized shift lowering, the legal cases in AVX2 will be
01312     // recognized.
01313     setOperationAction(ISD::SRL,               MVT::v4i64, Custom);
01314     setOperationAction(ISD::SRL,               MVT::v8i32, Custom);
01315 
01316     setOperationAction(ISD::SHL,               MVT::v4i64, Custom);
01317     setOperationAction(ISD::SHL,               MVT::v8i32, Custom);
01318 
01319     setOperationAction(ISD::SRA,               MVT::v8i32, Custom);
01320 
01321     // Custom lower several nodes for 256-bit types.
01322     for (int i = MVT::FIRST_VECTOR_VALUETYPE;
01323              i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
01324       MVT VT = (MVT::SimpleValueType)i;
01325 
01326       // Extract subvector is special because the value type
01327       // (result) is 128-bit but the source is 256-bit wide.
01328       if (VT.is128BitVector())
01329         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01330 
01331       // Do not attempt to custom lower other non-256-bit vectors
01332       if (!VT.is256BitVector())
01333         continue;
01334 
01335       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
01336       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
01337       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
01338       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
01339       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
01340       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Custom);
01341       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
01342     }
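          // Illustrative aside (not part of the original file): the loop above
          // touches 128-bit types only for EXTRACT_SUBVECTOR, so under AVX one
          // would expect, e.g.,
          //   getOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32) == Custom
          //   getOperationAction(ISD::VECTOR_SHUFFLE,    MVT::v8i32) == Custom
          // (assumed query sketch, not code from this file).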
01343 
01344     // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
01345     for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
01346       MVT VT = (MVT::SimpleValueType)i;
01347 
01348       // Do not attempt to promote non-256-bit vectors
01349       if (!VT.is256BitVector())
01350         continue;
01351 
01352       setOperationAction(ISD::AND,    VT, Promote);
01353       AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
01354       setOperationAction(ISD::OR,     VT, Promote);
01355       AddPromotedToType (ISD::OR,     VT, MVT::v4i64);
01356       setOperationAction(ISD::XOR,    VT, Promote);
01357       AddPromotedToType (ISD::XOR,    VT, MVT::v4i64);
01358       setOperationAction(ISD::LOAD,   VT, Promote);
01359       AddPromotedToType (ISD::LOAD,   VT, MVT::v4i64);
01360       setOperationAction(ISD::SELECT, VT, Promote);
01361       AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
01362     }
01363   }
01364 
01365   if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) {
01366     addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
01367     addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
01368     addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
01369     addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
01370 
01371     addRegisterClass(MVT::i1,     &X86::VK1RegClass);
01372     addRegisterClass(MVT::v8i1,   &X86::VK8RegClass);
01373     addRegisterClass(MVT::v16i1,  &X86::VK16RegClass);
01374 
01375     setOperationAction(ISD::BR_CC,              MVT::i1,    Expand);
01376     setOperationAction(ISD::SETCC,              MVT::i1,    Custom);
01377     setOperationAction(ISD::XOR,                MVT::i1,    Legal);
01378     setOperationAction(ISD::OR,                 MVT::i1,    Legal);
01379     setOperationAction(ISD::AND,                MVT::i1,    Legal);
01380     setLoadExtAction(ISD::EXTLOAD,              MVT::v8f32, Legal);
01381     setOperationAction(ISD::LOAD,               MVT::v16f32, Legal);
01382     setOperationAction(ISD::LOAD,               MVT::v8f64, Legal);
01383     setOperationAction(ISD::LOAD,               MVT::v8i64, Legal);
01384     setOperationAction(ISD::LOAD,               MVT::v16i32, Legal);
01385     setOperationAction(ISD::LOAD,               MVT::v16i1, Legal);
01386 
01387     setOperationAction(ISD::FADD,               MVT::v16f32, Legal);
01388     setOperationAction(ISD::FSUB,               MVT::v16f32, Legal);
01389     setOperationAction(ISD::FMUL,               MVT::v16f32, Legal);
01390     setOperationAction(ISD::FDIV,               MVT::v16f32, Legal);
01391     setOperationAction(ISD::FSQRT,              MVT::v16f32, Legal);
01392     setOperationAction(ISD::FNEG,               MVT::v16f32, Custom);
01393 
01394     setOperationAction(ISD::FADD,               MVT::v8f64, Legal);
01395     setOperationAction(ISD::FSUB,               MVT::v8f64, Legal);
01396     setOperationAction(ISD::FMUL,               MVT::v8f64, Legal);
01397     setOperationAction(ISD::FDIV,               MVT::v8f64, Legal);
01398     setOperationAction(ISD::FSQRT,              MVT::v8f64, Legal);
01399     setOperationAction(ISD::FNEG,               MVT::v8f64, Custom);
01400     setOperationAction(ISD::FMA,                MVT::v8f64, Legal);
01401     setOperationAction(ISD::FMA,                MVT::v16f32, Legal);
01402 
01403     setOperationAction(ISD::FP_TO_SINT,         MVT::i32, Legal);
01404     setOperationAction(ISD::FP_TO_UINT,         MVT::i32, Legal);
01405     setOperationAction(ISD::SINT_TO_FP,         MVT::i32, Legal);
01406     setOperationAction(ISD::UINT_TO_FP,         MVT::i32, Legal);
01407     if (Subtarget->is64Bit()) {
01408       setOperationAction(ISD::FP_TO_UINT,       MVT::i64, Legal);
01409       setOperationAction(ISD::FP_TO_SINT,       MVT::i64, Legal);
01410       setOperationAction(ISD::SINT_TO_FP,       MVT::i64, Legal);
01411       setOperationAction(ISD::UINT_TO_FP,       MVT::i64, Legal);
01412     }
01413     setOperationAction(ISD::FP_TO_SINT,         MVT::v16i32, Legal);
01414     setOperationAction(ISD::FP_TO_UINT,         MVT::v16i32, Legal);
01415     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i32, Legal);
01416     setOperationAction(ISD::FP_TO_UINT,         MVT::v4i32, Legal);
01417     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i32, Legal);
01418     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i32, Legal);
01419     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i32, Legal);
01420     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Legal);
01421     setOperationAction(ISD::FP_ROUND,           MVT::v8f32, Legal);
01422     setOperationAction(ISD::FP_EXTEND,          MVT::v8f32, Legal);
01423 
01424     setOperationAction(ISD::TRUNCATE,           MVT::i1, Custom);
01425     setOperationAction(ISD::TRUNCATE,           MVT::v16i8, Custom);
01426     setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
01427     setOperationAction(ISD::TRUNCATE,           MVT::v8i1, Custom);
01428     setOperationAction(ISD::TRUNCATE,           MVT::v16i1, Custom);
01429     setOperationAction(ISD::TRUNCATE,           MVT::v16i16, Custom);
01430     setOperationAction(ISD::ZERO_EXTEND,        MVT::v16i32, Custom);
01431     setOperationAction(ISD::ZERO_EXTEND,        MVT::v8i64, Custom);
01432     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i32, Custom);
01433     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i64, Custom);
01434     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i8, Custom);
01435     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i16, Custom);
01436     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i16, Custom);
01437 
01438     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8f64,  Custom);
01439     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i64,  Custom);
01440     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16f32,  Custom);
01441     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i32,  Custom);
01442     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i1,    Custom);
01443     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i1, Legal);
01444 
01445     setOperationAction(ISD::SETCC,              MVT::v16i1, Custom);
01446     setOperationAction(ISD::SETCC,              MVT::v8i1, Custom);
01447 
01448     setOperationAction(ISD::MUL,              MVT::v8i64, Custom);
01449 
01450     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1,  Custom);
01451     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
01452     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i1, Custom);
01453     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i1, Custom);
01454     setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i1, Custom);
01455     setOperationAction(ISD::BUILD_VECTOR,       MVT::v16i1, Custom);
01456     setOperationAction(ISD::SELECT,             MVT::v8f64, Custom);
01457     setOperationAction(ISD::SELECT,             MVT::v8i64, Custom);
01458     setOperationAction(ISD::SELECT,             MVT::v16f32, Custom);
01459 
01460     setOperationAction(ISD::ADD,                MVT::v8i64, Legal);
01461     setOperationAction(ISD::ADD,                MVT::v16i32, Legal);
01462 
01463     setOperationAction(ISD::SUB,                MVT::v8i64, Legal);
01464     setOperationAction(ISD::SUB,                MVT::v16i32, Legal);
01465 
01466     setOperationAction(ISD::MUL,                MVT::v16i32, Legal);
01467 
01468     setOperationAction(ISD::SRL,                MVT::v8i64, Custom);
01469     setOperationAction(ISD::SRL,                MVT::v16i32, Custom);
01470 
01471     setOperationAction(ISD::SHL,                MVT::v8i64, Custom);
01472     setOperationAction(ISD::SHL,                MVT::v16i32, Custom);
01473 
01474     setOperationAction(ISD::SRA,                MVT::v8i64, Custom);
01475     setOperationAction(ISD::SRA,                MVT::v16i32, Custom);
01476 
01477     setOperationAction(ISD::AND,                MVT::v8i64, Legal);
01478     setOperationAction(ISD::OR,                 MVT::v8i64, Legal);
01479     setOperationAction(ISD::XOR,                MVT::v8i64, Legal);
01480     setOperationAction(ISD::AND,                MVT::v16i32, Legal);
01481     setOperationAction(ISD::OR,                 MVT::v16i32, Legal);
01482     setOperationAction(ISD::XOR,                MVT::v16i32, Legal);
01483 
01484     if (Subtarget->hasCDI()) {
01485       setOperationAction(ISD::CTLZ,             MVT::v8i64, Legal);
01486       setOperationAction(ISD::CTLZ,             MVT::v16i32, Legal);
01487     }
01488 
01489     // Custom lower several nodes.
01490     for (int i = MVT::FIRST_VECTOR_VALUETYPE;
01491              i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
01492       MVT VT = (MVT::SimpleValueType)i;
01493 
01494       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
01495       // Extract subvector is special because the value type
01496       // (result) is 256/128-bit but the source is 512-bit wide.
01497       if (VT.is128BitVector() || VT.is256BitVector())
01498         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01499 
01500       if (VT.getVectorElementType() == MVT::i1)
01501         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
01502 
01503       // Do not attempt to custom lower other non-512-bit vectors
01504       if (!VT.is512BitVector())
01505         continue;
01506 
01507       if (EltSize >= 32) {
01508         setOperationAction(ISD::VECTOR_SHUFFLE,      VT, Custom);
01509         setOperationAction(ISD::INSERT_VECTOR_ELT,   VT, Custom);
01510         setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01511         setOperationAction(ISD::VSELECT,             VT, Legal);
01512         setOperationAction(ISD::EXTRACT_VECTOR_ELT,  VT, Custom);
01513         setOperationAction(ISD::SCALAR_TO_VECTOR,    VT, Custom);
01514         setOperationAction(ISD::INSERT_SUBVECTOR,    VT, Custom);
01515       }
01516     }
01517     for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
01518       MVT VT = (MVT::SimpleValueType)i;
01519 
01520       // Do not attempt to promote non-512-bit vectors
01521       if (!VT.is512BitVector())
01522         continue;
01523 
01524       setOperationAction(ISD::SELECT, VT, Promote);
01525       AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
01526     }
01527   } // has AVX-512
01528 
01529   if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
01530     addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
01531     addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
01532   }
01533 
01534   // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
01535   // of this type with custom code.
01536   for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
01537            VT != MVT::LAST_VECTOR_VALUETYPE; VT++) {
01538     setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
01539                        Custom);
01540   }
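        // Illustrative aside (not part of the original file): SIGN_EXTEND_INREG
        // re-sign-extends the low bits of a value in place; for an i32 lane with
        // an i8 source type it is equivalent to
        //   (x << 24) >> 24   // arithmetic shift right
        // which is one common way such a node can be expanded when no dedicated
        // instruction is available.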
01541 
01542   // We want to custom lower some of our intrinsics.
01543   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
01544   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
01545   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
01546   if (!Subtarget->is64Bit())
01547     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
01548 
01549   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
01550   // handle type legalization for these operations here.
01551   //
01552   // FIXME: We really should do custom legalization for addition and
01553   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
01554   // than generic legalization for 64-bit multiplication-with-overflow, though.
01555   for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
01556     // Add/Sub/Mul with overflow operations are custom lowered.
01557     MVT VT = IntVTs[i];
01558     setOperationAction(ISD::SADDO, VT, Custom);
01559     setOperationAction(ISD::UADDO, VT, Custom);
01560     setOperationAction(ISD::SSUBO, VT, Custom);
01561     setOperationAction(ISD::USUBO, VT, Custom);
01562     setOperationAction(ISD::SMULO, VT, Custom);
01563     setOperationAction(ISD::UMULO, VT, Custom);
01564   }
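        // Illustrative aside (not part of the original file): the custom lowering
        // of these *.with.overflow operations generally lets the arithmetic
        // instruction's EFLAGS feed the overflow result, roughly
        //   (SADDO x, y) -> X86ISD::ADD x, y          (value + EFLAGS)
        //                   overflow = SETCC<O>(EFLAGS)
        // (sketch only; the exact nodes depend on the operation and operands).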
01565 
01566   // There are no 8-bit 3-address imul/mul instructions
01567   setOperationAction(ISD::SMULO, MVT::i8, Expand);
01568   setOperationAction(ISD::UMULO, MVT::i8, Expand);
01569 
01570   if (!Subtarget->is64Bit()) {
01571     // These libcalls are not available in 32-bit.
01572     setLibcallName(RTLIB::SHL_I128, nullptr);
01573     setLibcallName(RTLIB::SRL_I128, nullptr);
01574     setLibcallName(RTLIB::SRA_I128, nullptr);
01575   }
01576 
01577   // Combine sin / cos into one node or libcall if possible.
01578   if (Subtarget->hasSinCos()) {
01579     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
01580     setLibcallName(RTLIB::SINCOS_F64, "sincos");
01581     if (Subtarget->isTargetDarwin()) {
01582       // For MacOSX, we don't want the normal expansion of a libcall to
01583       // sincos. We want to issue a libcall to __sincos_stret to avoid memory
01584       // traffic.
01585       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
01586       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
01587     }
01588   }
01589 
01590   if (Subtarget->isTargetWin64()) {
01591     setOperationAction(ISD::SDIV, MVT::i128, Custom);
01592     setOperationAction(ISD::UDIV, MVT::i128, Custom);
01593     setOperationAction(ISD::SREM, MVT::i128, Custom);
01594     setOperationAction(ISD::UREM, MVT::i128, Custom);
01595     setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
01596     setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
01597   }
01598 
01599   // We have target-specific dag combine patterns for the following nodes:
01600   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
01601   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
01602   setTargetDAGCombine(ISD::VSELECT);
01603   setTargetDAGCombine(ISD::SELECT);
01604   setTargetDAGCombine(ISD::SHL);
01605   setTargetDAGCombine(ISD::SRA);
01606   setTargetDAGCombine(ISD::SRL);
01607   setTargetDAGCombine(ISD::OR);
01608   setTargetDAGCombine(ISD::AND);
01609   setTargetDAGCombine(ISD::ADD);
01610   setTargetDAGCombine(ISD::FADD);
01611   setTargetDAGCombine(ISD::FSUB);
01612   setTargetDAGCombine(ISD::FMA);
01613   setTargetDAGCombine(ISD::SUB);
01614   setTargetDAGCombine(ISD::LOAD);
01615   setTargetDAGCombine(ISD::STORE);
01616   setTargetDAGCombine(ISD::ZERO_EXTEND);
01617   setTargetDAGCombine(ISD::ANY_EXTEND);
01618   setTargetDAGCombine(ISD::SIGN_EXTEND);
01619   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
01620   setTargetDAGCombine(ISD::TRUNCATE);
01621   setTargetDAGCombine(ISD::SINT_TO_FP);
01622   setTargetDAGCombine(ISD::SETCC);
01623   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
01624   setTargetDAGCombine(ISD::BUILD_VECTOR);
01625   if (Subtarget->is64Bit())
01626     setTargetDAGCombine(ISD::MUL);
01627   setTargetDAGCombine(ISD::XOR);
01628 
01629   computeRegisterProperties();
01630 
01631   // On Darwin, -Os means optimize for size without hurting performance,
01632   // so do not reduce the limit.
01633   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
01634   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
01635   MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
01636   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
01637   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
01638   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
01639   setPrefLoopAlignment(4); // 2^4 bytes.
01640 
01641   // Predictable cmovs don't hurt on Atom because it's in-order.
01642   PredictableSelectIsExpensive = !Subtarget->isAtom();
01643 
01644   setPrefFunctionAlignment(4); // 2^4 bytes.
01645 }
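      // Illustrative aside (not part of the original file): the memory-op limits
      // set at the end of the constructor bound inline expansion roughly as
      // follows, assuming 16-byte SSE stores:
      //   MaxStoresPerMemset = 16  ->  up to ~16 * 16 = 256 bytes of inline memset
      //   MaxStoresPerMemcpy =  8  ->  up to ~ 8 * 16 = 128 bytes of inline memcpy
      // beyond which the generic lowering falls back to a library call (subject
      // to its own additional checks).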
01646 
01647 // This has so far only been implemented for 64-bit MachO.
01648 bool X86TargetLowering::useLoadStackGuardNode() const {
01649   return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO &&
01650          Subtarget->is64Bit();
01651 }
01652 
01653 TargetLoweringBase::LegalizeTypeAction
01654 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
01655   if (ExperimentalVectorWideningLegalization &&
01656       VT.getVectorNumElements() != 1 &&
01657       VT.getVectorElementType().getSimpleVT() != MVT::i1)
01658     return TypeWidenVector;
01659 
01660   return TargetLoweringBase::getPreferredVectorAction(VT);
01661 }
01662 
01663 EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
01664   if (!VT.isVector())
01665     return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
01666 
01667   if (Subtarget->hasAVX512())
01668     switch(VT.getVectorNumElements()) {
01669     case  8: return MVT::v8i1;
01670     case 16: return MVT::v16i1;
01671   }
01672 
01673   return VT.changeVectorElementTypeToInteger();
01674 }
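      // Illustrative aside (not part of the original file): sample results of the
      // routine above, under the stated subtarget assumptions:
      //   getSetCCResultType(Ctx, MVT::f32)    -> i8    (i1 with AVX-512)
      //   getSetCCResultType(Ctx, MVT::v4f32)  -> v4i32 (element type made integer)
      //   getSetCCResultType(Ctx, MVT::v16f32) -> v16i1 (AVX-512 mask register)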
01675 
01676 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
01677 /// the desired ByVal argument alignment.
01678 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
01679   if (MaxAlign == 16)
01680     return;
01681   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
01682     if (VTy->getBitWidth() == 128)
01683       MaxAlign = 16;
01684   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
01685     unsigned EltAlign = 0;
01686     getMaxByValAlign(ATy->getElementType(), EltAlign);
01687     if (EltAlign > MaxAlign)
01688       MaxAlign = EltAlign;
01689   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
01690     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
01691       unsigned EltAlign = 0;
01692       getMaxByValAlign(STy->getElementType(i), EltAlign);
01693       if (EltAlign > MaxAlign)
01694         MaxAlign = EltAlign;
01695       if (MaxAlign == 16)
01696         break;
01697     }
01698   }
01699 }
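      // Illustrative aside (not part of the original file): for a hypothetical
      // argument type such as
      //   struct S { int a; <4 x float> v; };
      // the recursion above reaches the 128-bit vector member and raises MaxAlign
      // to 16, whereas an aggregate containing only scalars leaves it unchanged.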
01700 
01701 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
01702 /// function arguments in the caller parameter area. For X86, aggregates
01703 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
01704 /// are at 4-byte boundaries.
01705 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
01706   if (Subtarget->is64Bit()) {
01707     // Max of 8 and alignment of type.
01708     unsigned TyAlign = TD->getABITypeAlignment(Ty);
01709     if (TyAlign > 8)
01710       return TyAlign;
01711     return 8;
01712   }
01713 
01714   unsigned Align = 4;
01715   if (Subtarget->hasSSE1())
01716     getMaxByValAlign(Ty, Align);
01717   return Align;
01718 }
01719 
01720 /// getOptimalMemOpType - Returns the target specific optimal type for load
01721 /// and store operations as a result of memset, memcpy, and memmove
01722 /// lowering. If DstAlign is zero, the destination alignment can satisfy any
01723 /// constraint. Similarly, if SrcAlign is zero there is no need to check it
01724 /// against an alignment requirement,
01725 /// probably because the source does not need to be loaded. If 'IsMemset' is
01726 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
01727 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
01728 /// source is constant so it does not need to be loaded.
01729 /// It returns EVT::Other if the type should be determined using generic
01730 /// target-independent logic.
01731 EVT
01732 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
01733                                        unsigned DstAlign, unsigned SrcAlign,
01734                                        bool IsMemset, bool ZeroMemset,
01735                                        bool MemcpyStrSrc,
01736                                        MachineFunction &MF) const {
01737   const Function *F = MF.getFunction();
01738   if ((!IsMemset || ZeroMemset) &&
01739       !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
01740                                        Attribute::NoImplicitFloat)) {
01741     if (Size >= 16 &&
01742         (Subtarget->isUnalignedMemAccessFast() ||
01743          ((DstAlign == 0 || DstAlign >= 16) &&
01744           (SrcAlign == 0 || SrcAlign >= 16)))) {
01745       if (Size >= 32) {
01746         if (Subtarget->hasInt256())
01747           return MVT::v8i32;
01748         if (Subtarget->hasFp256())
01749           return MVT::v8f32;
01750       }
01751       if (Subtarget->hasSSE2())
01752         return MVT::v4i32;
01753       if (Subtarget->hasSSE1())
01754         return MVT::v4f32;
01755     } else if (!MemcpyStrSrc && Size >= 8 &&
01756                !Subtarget->is64Bit() &&
01757                Subtarget->hasSSE2()) {
01758       // Do not use f64 to lower memcpy if the source is a string constant.
01759       // It's better to use i32 to avoid the loads.
01760       return MVT::f64;
01761     }
01762   }
01763   if (Subtarget->is64Bit() && Size >= 8)
01764     return MVT::i64;
01765   return MVT::i32;
01766 }
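      // Illustrative aside (not part of the original file): for a 64-byte memcpy
      // with 16-byte aligned operands (and no NoImplicitFloat attribute), the code
      // above returns MVT::v8i32 with AVX2 and MVT::v4i32 with only SSE2, while a
      // copy smaller than 8 bytes falls through to MVT::i32.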
01767 
01768 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
01769   if (VT == MVT::f32)
01770     return X86ScalarSSEf32;
01771   else if (VT == MVT::f64)
01772     return X86ScalarSSEf64;
01773   return true;
01774 }
01775 
01776 bool
01777 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
01778                                                   unsigned,
01779                                                   unsigned,
01780                                                   bool *Fast) const {
01781   if (Fast)
01782     *Fast = Subtarget->isUnalignedMemAccessFast();
01783   return true;
01784 }
01785 
01786 /// getJumpTableEncoding - Return the entry encoding for a jump table in the
01787 /// current function.  The returned value is a member of the
01788 /// MachineJumpTableInfo::JTEntryKind enum.
01789 unsigned X86TargetLowering::getJumpTableEncoding() const {
01790   // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
01791   // symbol.
01792   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
01793       Subtarget->isPICStyleGOT())
01794     return MachineJumpTableInfo::EK_Custom32;
01795 
01796   // Otherwise, use the normal jump table encoding heuristics.
01797   return TargetLowering::getJumpTableEncoding();
01798 }
01799 
01800 const MCExpr *
01801 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
01802                                              const MachineBasicBlock *MBB,
01803                                              unsigned uid,MCContext &Ctx) const{
01804   assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
01805          Subtarget->isPICStyleGOT());
01806   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
01807   // entries.
01808   return MCSymbolRefExpr::Create(MBB->getSymbol(),
01809                                  MCSymbolRefExpr::VK_GOTOFF, Ctx);
01810 }
01811 
01812 /// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
01813 /// jumptable.
01814 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
01815                                                     SelectionDAG &DAG) const {
01816   if (!Subtarget->is64Bit())
01817     // This doesn't have SDLoc associated with it, but is not really the
01818     // same as a Register.
01819     return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
01820   return Table;
01821 }
01822 
01823 /// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
01824 /// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
01825 /// MCExpr.
01826 const MCExpr *X86TargetLowering::
01827 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
01828                              MCContext &Ctx) const {
01829   // X86-64 uses RIP relative addressing based on the jump table label.
01830   if (Subtarget->isPICStyleRIPRel())
01831     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
01832 
01833   // Otherwise, the reference is relative to the PIC base.
01834   return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
01835 }
01836 
01837 // FIXME: Why is this routine here? Move to RegInfo!
01838 std::pair<const TargetRegisterClass*, uint8_t>
01839 X86TargetLowering::findRepresentativeClass(MVT VT) const{
01840   const TargetRegisterClass *RRC = nullptr;
01841   uint8_t Cost = 1;
01842   switch (VT.SimpleTy) {
01843   default:
01844     return TargetLowering::findRepresentativeClass(VT);
01845   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
01846     RRC = Subtarget->is64Bit() ?
01847       (const TargetRegisterClass*)&X86::GR64RegClass :
01848       (const TargetRegisterClass*)&X86::GR32RegClass;
01849     break;
01850   case MVT::x86mmx:
01851     RRC = &X86::VR64RegClass;
01852     break;
01853   case MVT::f32: case MVT::f64:
01854   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
01855   case MVT::v4f32: case MVT::v2f64:
01856   case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
01857   case MVT::v4f64:
01858     RRC = &X86::VR128RegClass;
01859     break;
01860   }
01861   return std::make_pair(RRC, Cost);
01862 }
01863 
01864 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
01865                                                unsigned &Offset) const {
01866   if (!Subtarget->isTargetLinux())
01867     return false;
01868 
01869   if (Subtarget->is64Bit()) {
01870     // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
01871     Offset = 0x28;
01872     if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
01873       AddressSpace = 256;
01874     else
01875       AddressSpace = 257;
01876   } else {
01877     // %gs:0x14 on i386
01878     Offset = 0x14;
01879     AddressSpace = 256;
01880   }
01881   return true;
01882 }
01883 
01884 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
01885                                             unsigned DestAS) const {
01886   assert(SrcAS != DestAS && "Expected different address spaces!");
01887 
01888   return SrcAS < 256 && DestAS < 256;
01889 }
01890 
01891 //===----------------------------------------------------------------------===//
01892 //               Return Value Calling Convention Implementation
01893 //===----------------------------------------------------------------------===//
01894 
01895 #include "X86GenCallingConv.inc"
01896 
01897 bool
01898 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
01899                                   MachineFunction &MF, bool isVarArg,
01900                         const SmallVectorImpl<ISD::OutputArg> &Outs,
01901                         LLVMContext &Context) const {
01902   SmallVector<CCValAssign, 16> RVLocs;
01903   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
01904   return CCInfo.CheckReturn(Outs, RetCC_X86);
01905 }
01906 
01907 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
01908   static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
01909   return ScratchRegs;
01910 }
01911 
01912 SDValue
01913 X86TargetLowering::LowerReturn(SDValue Chain,
01914                                CallingConv::ID CallConv, bool isVarArg,
01915                                const SmallVectorImpl<ISD::OutputArg> &Outs,
01916                                const SmallVectorImpl<SDValue> &OutVals,
01917                                SDLoc dl, SelectionDAG &DAG) const {
01918   MachineFunction &MF = DAG.getMachineFunction();
01919   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
01920 
01921   SmallVector<CCValAssign, 16> RVLocs;
01922   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
01923   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
01924 
01925   SDValue Flag;
01926   SmallVector<SDValue, 6> RetOps;
01927   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
01928   // Operand #1 = Bytes To Pop
01929   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
01930                    MVT::i16));
01931 
01932   // Copy the result values into the output registers.
01933   for (unsigned i = 0; i != RVLocs.size(); ++i) {
01934     CCValAssign &VA = RVLocs[i];
01935     assert(VA.isRegLoc() && "Can only return in registers!");
01936     SDValue ValToCopy = OutVals[i];
01937     EVT ValVT = ValToCopy.getValueType();
01938 
01939     // Promote values to the appropriate types
01940     if (VA.getLocInfo() == CCValAssign::SExt)
01941       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
01942     else if (VA.getLocInfo() == CCValAssign::ZExt)
01943       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
01944     else if (VA.getLocInfo() == CCValAssign::AExt)
01945       ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
01946     else if (VA.getLocInfo() == CCValAssign::BCvt)
01947       ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
01948 
01949     assert(VA.getLocInfo() != CCValAssign::FPExt &&
01950            "Unexpected FP-extend for return value.");  
01951 
01952     // If this is x86-64, and we disabled SSE, we can't return FP values,
01953     // or SSE or MMX vectors.
01954     if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
01955          VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
01956           (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
01957       report_fatal_error("SSE register return with SSE disabled");
01958     }
01959     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
01960     // llvm-gcc has never done it right and no one has noticed, so this
01961     // should be OK for now.
01962     if (ValVT == MVT::f64 &&
01963         (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
01964       report_fatal_error("SSE2 register return with SSE2 disabled");
01965 
01966     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
01967     // the RET instruction and handled by the FP Stackifier.
01968     if (VA.getLocReg() == X86::FP0 ||
01969         VA.getLocReg() == X86::FP1) {
01970       // If this is a copy from an xmm register to ST(0), use an FPExtend to
01971       // change the value to the FP stack register class.
01972       if (isScalarFPTypeInSSEReg(VA.getValVT()))
01973         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
01974       RetOps.push_back(ValToCopy);
01975       // Don't emit a copytoreg.
01976       continue;
01977     }
01978 
01979     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
01980     // which is returned in RAX / RDX.
01981     if (Subtarget->is64Bit()) {
01982       if (ValVT == MVT::x86mmx) {
01983         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
01984           ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
01985           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
01986                                   ValToCopy);
01987           // If we don't have SSE2 available, convert to v4f32 so the generated
01988           // register is legal.
01989           if (!Subtarget->hasSSE2())
01990             ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
01991         }
01992       }
01993     }
01994 
01995     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
01996     Flag = Chain.getValue(1);
01997     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
01998   }
01999 
02000   // The x86-64 ABIs require that for returning structs by value we copy
02001   // the sret argument into %rax/%eax (depending on ABI) for the return.
02002   // Win32 requires us to put the sret argument to %eax as well.
02003   // We saved the argument into a virtual register in the entry block,
02004   // so now we copy the value out and into %rax/%eax.
02005   if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
02006       (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
02007     MachineFunction &MF = DAG.getMachineFunction();
02008     X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
02009     unsigned Reg = FuncInfo->getSRetReturnReg();
02010     assert(Reg &&
02011            "SRetReturnReg should have been set in LowerFormalArguments().");
02012     SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
02013 
02014     unsigned RetValReg
02015         = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
02016           X86::RAX : X86::EAX;
02017     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
02018     Flag = Chain.getValue(1);
02019 
02020     // RAX/EAX now acts like a return value.
02021     RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
02022   }
02023 
02024   RetOps[0] = Chain;  // Update chain.
02025 
02026   // Add the flag if we have it.
02027   if (Flag.getNode())
02028     RetOps.push_back(Flag);
02029 
02030   return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
02031 }
02032 
02033 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
02034   if (N->getNumValues() != 1)
02035     return false;
02036   if (!N->hasNUsesOfValue(1, 0))
02037     return false;
02038 
02039   SDValue TCChain = Chain;
02040   SDNode *Copy = *N->use_begin();
02041   if (Copy->getOpcode() == ISD::CopyToReg) {
02042     // If the copy has a glue operand, we conservatively assume it isn't safe to
02043     // perform a tail call.
02044     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02045       return false;
02046     TCChain = Copy->getOperand(0);
02047   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
02048     return false;
02049 
02050   bool HasRet = false;
02051   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
02052        UI != UE; ++UI) {
02053     if (UI->getOpcode() != X86ISD::RET_FLAG)
02054       return false;
02055     HasRet = true;
02056   }
02057 
02058   if (!HasRet)
02059     return false;
02060 
02061   Chain = TCChain;
02062   return true;
02063 }
02064 
02065 EVT
02066 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
02067                                             ISD::NodeType ExtendKind) const {
02068   MVT ReturnMVT;
02069   // TODO: Is this also valid on 32-bit?
02070   if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
02071     ReturnMVT = MVT::i8;
02072   else
02073     ReturnMVT = MVT::i32;
02074 
02075   EVT MinVT = getRegisterType(Context, ReturnMVT);
02076   return VT.bitsLT(MinVT) ? MinVT : VT;
02077 }
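      // Illustrative aside (not part of the original file): on x86-64 a
      // zero-extended i1 return is widened only to i8 by the routine above, while
      // other small integer returns are widened to at least i32 (e.g. i16 -> i32).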
02078 
02079 /// LowerCallResult - Lower the result values of a call into the
02080 /// appropriate copies out of the relevant physical registers.
02081 ///
02082 SDValue
02083 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
02084                                    CallingConv::ID CallConv, bool isVarArg,
02085                                    const SmallVectorImpl<ISD::InputArg> &Ins,
02086                                    SDLoc dl, SelectionDAG &DAG,
02087                                    SmallVectorImpl<SDValue> &InVals) const {
02088 
02089   // Assign locations to each value returned by this call.
02090   SmallVector<CCValAssign, 16> RVLocs;
02091   bool Is64Bit = Subtarget->is64Bit();
02092   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
02093                  *DAG.getContext());
02094   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
02095 
02096   // Copy all of the result registers out of their specified physreg.
02097   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
02098     CCValAssign &VA = RVLocs[i];
02099     EVT CopyVT = VA.getValVT();
02100 
02101     // If this is x86-64, and we disabled SSE, we can't return FP values
02102     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
02103         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
02104       report_fatal_error("SSE register return with SSE disabled");
02105     }
02106 
02107     // If we prefer to use the value in xmm registers, copy it out as f80 and
02108     // use a truncate to move it from fp stack reg to xmm reg.
02109     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
02110         isScalarFPTypeInSSEReg(VA.getValVT()))
02111       CopyVT = MVT::f80;
02112 
02113     Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
02114                                CopyVT, InFlag).getValue(1);
02115     SDValue Val = Chain.getValue(0);
02116 
02117     if (CopyVT != VA.getValVT())
02118       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
02119                         // This truncation won't change the value.
02120                         DAG.getIntPtrConstant(1));
02121 
02122     InFlag = Chain.getValue(2);
02123     InVals.push_back(Val);
02124   }
02125 
02126   return Chain;
02127 }
02128 
02129 //===----------------------------------------------------------------------===//
02130 //                C & StdCall & Fast Calling Convention implementation
02131 //===----------------------------------------------------------------------===//
02132 //  The StdCall calling convention is the standard for many Windows API
02133 //  routines. It differs from the C calling convention only slightly: the
02134 //  callee cleans up the stack rather than the caller, and symbols are
02135 //  decorated in some fancy way :) It doesn't support any vector arguments.
02136 //  For info on the fast calling convention, see the Fast Calling Convention
02137 //  (tail call) implementation, LowerX86_32FastCCCallTo.
02138 
02139 /// CallIsStructReturn - Determines whether a call uses struct return
02140 /// semantics.
02141 enum StructReturnType {
02142   NotStructReturn,
02143   RegStructReturn,
02144   StackStructReturn
02145 };
02146 static StructReturnType
02147 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
02148   if (Outs.empty())
02149     return NotStructReturn;
02150 
02151   const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
02152   if (!Flags.isSRet())
02153     return NotStructReturn;
02154   if (Flags.isInReg())
02155     return RegStructReturn;
02156   return StackStructReturn;
02157 }
02158 
02159 /// ArgsAreStructReturn - Determines whether a function uses struct
02160 /// return semantics.
02161 static StructReturnType
02162 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
02163   if (Ins.empty())
02164     return NotStructReturn;
02165 
02166   const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
02167   if (!Flags.isSRet())
02168     return NotStructReturn;
02169   if (Flags.isInReg())
02170     return RegStructReturn;
02171   return StackStructReturn;
02172 }
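      // Illustrative aside (not part of the original file): for a callee whose
      // first argument carries the sret flag, e.g. the hypothetical declaration
      //   declare void @f(i32* sret %out)
      // the helpers above return StackStructReturn; with sret+inreg they return
      // RegStructReturn, and without sret they return NotStructReturn.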
02173 
02174 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
02175 /// by "Src" to address "Dst" with size and alignment information specified by
02176 /// the specific parameter attribute. The copy will be passed as a byval
02177 /// function parameter.
02178 static SDValue
02179 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
02180                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
02181                           SDLoc dl) {
02182   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
02183 
02184   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
02185                        /*isVolatile*/false, /*AlwaysInline=*/true,
02186                        MachinePointerInfo(), MachinePointerInfo());
02187 }
02188 
02189 /// IsTailCallConvention - Return true if the calling convention is one that
02190 /// supports tail call optimization.
02191 static bool IsTailCallConvention(CallingConv::ID CC) {
02192   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
02193           CC == CallingConv::HiPE);
02194 }
02195 
02196 /// \brief Return true if the calling convention is a C calling convention.
02197 static bool IsCCallConvention(CallingConv::ID CC) {
02198   return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
02199           CC == CallingConv::X86_64_SysV);
02200 }
02201 
02202 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
02203   if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
02204     return false;
02205 
02206   CallSite CS(CI);
02207   CallingConv::ID CalleeCC = CS.getCallingConv();
02208   if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
02209     return false;
02210 
02211   return true;
02212 }
02213 
02214 /// FuncIsMadeTailCallSafe - Return true if the function is being made into
02215 /// a tailcall target by changing its ABI.
02216 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
02217                                    bool GuaranteedTailCallOpt) {
02218   return GuaranteedTailCallOpt && IsTailCallConvention(CC);
02219 }
02220 
02221 SDValue
02222 X86TargetLowering::LowerMemArgument(SDValue Chain,
02223                                     CallingConv::ID CallConv,
02224                                     const SmallVectorImpl<ISD::InputArg> &Ins,
02225                                     SDLoc dl, SelectionDAG &DAG,
02226                                     const CCValAssign &VA,
02227                                     MachineFrameInfo *MFI,
02228                                     unsigned i) const {
02229   // Create the nodes corresponding to a load from this parameter slot.
02230   ISD::ArgFlagsTy Flags = Ins[i].Flags;
02231   bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
02232       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
02233   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
02234   EVT ValVT;
02235 
02236   // If the value is passed by pointer, the address is passed instead of the
02237   // value itself.
02238   if (VA.getLocInfo() == CCValAssign::Indirect)
02239     ValVT = VA.getLocVT();
02240   else
02241     ValVT = VA.getValVT();
02242 
02243   // FIXME: For now, all byval parameter objects are marked mutable. This can be
02244   // changed with more analysis.
02245   // In case of tail call optimization, mark all arguments mutable, since they
02246   // could be overwritten by the lowering of arguments in case of a tail call.
02247   if (Flags.isByVal()) {
02248     unsigned Bytes = Flags.getByValSize();
02249     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
02250     int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
02251     return DAG.getFrameIndex(FI, getPointerTy());
02252   } else {
02253     int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
02254                                     VA.getLocMemOffset(), isImmutable);
02255     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
02256     return DAG.getLoad(ValVT, dl, Chain, FIN,
02257                        MachinePointerInfo::getFixedStack(FI),
02258                        false, false, false, 0);
02259   }
02260 }
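// Illustrative sketch of the two paths above (hypothetical signature), for a
// 32-bit target where both arguments are passed on the stack:
//
//   define void @f(i32 %a, %struct.S* byval %s)
//
// %a gets a 4-byte fixed stack object at its incoming offset plus a load from
// it, while the byval %s only yields the frame index of the caller-created
// copy; no load is emitted for the byval case.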
02261 
02262 SDValue
02263 X86TargetLowering::LowerFormalArguments(SDValue Chain,
02264                                         CallingConv::ID CallConv,
02265                                         bool isVarArg,
02266                                       const SmallVectorImpl<ISD::InputArg> &Ins,
02267                                         SDLoc dl,
02268                                         SelectionDAG &DAG,
02269                                         SmallVectorImpl<SDValue> &InVals)
02270                                           const {
02271   MachineFunction &MF = DAG.getMachineFunction();
02272   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
02273 
02274   const Function* Fn = MF.getFunction();
02275   if (Fn->hasExternalLinkage() &&
02276       Subtarget->isTargetCygMing() &&
02277       Fn->getName() == "main")
02278     FuncInfo->setForceFramePointer(true);
02279 
02280   MachineFrameInfo *MFI = MF.getFrameInfo();
02281   bool Is64Bit = Subtarget->is64Bit();
02282   bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
02283 
02284   assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
02285          "Var args not supported with calling convention fastcc, ghc or hipe");
02286 
02287   // Assign locations to all of the incoming arguments.
02288   SmallVector<CCValAssign, 16> ArgLocs;
02289   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02290 
02291   // Allocate shadow area for Win64
02292   if (IsWin64)
02293     CCInfo.AllocateStack(32, 8);
02294 
02295   CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
02296 
02297   unsigned LastVal = ~0U;
02298   SDValue ArgValue;
02299   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02300     CCValAssign &VA = ArgLocs[i];
02301     // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
02302     // places.
02303     assert(VA.getValNo() != LastVal &&
02304            "Don't support value assigned to multiple locs yet");
02305     (void)LastVal;
02306     LastVal = VA.getValNo();
02307 
02308     if (VA.isRegLoc()) {
02309       EVT RegVT = VA.getLocVT();
02310       const TargetRegisterClass *RC;
02311       if (RegVT == MVT::i32)
02312         RC = &X86::GR32RegClass;
02313       else if (Is64Bit && RegVT == MVT::i64)
02314         RC = &X86::GR64RegClass;
02315       else if (RegVT == MVT::f32)
02316         RC = &X86::FR32RegClass;
02317       else if (RegVT == MVT::f64)
02318         RC = &X86::FR64RegClass;
02319       else if (RegVT.is512BitVector())
02320         RC = &X86::VR512RegClass;
02321       else if (RegVT.is256BitVector())
02322         RC = &X86::VR256RegClass;
02323       else if (RegVT.is128BitVector())
02324         RC = &X86::VR128RegClass;
02325       else if (RegVT == MVT::x86mmx)
02326         RC = &X86::VR64RegClass;
02327       else if (RegVT == MVT::i1)
02328         RC = &X86::VK1RegClass;
02329       else if (RegVT == MVT::v8i1)
02330         RC = &X86::VK8RegClass;
02331       else if (RegVT == MVT::v16i1)
02332         RC = &X86::VK16RegClass;
02333       else if (RegVT == MVT::v32i1)
02334         RC = &X86::VK32RegClass;
02335       else if (RegVT == MVT::v64i1)
02336         RC = &X86::VK64RegClass;
02337       else
02338         llvm_unreachable("Unknown argument type!");
02339 
02340       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02341       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
02342 
02343       // If this is an 8 or 16-bit value, it is really passed promoted to 32
02344       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
02345       // right size.
02346       if (VA.getLocInfo() == CCValAssign::SExt)
02347         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
02348                                DAG.getValueType(VA.getValVT()));
02349       else if (VA.getLocInfo() == CCValAssign::ZExt)
02350         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
02351                                DAG.getValueType(VA.getValVT()));
02352       else if (VA.getLocInfo() == CCValAssign::BCvt)
02353         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
02354 
02355       if (VA.isExtInLoc()) {
02356         // Handle MMX values passed in XMM regs.
02357         if (RegVT.isVector())
02358           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
02359         else
02360           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
02361       }
02362     } else {
02363       assert(VA.isMemLoc());
02364       ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
02365     }
02366 
02367     // If value is passed via pointer - do a load.
02368     if (VA.getLocInfo() == CCValAssign::Indirect)
02369       ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
02370                              MachinePointerInfo(), false, false, false, 0);
02371 
02372     InVals.push_back(ArgValue);
02373   }
02374 
02375   if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) {
02376     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02377       // The x86-64 ABIs require that for returning structs by value we copy
02378       // the sret argument into %rax/%eax (depending on ABI) for the return.
02379       // Win32 requires us to put the sret argument to %eax as well.
02380       // Save the argument into a virtual register so that we can access it
02381       // from the return points.
02382       if (Ins[i].Flags.isSRet()) {
02383         unsigned Reg = FuncInfo->getSRetReturnReg();
02384         if (!Reg) {
02385           MVT PtrTy = getPointerTy();
02386           Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
02387           FuncInfo->setSRetReturnReg(Reg);
02388         }
02389         SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
02390         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
02391         break;
02392       }
02393     }
02394   }
02395 
02396   unsigned StackSize = CCInfo.getNextStackOffset();
02397   // Align stack specially for tail calls.
02398   if (FuncIsMadeTailCallSafe(CallConv,
02399                              MF.getTarget().Options.GuaranteedTailCallOpt))
02400     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
02401 
02402   // If the function takes a variable number of arguments, make a frame index for
02403   // the start of the first vararg value... for expansion of llvm.va_start.
02404   if (isVarArg) {
02405     if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
02406                     CallConv != CallingConv::X86_ThisCall)) {
02407       FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
02408     }
02409     if (Is64Bit) {
02410       unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
02411 
02412       // FIXME: We should really autogenerate these arrays
02413       static const MCPhysReg GPR64ArgRegsWin64[] = {
02414         X86::RCX, X86::RDX, X86::R8,  X86::R9
02415       };
02416       static const MCPhysReg GPR64ArgRegs64Bit[] = {
02417         X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
02418       };
02419       static const MCPhysReg XMMArgRegs64Bit[] = {
02420         X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02421         X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02422       };
02423       const MCPhysReg *GPR64ArgRegs;
02424       unsigned NumXMMRegs = 0;
02425 
02426       if (IsWin64) {
02427         // The XMM registers which might contain var arg parameters are shadowed
02428         // in their paired GPR.  So we only need to save the GPR to their home
02429         // slots.
02430         TotalNumIntRegs = 4;
02431         GPR64ArgRegs = GPR64ArgRegsWin64;
02432       } else {
02433         TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
02434         GPR64ArgRegs = GPR64ArgRegs64Bit;
02435 
02436         NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit,
02437                                                 TotalNumXMMRegs);
02438       }
02439       unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
02440                                                        TotalNumIntRegs);
02441 
02442       bool NoImplicitFloatOps = Fn->getAttributes().
02443         hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
02444       assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
02445              "SSE register cannot be used when SSE is disabled!");
02446       assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
02447                NoImplicitFloatOps) &&
02448              "SSE register cannot be used when SSE is disabled!");
02449       if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
02450           !Subtarget->hasSSE1())
02451         // Kernel mode asks for SSE to be disabled, so don't push them
02452         // on the stack.
02453         TotalNumXMMRegs = 0;
02454 
02455       if (IsWin64) {
02456         const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
02457         // Get to the caller-allocated home save location.  Add 8 to account
02458         // for the return address.
02459         int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
02460         FuncInfo->setRegSaveFrameIndex(
02461           MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
02462         // Fixup to set vararg frame on shadow area (4 x i64).
02463         if (NumIntRegs < 4)
02464           FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
02465       } else {
02466         // For X86-64, if there are vararg parameters that are passed via
02467         // registers, then we must store them to their spots on the stack so
02468     // they may be loaded by dereferencing the result of va_next.
02469         FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
02470         FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
02471         FuncInfo->setRegSaveFrameIndex(
02472           MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
02473                                false));
02474       }
02475 
02476       // Store the integer parameter registers.
02477       SmallVector<SDValue, 8> MemOps;
02478       SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
02479                                         getPointerTy());
02480       unsigned Offset = FuncInfo->getVarArgsGPOffset();
02481       for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
02482         SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
02483                                   DAG.getIntPtrConstant(Offset));
02484         unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
02485                                      &X86::GR64RegClass);
02486         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
02487         SDValue Store =
02488           DAG.getStore(Val.getValue(1), dl, Val, FIN,
02489                        MachinePointerInfo::getFixedStack(
02490                          FuncInfo->getRegSaveFrameIndex(), Offset),
02491                        false, false, 0);
02492         MemOps.push_back(Store);
02493         Offset += 8;
02494       }
02495 
02496       if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
02497         // Now store the XMM (fp + vector) parameter registers.
02498         SmallVector<SDValue, 12> SaveXMMOps;
02499         SaveXMMOps.push_back(Chain);
02500 
02501         unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
02502         SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
02503         SaveXMMOps.push_back(ALVal);
02504 
02505         SaveXMMOps.push_back(DAG.getIntPtrConstant(
02506                                FuncInfo->getRegSaveFrameIndex()));
02507         SaveXMMOps.push_back(DAG.getIntPtrConstant(
02508                                FuncInfo->getVarArgsFPOffset()));
02509 
02510         for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
02511           unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
02512                                        &X86::VR128RegClass);
02513           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
02514           SaveXMMOps.push_back(Val);
02515         }
02516         MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
02517                                      MVT::Other, SaveXMMOps));
02518       }
02519 
02520       if (!MemOps.empty())
02521         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02522     }
02523   }
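  // Worked example of the register save area set up above (hypothetical
  // counts, non-Win64 x86-64): TotalNumIntRegs = 6 and TotalNumXMMRegs = 8
  // give a 6*8 + 8*16 = 176-byte stack object. If the fixed arguments already
  // consumed 2 GPRs and 1 XMM register, then VarArgsGPOffset = 2*8 = 16 and
  // VarArgsFPOffset = 6*8 + 1*16 = 64, matching the gp_offset/fp_offset
  // values that va_arg consults when pulling the remaining register arguments
  // out of the spill area.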
02524 
02525   // Some CCs need callee pop.
02526   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
02527                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
02528     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
02529   } else {
02530     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
02531     // If this is an sret function, the return should pop the hidden pointer.
02532     if (!Is64Bit && !IsTailCallConvention(CallConv) &&
02533         !Subtarget->getTargetTriple().isOSMSVCRT() &&
02534         argsAreStructReturn(Ins) == StackStructReturn)
02535       FuncInfo->setBytesToPopOnReturn(4);
02536   }
02537 
02538   if (!Is64Bit) {
02539     // RegSaveFrameIndex is X86-64 only.
02540     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
02541     if (CallConv == CallingConv::X86_FastCall ||
02542         CallConv == CallingConv::X86_ThisCall)
02543       // fastcc functions can't have varargs.
02544       FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
02545   }
02546 
02547   FuncInfo->setArgumentStackSize(StackSize);
02548 
02549   return Chain;
02550 }
02551 
02552 SDValue
02553 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
02554                                     SDValue StackPtr, SDValue Arg,
02555                                     SDLoc dl, SelectionDAG &DAG,
02556                                     const CCValAssign &VA,
02557                                     ISD::ArgFlagsTy Flags) const {
02558   unsigned LocMemOffset = VA.getLocMemOffset();
02559   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
02560   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
02561   if (Flags.isByVal())
02562     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
02563 
02564   return DAG.getStore(Chain, dl, Arg, PtrOff,
02565                       MachinePointerInfo::getStack(LocMemOffset),
02566                       false, false, 0);
02567 }
02568 
02569 /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
02570 /// optimization is performed and it is required.
02571 SDValue
02572 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
02573                                            SDValue &OutRetAddr, SDValue Chain,
02574                                            bool IsTailCall, bool Is64Bit,
02575                                            int FPDiff, SDLoc dl) const {
02576   // Adjust the Return address stack slot.
02577   EVT VT = getPointerTy();
02578   OutRetAddr = getReturnAddressFrameIndex(DAG);
02579 
02580   // Load the "old" Return address.
02581   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
02582                            false, false, false, 0);
02583   return SDValue(OutRetAddr.getNode(), 1);
02584 }
02585 
02586 /// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
02587 /// optimization is performed and it is required (FPDiff!=0).
02588 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
02589                                         SDValue Chain, SDValue RetAddrFrIdx,
02590                                         EVT PtrVT, unsigned SlotSize,
02591                                         int FPDiff, SDLoc dl) {
02592   // Store the return address to the appropriate stack slot.
02593   if (!FPDiff) return Chain;
02594   // Calculate the new stack slot for the return address.
02595   int NewReturnAddrFI =
02596     MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
02597                                          false);
02598   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
02599   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
02600                        MachinePointerInfo::getFixedStack(NewReturnAddrFI),
02601                        false, false, 0);
02602   return Chain;
02603 }
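// Worked example for the helper above (hypothetical values): with FPDiff = -16
// and an 8-byte slot, it creates a fixed stack object at offset -16 - 8 = -24
// and stores the previously loaded return address there, i.e. 16 bytes further
// down than its original slot, making room for the callee's larger argument
// area.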
02604 
02605 SDValue
02606 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
02607                              SmallVectorImpl<SDValue> &InVals) const {
02608   SelectionDAG &DAG                     = CLI.DAG;
02609   SDLoc &dl                             = CLI.DL;
02610   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
02611   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
02612   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
02613   SDValue Chain                         = CLI.Chain;
02614   SDValue Callee                        = CLI.Callee;
02615   CallingConv::ID CallConv              = CLI.CallConv;
02616   bool &isTailCall                      = CLI.IsTailCall;
02617   bool isVarArg                         = CLI.IsVarArg;
02618 
02619   MachineFunction &MF = DAG.getMachineFunction();
02620   bool Is64Bit        = Subtarget->is64Bit();
02621   bool IsWin64        = Subtarget->isCallingConvWin64(CallConv);
02622   StructReturnType SR = callIsStructReturn(Outs);
02623   bool IsSibcall      = false;
02624 
02625   if (MF.getTarget().Options.DisableTailCalls)
02626     isTailCall = false;
02627 
02628   bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
02629   if (IsMustTail) {
02630     // Force this to be a tail call.  The verifier rules are enough to ensure
02631     // that we can lower this successfully without moving the return address
02632     // around.
02633     isTailCall = true;
02634   } else if (isTailCall) {
02635     // Check if it's really possible to do a tail call.
02636     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
02637                     isVarArg, SR != NotStructReturn,
02638                     MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
02639                     Outs, OutVals, Ins, DAG);
02640 
02641     // Sibcalls are automatically detected tailcalls which do not require
02642     // ABI changes.
02643     if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
02644       IsSibcall = true;
02645 
02646     if (isTailCall)
02647       ++NumTailCalls;
02648   }
02649 
02650   assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
02651          "Var args not supported with calling convention fastcc, ghc or hipe");
02652 
02653   // Analyze operands of the call, assigning locations to each operand.
02654   SmallVector<CCValAssign, 16> ArgLocs;
02655   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02656 
02657   // Allocate shadow area for Win64
02658   if (IsWin64)
02659     CCInfo.AllocateStack(32, 8);
02660 
02661   CCInfo.AnalyzeCallOperands(Outs, CC_X86);
02662 
02663   // Get a count of how many bytes are to be pushed on the stack.
02664   unsigned NumBytes = CCInfo.getNextStackOffset();
02665   if (IsSibcall)
02666     // This is a sibcall. The memory operands are already available in the
02667     // caller's own caller's stack.
02668     NumBytes = 0;
02669   else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
02670            IsTailCallConvention(CallConv))
02671     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
02672 
02673   int FPDiff = 0;
02674   if (isTailCall && !IsSibcall && !IsMustTail) {
02675     // Lower arguments at fp - stackoffset + fpdiff.
02676     X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
02677     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
02678 
02679     FPDiff = NumBytesCallerPushed - NumBytes;
02680 
02681     // Set the delta of movement of the returnaddr stackslot.
02682     // But only set if delta is greater than previous delta.
02683     if (FPDiff < X86Info->getTCReturnAddrDelta())
02684       X86Info->setTCReturnAddrDelta(FPDiff);
02685   }
02686 
02687   unsigned NumBytesToPush = NumBytes;
02688   unsigned NumBytesToPop = NumBytes;
02689 
02690   // If we have an inalloca argument, all stack space has already been allocated
02691   // for us and will be right at the top of the stack.  We don't support multiple
02692   // arguments passed in memory when using inalloca.
02693   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
02694     NumBytesToPush = 0;
02695     if (!ArgLocs.back().isMemLoc())
02696       report_fatal_error("cannot use inalloca attribute on a register "
02697                          "parameter");
02698     if (ArgLocs.back().getLocMemOffset() != 0)
02699       report_fatal_error("any parameter with the inalloca attribute must be "
02700                          "the only memory argument");
02701   }
02702 
02703   if (!IsSibcall)
02704     Chain = DAG.getCALLSEQ_START(
02705         Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
02706 
02707   SDValue RetAddrFrIdx;
02708   // Load return address for tail calls.
02709   if (isTailCall && FPDiff)
02710     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
02711                                     Is64Bit, FPDiff, dl);
02712 
02713   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
02714   SmallVector<SDValue, 8> MemOpChains;
02715   SDValue StackPtr;
02716 
02717   // Walk the register/memloc assignments, inserting copies/loads.  In the case
02718   // of tail call optimization, arguments are handled later.
02719   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
02720       DAG.getSubtarget().getRegisterInfo());
02721   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02722     // Skip inalloca arguments, they have already been written.
02723     ISD::ArgFlagsTy Flags = Outs[i].Flags;
02724     if (Flags.isInAlloca())
02725       continue;
02726 
02727     CCValAssign &VA = ArgLocs[i];
02728     EVT RegVT = VA.getLocVT();
02729     SDValue Arg = OutVals[i];
02730     bool isByVal = Flags.isByVal();
02731 
02732     // Promote the value if needed.
02733     switch (VA.getLocInfo()) {
02734     default: llvm_unreachable("Unknown loc info!");
02735     case CCValAssign::Full: break;
02736     case CCValAssign::SExt:
02737       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
02738       break;
02739     case CCValAssign::ZExt:
02740       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
02741       break;
02742     case CCValAssign::AExt:
02743       if (RegVT.is128BitVector()) {
02744         // Special case: passing MMX values in XMM registers.
02745         Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
02746         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
02747         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
02748       } else
02749         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
02750       break;
02751     case CCValAssign::BCvt:
02752       Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
02753       break;
02754     case CCValAssign::Indirect: {
02755       // Store the argument.
02756       SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
02757       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
02758       Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
02759                            MachinePointerInfo::getFixedStack(FI),
02760                            false, false, 0);
02761       Arg = SpillSlot;
02762       break;
02763     }
02764     }
02765 
02766     if (VA.isRegLoc()) {
02767       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
02768       if (isVarArg && IsWin64) {
02769         // Win64 ABI requires argument XMM reg to be copied to the corresponding
02770         // shadow reg if callee is a varargs function.
02771         unsigned ShadowReg = 0;
02772         switch (VA.getLocReg()) {
02773         case X86::XMM0: ShadowReg = X86::RCX; break;
02774         case X86::XMM1: ShadowReg = X86::RDX; break;
02775         case X86::XMM2: ShadowReg = X86::R8; break;
02776         case X86::XMM3: ShadowReg = X86::R9; break;
02777         }
02778         if (ShadowReg)
02779           RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
02780       }
02781     } else if (!IsSibcall && (!isTailCall || isByVal)) {
02782       assert(VA.isMemLoc());
02783       if (!StackPtr.getNode())
02784         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
02785                                       getPointerTy());
02786       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
02787                                              dl, DAG, VA, Flags));
02788     }
02789   }
02790 
02791   if (!MemOpChains.empty())
02792     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
02793 
02794   if (Subtarget->isPICStyleGOT()) {
02795     // ELF / PIC requires GOT in the EBX register before function calls via PLT
02796     // GOT pointer.
02797     if (!isTailCall) {
02798       RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
02799                DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
02800     } else {
02801       // If we are tail calling and generating PIC/GOT style code load the
02802       // address of the callee into ECX. The value in ecx is used as target of
02803       // the tail jump. This is done to circumvent the ebx/callee-saved problem
02804       // for tail calls on PIC/GOT architectures. Normally we would just put the
02805       // address of GOT into ebx and then call target@PLT. But for tail calls
02806       // ebx would be restored (since ebx is callee saved) before jumping to the
02807       // target@PLT.
02808 
02809       // Note: The actual moving to ECX is done further down.
02810       GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
02811       if (G && !G->getGlobal()->hasHiddenVisibility() &&
02812           !G->getGlobal()->hasProtectedVisibility())
02813         Callee = LowerGlobalAddress(Callee, DAG);
02814       else if (isa<ExternalSymbolSDNode>(Callee))
02815         Callee = LowerExternalSymbol(Callee, DAG);
02816     }
02817   }
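  // A rough i386 PIC sketch of the difference described above (assumed
  // assembly; exact register choices can vary):
  //
  //   normal call:   calll foo@PLT              ; EBX still holds the GOT base
  //   tail call:     movl  foo@GOT(%ebx), %ecx  ; grab the address early
  //                  ...epilogue restores callee-saved regs, including %ebx...
  //                  jmpl  *%ecx                ; indirect tail jump
  //
  // Loading the callee's address into ECX before EBX is restored is what the
  // comment above refers to; the actual copy into ECX happens further down in
  // LowerCall.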
02818 
02819   if (Is64Bit && isVarArg && !IsWin64) {
02820     // From AMD64 ABI document:
02821     // For calls that may call functions that use varargs or stdargs
02822     // (prototype-less calls or calls to functions containing ellipsis (...) in
02823     // the declaration) %al is used as hidden argument to specify the number
02824     // of SSE registers used. The contents of %al do not need to match exactly
02825     // the number of registers, but must be an ubound on the number of SSE
02826     // registers used and is in the range 0 - 8 inclusive.
02827 
02828     // Count the number of XMM registers allocated.
02829     static const MCPhysReg XMMArgRegs[] = {
02830       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02831       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02832     };
02833     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
02834     assert((Subtarget->hasSSE1() || !NumXMMRegs)
02835            && "SSE registers cannot be used when SSE is disabled");
02836 
02837     RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
02838                                         DAG.getConstant(NumXMMRegs, MVT::i8)));
02839   }
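  // Worked example (hypothetical call): for printf("%f\n", x) with one double
  // vararg, the value travels in %xmm0, so NumXMMRegs == 1 and the lowering
  // materializes %al = 1 before the call; a varargs call that uses no SSE
  // registers gets %al = 0 instead. Any value >= the real count and <= 8 also
  // satisfies the ABI, since %al only has to be an upper bound.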
02840 
02841   // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
02842   // don't need this because the eligibility check rejects calls that require
02843   // shuffling arguments passed in memory.
02844   if (!IsSibcall && isTailCall) {
02845     // Force all the incoming stack arguments to be loaded from the stack
02846     // before any new outgoing arguments are stored to the stack, because the
02847     // outgoing stack slots may alias the incoming argument stack slots, and
02848     // the alias isn't otherwise explicit. This is slightly more conservative
02849     // than necessary, because it means that each store effectively depends
02850     // on every argument instead of just those arguments it would clobber.
02851     SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
02852 
02853     SmallVector<SDValue, 8> MemOpChains2;
02854     SDValue FIN;
02855     int FI = 0;
02856     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02857       CCValAssign &VA = ArgLocs[i];
02858       if (VA.isRegLoc())
02859         continue;
02860       assert(VA.isMemLoc());
02861       SDValue Arg = OutVals[i];
02862       ISD::ArgFlagsTy Flags = Outs[i].Flags;
02863       // Skip inalloca arguments.  They don't require any work.
02864       if (Flags.isInAlloca())
02865         continue;
02866       // Create frame index.
02867       int32_t Offset = VA.getLocMemOffset()+FPDiff;
02868       uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
02869       FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
02870       FIN = DAG.getFrameIndex(FI, getPointerTy());
02871 
02872       if (Flags.isByVal()) {
02873         // Copy relative to framepointer.
02874         SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
02875         if (!StackPtr.getNode())
02876           StackPtr = DAG.getCopyFromReg(Chain, dl,
02877                                         RegInfo->getStackRegister(),
02878                                         getPointerTy());
02879         Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
02880 
02881         MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
02882                                                          ArgChain,
02883                                                          Flags, DAG, dl));
02884       } else {
02885         // Store relative to framepointer.
02886         MemOpChains2.push_back(
02887           DAG.getStore(ArgChain, dl, Arg, FIN,
02888                        MachinePointerInfo::getFixedStack(FI),
02889                        false, false, 0));
02890       }
02891     }
02892 
02893     if (!MemOpChains2.empty())
02894       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
02895 
02896     // Store the return address to the appropriate stack slot.
02897     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
02898                                      getPointerTy(), RegInfo->getSlotSize(),
02899                                      FPDiff, dl);
02900   }
02901 
02902   // Build a sequence of copy-to-reg nodes chained together with token chain
02903   // and flag operands which copy the outgoing args into registers.
02904   SDValue InFlag;
02905   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
02906     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
02907                              RegsToPass[i].second, InFlag);
02908     InFlag = Chain.getValue(1);
02909   }
02910 
02911   if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
02912     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
02913     // In the 64-bit large code model, we have to make all calls
02914     // through a register, since the call instruction's 32-bit
02915     // pc-relative offset may not be large enough to hold the whole
02916     // address.
02917   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
02918     // If the callee is a GlobalAddress node (quite common, every direct call
02919     // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
02920     // it.
02921 
02922     // We should use extra load for direct calls to dllimported functions in
02923     // non-JIT mode.
02924     const GlobalValue *GV = G->getGlobal();
02925     if (!GV->hasDLLImportStorageClass()) {
02926       unsigned char OpFlags = 0;
02927       bool ExtraLoad = false;
02928       unsigned WrapperKind = ISD::DELETED_NODE;
02929 
02930       // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
02931       // external symbols must go through the PLT in PIC mode.  If the symbol
02932       // has hidden or protected visibility, or if it is static or local, then
02933       // we don't need to use the PLT - we can directly call it.
02934       if (Subtarget->isTargetELF() &&
02935           DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
02936           GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
02937         OpFlags = X86II::MO_PLT;
02938       } else if (Subtarget->isPICStyleStubAny() &&
02939                  (GV->isDeclaration() || GV->isWeakForLinker()) &&
02940                  (!Subtarget->getTargetTriple().isMacOSX() ||
02941                   Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
02942         // PC-relative references to external symbols should go through $stub,
02943         // unless we're building with the leopard linker or later, which
02944         // automatically synthesizes these stubs.
02945         OpFlags = X86II::MO_DARWIN_STUB;
02946       } else if (Subtarget->isPICStyleRIPRel() &&
02947                  isa<Function>(GV) &&
02948                  cast<Function>(GV)->getAttributes().
02949                    hasAttribute(AttributeSet::FunctionIndex,
02950                                 Attribute::NonLazyBind)) {
02951         // If the function is marked as non-lazy, generate an indirect call
02952         // which loads from the GOT directly. This avoids runtime overhead
02953         // at the cost of eager binding (and one extra byte of encoding).
02954         OpFlags = X86II::MO_GOTPCREL;
02955         WrapperKind = X86ISD::WrapperRIP;
02956         ExtraLoad = true;
02957       }
02958 
02959       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
02960                                           G->getOffset(), OpFlags);
02961 
02962       // Add a wrapper if needed.
02963       if (WrapperKind != ISD::DELETED_NODE)
02964         Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
02965       // Add extra indirection if needed.
02966       if (ExtraLoad)
02967         Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
02968                              MachinePointerInfo::getGOT(),
02969                              false, false, false, 0);
02970     }
02971   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
02972     unsigned char OpFlags = 0;
02973 
02974     // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
02975     // external symbols should go through the PLT.
02976     if (Subtarget->isTargetELF() &&
02977         DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
02978       OpFlags = X86II::MO_PLT;
02979     } else if (Subtarget->isPICStyleStubAny() &&
02980                (!Subtarget->getTargetTriple().isMacOSX() ||
02981                 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
02982       // PC-relative references to external symbols should go through $stub,
02983       // unless we're building with the leopard linker or later, which
02984       // automatically synthesizes these stubs.
02985       OpFlags = X86II::MO_DARWIN_STUB;
02986     }
02987 
02988     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
02989                                          OpFlags);
02990   }
02991 
02992   // Returns a chain & a flag for retval copy to use.
02993   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
02994   SmallVector<SDValue, 8> Ops;
02995 
02996   if (!IsSibcall && isTailCall) {
02997     Chain = DAG.getCALLSEQ_END(Chain,
02998                                DAG.getIntPtrConstant(NumBytesToPop, true),
02999                                DAG.getIntPtrConstant(0, true), InFlag, dl);
03000     InFlag = Chain.getValue(1);
03001   }
03002 
03003   Ops.push_back(Chain);
03004   Ops.push_back(Callee);
03005 
03006   if (isTailCall)
03007     Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
03008 
03009   // Add argument registers to the end of the list so that they are known live
03010   // into the call.
03011   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
03012     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
03013                                   RegsToPass[i].second.getValueType()));
03014 
03015   // Add a register mask operand representing the call-preserved registers.
03016   const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
03017   const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
03018   assert(Mask && "Missing call preserved mask for calling convention");
03019   Ops.push_back(DAG.getRegisterMask(Mask));
03020 
03021   if (InFlag.getNode())
03022     Ops.push_back(InFlag);
03023 
03024   if (isTailCall) {
03025     // We used to do:
03026     //// If this is the first return lowered for this function, add the regs
03027     //// to the liveout set for the function.
03028     // This isn't right, although it's probably harmless on x86; liveouts
03029     // should be computed from returns not tail calls.  Consider a void
03030     // function making a tail call to a function returning int.
03031     return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
03032   }
03033 
03034   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
03035   InFlag = Chain.getValue(1);
03036 
03037   // Create the CALLSEQ_END node.
03038   unsigned NumBytesForCalleeToPop;
03039   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
03040                        DAG.getTarget().Options.GuaranteedTailCallOpt))
03041     NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
03042   else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
03043            !Subtarget->getTargetTriple().isOSMSVCRT() &&
03044            SR == StackStructReturn)
03045     // If this is a call to a struct-return function, the callee
03046     // pops the hidden struct pointer, so we have to push it back.
03047     // This is common for Darwin/X86, Linux & Mingw32 targets.
03048     // For MSVC Win32 targets, the caller pops the hidden struct pointer.
03049     NumBytesForCalleeToPop = 4;
03050   else
03051     NumBytesForCalleeToPop = 0;  // Callee pops nothing.
03052 
03053   // Returns a flag for retval copy to use.
03054   if (!IsSibcall) {
03055     Chain = DAG.getCALLSEQ_END(Chain,
03056                                DAG.getIntPtrConstant(NumBytesToPop, true),
03057                                DAG.getIntPtrConstant(NumBytesForCalleeToPop,
03058                                                      true),
03059                                InFlag, dl);
03060     InFlag = Chain.getValue(1);
03061   }
03062 
03063   // Handle result values, copying them out of physregs into vregs that we
03064   // return.
03065   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
03066                          Ins, dl, DAG, InVals);
03067 }
03068 
03069 //===----------------------------------------------------------------------===//
03070 //                Fast Calling Convention (tail call) implementation
03071 //===----------------------------------------------------------------------===//
03072 
03073 //  Like the stdcall convention, the callee cleans up the arguments, except that
03074 //  ECX is reserved for storing the address of the tail-called function. Only 2
03075 //  registers are free for argument passing (inreg). Tail call optimization is
03076 //  performed provided:
03077 //                * tailcallopt is enabled
03078 //                * caller/callee are fastcc
03079 //  On the X86_64 architecture with GOT-style position-independent code, only
03080 //  local (within-module) calls are supported at the moment.
03081 //  To keep the stack aligned according to the platform ABI, the function
03082 //  GetAlignedArgumentStackSize ensures that the argument delta is always a
03083 //  multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld
03084 //  for example.) If a tail-called callee has more arguments than the caller,
03085 //  the caller needs to make sure that there is room to move the RETADDR to.
03086 //  This is achieved by reserving an area the size of the argument delta right
03087 //  after the original RETADDR, but before the saved frame pointer or the
03088 //  spilled registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
03089 //  stack layout:
03090 //    arg1
03091 //    arg2
03092 //    RETADDR
03093 //    [ new RETADDR
03094 //      move area ]
03095 //    (possible EBP)
03096 //    ESI
03097 //    EDI
03098 //    local1 ..
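//  Worked example of the return-address move (hypothetical byte counts): if the
//  caller pops 12 bytes of its own arguments on return but the callee needs 28
//  bytes of stack arguments, then FPDiff = 12 - 28 = -16 in LowerCall, and the
//  RETADDR is re-stored 16 bytes further down, inside the reserved move area
//  shown above.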
03099 
03100 /// GetAlignedArgumentStackSize - Align the stack size, e.g. to 16n + 12, for a
03101 /// 16-byte alignment requirement.
03102 unsigned
03103 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
03104                                                SelectionDAG& DAG) const {
03105   MachineFunction &MF = DAG.getMachineFunction();
03106   const TargetMachine &TM = MF.getTarget();
03107   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
03108       TM.getSubtargetImpl()->getRegisterInfo());
03109   const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
03110   unsigned StackAlignment = TFI.getStackAlignment();
03111   uint64_t AlignMask = StackAlignment - 1;
03112   int64_t Offset = StackSize;
03113   unsigned SlotSize = RegInfo->getSlotSize();
03114   if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
03115     // The remainder is at most (StackAlignment - SlotSize), so just add the difference.
03116     Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
03117   } else {
03118     // Mask out the lower bits, then add StackAlignment plus (StackAlignment - SlotSize).
03119     Offset = ((~AlignMask) & Offset) + StackAlignment +
03120       (StackAlignment-SlotSize);
03121   }
03122   return Offset;
03123 }
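// A minimal standalone sketch of the rounding above (assuming a 16-byte stack
// alignment; the name and the default argument are illustrative only):
constexpr unsigned alignArgStackSizeSketch(unsigned StackSize, unsigned SlotSize,
                                           unsigned StackAlignment = 16) {
  // Mirror the two branches of GetAlignedArgumentStackSize.
  return ((StackSize & (StackAlignment - 1)) <= StackAlignment - SlotSize)
             ? StackSize +
                   ((StackAlignment - SlotSize) - (StackSize & (StackAlignment - 1)))
             : ((~(StackAlignment - 1) & StackSize) + StackAlignment +
                (StackAlignment - SlotSize));
}
// E.g. alignArgStackSizeSketch(8, 4) == 12 and alignArgStackSizeSketch(30, 4) == 44
// on 32-bit x86, and alignArgStackSizeSketch(40, 8) == 40 on x86-64; in each case
// pushing the SlotSize-byte return address brings the total to a multiple of 16.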
03124 
03125 /// MatchingStackOffset - Return true if the given stack call argument is
03126 /// already available in the same position (relatively) of the caller's
03127 /// incoming argument stack.
03128 static
03129 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
03130                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
03131                          const X86InstrInfo *TII) {
03132   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
03133   int FI = INT_MAX;
03134   if (Arg.getOpcode() == ISD::CopyFromReg) {
03135     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
03136     if (!TargetRegisterInfo::isVirtualRegister(VR))
03137       return false;
03138     MachineInstr *Def = MRI->getVRegDef(VR);
03139     if (!Def)
03140       return false;
03141     if (!Flags.isByVal()) {
03142       if (!TII->isLoadFromStackSlot(Def, FI))
03143         return false;
03144     } else {
03145       unsigned Opcode = Def->getOpcode();
03146       if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
03147           Def->getOperand(1).isFI()) {
03148         FI = Def->getOperand(1).getIndex();
03149         Bytes = Flags.getByValSize();
03150       } else
03151         return false;
03152     }
03153   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
03154     if (Flags.isByVal())
03155       // ByVal argument is passed in as a pointer but it's now being
03156       // dereferenced. e.g.
03157       // define @foo(%struct.X* %A) {
03158       //   tail call @bar(%struct.X* byval %A)
03159       // }
03160       return false;
03161     SDValue Ptr = Ld->getBasePtr();
03162     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
03163     if (!FINode)
03164       return false;
03165     FI = FINode->getIndex();
03166   } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
03167     FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
03168     FI = FINode->getIndex();
03169     Bytes = Flags.getByValSize();
03170   } else
03171     return false;
03172 
03173   assert(FI != INT_MAX);
03174   if (!MFI->isFixedObjectIndex(FI))
03175     return false;
03176   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
03177 }
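// Illustrative C-level case (hypothetical signatures) where the check above
// succeeds on x86-64: the 7th integer argument arrives on the caller's stack,
// and forwarding it unchanged means the outgoing value is a load from that
// same fixed slot with matching size and offset, so no new store is needed:
//
//   long callee(long, long, long, long, long, long, long);
//   long f(long a, long b, long c, long d, long e, long f6, long g) {
//     return callee(a, b, c, d, e, f6, g);
//   }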
03178 
03179 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
03180 /// for tail call optimization. Targets which want to do tail call
03181 /// optimization should implement this function.
03182 bool
03183 X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
03184                                                      CallingConv::ID CalleeCC,
03185                                                      bool isVarArg,
03186                                                      bool isCalleeStructRet,
03187                                                      bool isCallerStructRet,
03188                                                      Type *RetTy,
03189                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
03190                                     const SmallVectorImpl<SDValue> &OutVals,
03191                                     const SmallVectorImpl<ISD::InputArg> &Ins,
03192                                                      SelectionDAG &DAG) const {
03193   if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
03194     return false;
03195 
03196   // If -tailcallopt is specified, make fastcc functions tail-callable.
03197   const MachineFunction &MF = DAG.getMachineFunction();
03198   const Function *CallerF = MF.getFunction();
03199 
03200   // If the function return type is x86_fp80 and the callee return type is not,
03201   // then the FP_EXTEND of the call result is not a nop. It's not safe to
03202   // perform a tailcall optimization here.
03203   if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
03204     return false;
03205 
03206   CallingConv::ID CallerCC = CallerF->getCallingConv();
03207   bool CCMatch = CallerCC == CalleeCC;
03208   bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
03209   bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
03210 
03211   if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
03212     if (IsTailCallConvention(CalleeCC) && CCMatch)
03213       return true;
03214     return false;
03215   }
03216 
03217   // Look for obvious safe cases to perform tail call optimization that do not
03218   // require ABI changes. This is what gcc calls sibcall.
03219 
03220   // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
03221   // emit a special epilogue.
03222   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
03223       DAG.getSubtarget().getRegisterInfo());
03224   if (RegInfo->needsStackRealignment(MF))
03225     return false;
03226 
03227   // Also avoid sibcall optimization if either caller or callee uses struct
03228   // return semantics.
03229   if (isCalleeStructRet || isCallerStructRet)
03230     return false;
03231 
03232   // An stdcall/thiscall caller is expected to clean up its arguments; the
03233   // callee isn't going to do that.
03234   // FIXME: this is more restrictive than needed. We could produce a tailcall
03235   // when the stack adjustment matches. For example, with a thiscall that takes
03236   // only one argument.
03237   if (!CCMatch && (CallerCC == CallingConv::X86_StdCall ||
03238                    CallerCC == CallingConv::X86_ThisCall))
03239     return false;
03240 
03241   // Do not sibcall optimize vararg calls unless all arguments are passed via
03242   // registers.
03243   if (isVarArg && !Outs.empty()) {
03244 
03245     // Optimizing for varargs on Win64 is unlikely to be safe without
03246     // additional testing.
03247     if (IsCalleeWin64 || IsCallerWin64)
03248       return false;
03249 
03250     SmallVector<CCValAssign, 16> ArgLocs;
03251     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03252                    *DAG.getContext());
03253 
03254     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03255     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
03256       if (!ArgLocs[i].isRegLoc())
03257         return false;
03258   }
03259 
03260   // If the call result is in ST0 / ST1, it needs to be popped off the x87
03261   // stack.  Therefore, if it's not used by the call it is not safe to optimize
03262   // this into a sibcall.
03263   bool Unused = false;
03264   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
03265     if (!Ins[i].Used) {
03266       Unused = true;
03267       break;
03268     }
03269   }
03270   if (Unused) {
03271     SmallVector<CCValAssign, 16> RVLocs;
03272     CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
03273                    *DAG.getContext());
03274     CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
03275     for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
03276       CCValAssign &VA = RVLocs[i];
03277       if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
03278         return false;
03279     }
03280   }
03281 
03282   // If the calling conventions do not match, then we'd better make sure the
03283   // results are returned in the same way as what the caller expects.
03284   if (!CCMatch) {
03285     SmallVector<CCValAssign, 16> RVLocs1;
03286     CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
03287                     *DAG.getContext());
03288     CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
03289 
03290     SmallVector<CCValAssign, 16> RVLocs2;
03291     CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
03292                     *DAG.getContext());
03293     CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
03294 
03295     if (RVLocs1.size() != RVLocs2.size())
03296       return false;
03297     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
03298       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
03299         return false;
03300       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
03301         return false;
03302       if (RVLocs1[i].isRegLoc()) {
03303         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
03304           return false;
03305       } else {
03306         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
03307           return false;
03308       }
03309     }
03310   }
03311 
03312   // If the callee takes no arguments then go on to check the results of the
03313   // call.
03314   if (!Outs.empty()) {
03315     // Check if stack adjustment is needed. For now, do not do this if any
03316     // argument is passed on the stack.
03317     SmallVector<CCValAssign, 16> ArgLocs;
03318     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03319                    *DAG.getContext());
03320 
03321     // Allocate shadow area for Win64
03322     if (IsCalleeWin64)
03323       CCInfo.AllocateStack(32, 8);
03324 
03325     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03326     if (CCInfo.getNextStackOffset()) {
03327       MachineFunction &MF = DAG.getMachineFunction();
03328       if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
03329         return false;
03330 
03331       // Check if the arguments are already laid out in the right way as
03332       // the caller's fixed stack objects.
03333       MachineFrameInfo *MFI = MF.getFrameInfo();
03334       const MachineRegisterInfo *MRI = &MF.getRegInfo();
03335       const X86InstrInfo *TII =
03336           static_cast<const X86InstrInfo *>(DAG.getSubtarget().getInstrInfo());
03337       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03338         CCValAssign &VA = ArgLocs[i];
03339         SDValue Arg = OutVals[i];
03340         ISD::ArgFlagsTy Flags = Outs[i].Flags;
03341         if (VA.getLocInfo() == CCValAssign::Indirect)
03342           return false;
03343         if (!VA.isRegLoc()) {
03344           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
03345                                    MFI, MRI, TII))
03346             return false;
03347         }
03348       }
03349     }
03350 
03351     // If the tailcall address may be in a register, then make sure it's
03352     // possible to register allocate for it. In 32-bit, the call address can
03353     // only target EAX, EDX, or ECX since the tail call must be scheduled after
03354     // callee-saved registers are restored. These happen to be the same
03355     // registers used to pass 'inreg' arguments so watch out for those.
03356     if (!Subtarget->is64Bit() &&
03357         ((!isa<GlobalAddressSDNode>(Callee) &&
03358           !isa<ExternalSymbolSDNode>(Callee)) ||
03359          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03360       unsigned NumInRegs = 0;
03361       // In PIC we need an extra register to formulate the address computation
03362       // for the callee.
03363       unsigned MaxInRegs =
03364   (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
03365 
03366       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03367         CCValAssign &VA = ArgLocs[i];
03368         if (!VA.isRegLoc())
03369           continue;
03370         unsigned Reg = VA.getLocReg();
03371         switch (Reg) {
03372         default: break;
03373         case X86::EAX: case X86::EDX: case X86::ECX:
03374           if (++NumInRegs == MaxInRegs)
03375             return false;
03376           break;
03377         }
03378       }
03379     }
03380   }
03381 
03382   return true;
03383 }
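// Two illustrative C-level cases (hypothetical code) for the checks above,
// compiled without -tailcallopt on x86-64:
//
//   int g(int);
//   int f(int x) { return g(x); }      // arguments stay in registers, matching
//                                      // conventions, no sret: sibcall-eligible
//
//   struct S { long a[4]; };           // 32 bytes: returned via an sret pointer
//   struct S h(void);
//   struct S k(void) { return h(); }   // struct-return on both sides, rejected
//                                      // by the struct-return check above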
03384 
03385 FastISel *
03386 X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
03387                                   const TargetLibraryInfo *libInfo) const {
03388   return X86::createFastISel(funcInfo, libInfo);
03389 }
03390 
03391 //===----------------------------------------------------------------------===//
03392 //                           Other Lowering Hooks
03393 //===----------------------------------------------------------------------===//
03394 
03395 static bool MayFoldLoad(SDValue Op) {
03396   return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
03397 }
03398 
03399 static bool MayFoldIntoStore(SDValue Op) {
03400   return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
03401 }
03402 
03403 static bool isTargetShuffle(unsigned Opcode) {
03404   switch(Opcode) {
03405   default: return false;
03406   case X86ISD::PSHUFB:
03407   case X86ISD::PSHUFD:
03408   case X86ISD::PSHUFHW:
03409   case X86ISD::PSHUFLW:
03410   case X86ISD::SHUFP:
03411   case X86ISD::PALIGNR:
03412   case X86ISD::MOVLHPS:
03413   case X86ISD::MOVLHPD:
03414   case X86ISD::MOVHLPS:
03415   case X86ISD::MOVLPS:
03416   case X86ISD::MOVLPD:
03417   case X86ISD::MOVSHDUP:
03418   case X86ISD::MOVSLDUP:
03419   case X86ISD::MOVDDUP:
03420   case X86ISD::MOVSS:
03421   case X86ISD::MOVSD:
03422   case X86ISD::UNPCKL:
03423   case X86ISD::UNPCKH:
03424   case X86ISD::VPERMILP:
03425   case X86ISD::VPERM2X128:
03426   case X86ISD::VPERMI:
03427     return true;
03428   }
03429 }
03430 
03431 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03432                                     SDValue V1, SelectionDAG &DAG) {
03433   switch(Opc) {
03434   default: llvm_unreachable("Unknown x86 shuffle node");
03435   case X86ISD::MOVSHDUP:
03436   case X86ISD::MOVSLDUP:
03437   case X86ISD::MOVDDUP:
03438     return DAG.getNode(Opc, dl, VT, V1);
03439   }
03440 }
03441 
03442 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03443                                     SDValue V1, unsigned TargetMask,
03444                                     SelectionDAG &DAG) {
03445   switch(Opc) {
03446   default: llvm_unreachable("Unknown x86 shuffle node");
03447   case X86ISD::PSHUFD:
03448   case X86ISD::PSHUFHW:
03449   case X86ISD::PSHUFLW:
03450   case X86ISD::VPERMILP:
03451   case X86ISD::VPERMI:
03452     return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
03453   }
03454 }
03455 
03456 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03457                                     SDValue V1, SDValue V2, unsigned TargetMask,
03458                                     SelectionDAG &DAG) {
03459   switch(Opc) {
03460   default: llvm_unreachable("Unknown x86 shuffle node");
03461   case X86ISD::PALIGNR:
03462   case X86ISD::VALIGN:
03463   case X86ISD::SHUFP:
03464   case X86ISD::VPERM2X128:
03465     return DAG.getNode(Opc, dl, VT, V1, V2,
03466                        DAG.getConstant(TargetMask, MVT::i8));
03467   }
03468 }
03469 
03470 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03471                                     SDValue V1, SDValue V2, SelectionDAG &DAG) {
03472   switch(Opc) {
03473   default: llvm_unreachable("Unknown x86 shuffle node");
03474   case X86ISD::MOVLHPS:
03475   case X86ISD::MOVLHPD:
03476   case X86ISD::MOVHLPS:
03477   case X86ISD::MOVLPS:
03478   case X86ISD::MOVLPD:
03479   case X86ISD::MOVSS:
03480   case X86ISD::MOVSD:
03481   case X86ISD::UNPCKL:
03482   case X86ISD::UNPCKH:
03483     return DAG.getNode(Opc, dl, VT, V1, V2);
03484   }
03485 }
03486 
03487 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
03488   MachineFunction &MF = DAG.getMachineFunction();
03489   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
03490       DAG.getSubtarget().getRegisterInfo());
03491   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
03492   int ReturnAddrIndex = FuncInfo->getRAIndex();
03493 
03494   if (ReturnAddrIndex == 0) {
03495     // Set up a frame object for the return address.
03496     unsigned SlotSize = RegInfo->getSlotSize();
03497     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
03498                                                            -(int64_t)SlotSize,
03499                                                            false);
03500     FuncInfo->setRAIndex(ReturnAddrIndex);
03501   }
03502 
03503   return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
03504 }
03505 
03506 bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
03507                                        bool hasSymbolicDisplacement) {
03508   // Offset should fit into 32 bit immediate field.
03509   if (!isInt<32>(Offset))
03510     return false;
03511 
03512   // If we don't have a symbolic displacement - we don't have any extra
03513   // restrictions.
03514   if (!hasSymbolicDisplacement)
03515     return true;
03516 
03517   // FIXME: Some tweaks might be needed for medium code model.
03518   if (M != CodeModel::Small && M != CodeModel::Kernel)
03519     return false;
03520 
03521   // For the small code model we assume that the last object ends at least 16MB
03522   // before the 31-bit boundary. We may also accept pretty large negative
03523   // constants, knowing that all objects are in the positive half of the address space.
03524   if (M == CodeModel::Small && Offset < 16*1024*1024)
03525     return true;
03526 
03527   // For the kernel code model we know that all objects reside in the negative
03528   // half of the 32-bit address space. We do not accept negative offsets, since
03529   // they might leave that range, but we may accept pretty large positive ones.
03530   if (M == CodeModel::Kernel && Offset > 0)
03531     return true;
03532 
03533   return false;
03534 }
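
// For illustration, these cases follow directly from the checks above (a
// worked example, not additional logic):
//   isOffsetSuitableForCodeModel(0x400000, CodeModel::Small, true);   // true: 4MB < 16MB
//   isOffsetSuitableForCodeModel(-0x400000, CodeModel::Kernel, true); // false: not positive
//   isOffsetSuitableForCodeModel(1LL << 33, CodeModel::Small, false); // false: not a 32-bit imm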
03535 
03536 /// isCalleePop - Determines whether the callee is required to pop its
03537 /// own arguments. Callee pop is necessary to support tail calls.
03538 bool X86::isCalleePop(CallingConv::ID CallingConv,
03539                       bool is64Bit, bool IsVarArg, bool TailCallOpt) {
03540   if (IsVarArg)
03541     return false;
03542 
03543   switch (CallingConv) {
03544   default:
03545     return false;
03546   case CallingConv::X86_StdCall:
03547     return !is64Bit;
03548   case CallingConv::X86_FastCall:
03549     return !is64Bit;
03550   case CallingConv::X86_ThisCall:
03551     return !is64Bit;
03552   case CallingConv::Fast:
03553     return TailCallOpt;
03554   case CallingConv::GHC:
03555     return TailCallOpt;
03556   case CallingConv::HiPE:
03557     return TailCallOpt;
03558   }
03559 }
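
// For example, the 32-bit callee-pop conventions return true while their
// 64-bit counterparts do not, and fastcc pops only when TailCallOpt is set:
//   isCalleePop(CallingConv::X86_StdCall, /*is64Bit=*/false, false, false); // true
//   isCalleePop(CallingConv::X86_StdCall, /*is64Bit=*/true,  false, false); // false
//   isCalleePop(CallingConv::Fast, false, false, /*TailCallOpt=*/true);     // true
// Any vararg call returns false regardless of the convention.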
03560 
03561 /// \brief Return true if the condition is an unsigned comparison operation.
03562 static bool isX86CCUnsigned(unsigned X86CC) {
03563   switch (X86CC) {
03564   default: llvm_unreachable("Invalid integer condition!");
03565   case X86::COND_E:     return true;
03566   case X86::COND_G:     return false;
03567   case X86::COND_GE:    return false;
03568   case X86::COND_L:     return false;
03569   case X86::COND_LE:    return false;
03570   case X86::COND_NE:    return true;
03571   case X86::COND_B:     return true;
03572   case X86::COND_A:     return true;
03573   case X86::COND_BE:    return true;
03574   case X86::COND_AE:    return true;
03575   }
03576   llvm_unreachable("covered switch fell through?!");
03577 }
03578 
03579 /// TranslateX86CC - do a one-to-one translation of an ISD::CondCode to the
03580 /// X86-specific condition code, returning the condition code and the LHS/RHS
03581 /// of the comparison to make.
03582 static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
03583                                SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
03584   if (!isFP) {
03585     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
03586       if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
03587         // X > -1   -> X == 0, jump !sign.
03588         RHS = DAG.getConstant(0, RHS.getValueType());
03589         return X86::COND_NS;
03590       }
03591       if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
03592         // X < 0   -> X == 0, jump on sign.
03593         return X86::COND_S;
03594       }
03595       if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
03596         // X < 1   -> X <= 0
03597         RHS = DAG.getConstant(0, RHS.getValueType());
03598         return X86::COND_LE;
03599       }
03600     }
03601 
03602     switch (SetCCOpcode) {
03603     default: llvm_unreachable("Invalid integer condition!");
03604     case ISD::SETEQ:  return X86::COND_E;
03605     case ISD::SETGT:  return X86::COND_G;
03606     case ISD::SETGE:  return X86::COND_GE;
03607     case ISD::SETLT:  return X86::COND_L;
03608     case ISD::SETLE:  return X86::COND_LE;
03609     case ISD::SETNE:  return X86::COND_NE;
03610     case ISD::SETULT: return X86::COND_B;
03611     case ISD::SETUGT: return X86::COND_A;
03612     case ISD::SETULE: return X86::COND_BE;
03613     case ISD::SETUGE: return X86::COND_AE;
03614     }
03615   }
03616 
03617   // First determine if it is required or is profitable to flip the operands.
03618 
03619   // If LHS is a foldable load, but RHS is not, flip the condition.
03620   if (ISD::isNON_EXTLoad(LHS.getNode()) &&
03621       !ISD::isNON_EXTLoad(RHS.getNode())) {
03622     SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
03623     std::swap(LHS, RHS);
03624   }
03625 
03626   switch (SetCCOpcode) {
03627   default: break;
03628   case ISD::SETOLT:
03629   case ISD::SETOLE:
03630   case ISD::SETUGT:
03631   case ISD::SETUGE:
03632     std::swap(LHS, RHS);
03633     break;
03634   }
03635 
03636   // On a floating point condition, the flags are set as follows:
03637   // ZF  PF  CF   op
03638   //  0 | 0 | 0 | X > Y
03639   //  0 | 0 | 1 | X < Y
03640   //  1 | 0 | 0 | X == Y
03641   //  1 | 1 | 1 | unordered
03642   switch (SetCCOpcode) {
03643   default: llvm_unreachable("Condcode should be pre-legalized away");
03644   case ISD::SETUEQ:
03645   case ISD::SETEQ:   return X86::COND_E;
03646   case ISD::SETOLT:              // flipped
03647   case ISD::SETOGT:
03648   case ISD::SETGT:   return X86::COND_A;
03649   case ISD::SETOLE:              // flipped
03650   case ISD::SETOGE:
03651   case ISD::SETGE:   return X86::COND_AE;
03652   case ISD::SETUGT:              // flipped
03653   case ISD::SETULT:
03654   case ISD::SETLT:   return X86::COND_B;
03655   case ISD::SETUGE:              // flipped
03656   case ISD::SETULE:
03657   case ISD::SETLE:   return X86::COND_BE;
03658   case ISD::SETONE:
03659   case ISD::SETNE:   return X86::COND_NE;
03660   case ISD::SETUO:   return X86::COND_P;
03661   case ISD::SETO:    return X86::COND_NP;
03662   case ISD::SETOEQ:
03663   case ISD::SETUNE:  return X86::COND_INVALID;
03664   }
03665 }
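
// Worked example: an ordered floating-point "less than" (ISD::SETOLT) first
// swaps LHS and RHS above and then returns X86::COND_A, so an unordered result
// (ZF=PF=CF=1) fails the "above" test as required; the unsigned integer
// compare ISD::SETULT maps directly to X86::COND_B with no operand swap.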
03666 
03667 /// hasFPCMov - is there a floating point cmov for the specific X86 condition
03668 /// code. The current x86 ISA includes the following FP cmov instructions:
03669 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
03670 static bool hasFPCMov(unsigned X86CC) {
03671   switch (X86CC) {
03672   default:
03673     return false;
03674   case X86::COND_B:
03675   case X86::COND_BE:
03676   case X86::COND_E:
03677   case X86::COND_P:
03678   case X86::COND_A:
03679   case X86::COND_AE:
03680   case X86::COND_NE:
03681   case X86::COND_NP:
03682     return true;
03683   }
03684 }
03685 
03686 /// isFPImmLegal - Returns true if the target can instruction select the
03687 /// specified FP immediate natively. If false, the legalizer will
03688 /// materialize the FP immediate as a load from a constant pool.
03689 bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
03690   for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
03691     if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
03692       return true;
03693   }
03694   return false;
03695 }
03696 
03697 /// \brief Returns true if it is beneficial to convert a load of a constant
03698 /// to just the constant itself.
03699 bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
03700                                                           Type *Ty) const {
03701   assert(Ty->isIntegerTy());
03702 
03703   unsigned BitSize = Ty->getPrimitiveSizeInBits();
03704   if (BitSize == 0 || BitSize > 64)
03705     return false;
03706   return true;
03707 }
03708 
03709 /// isUndefOrInRange - Return true if Val is undef or if its value falls within
03710 /// the specified range [Low, Hi).
03711 static bool isUndefOrInRange(int Val, int Low, int Hi) {
03712   return (Val < 0) || (Val >= Low && Val < Hi);
03713 }
03714 
03715 /// isUndefOrEqual - Val is either less than zero (undef) or equal to the
03716 /// specified value.
03717 static bool isUndefOrEqual(int Val, int CmpVal) {
03718   return (Val < 0 || Val == CmpVal);
03719 }
03720 
03721 /// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
03722 /// at position Pos and ending at Pos+Size, is either undef or equals the
03723 /// corresponding value of the sequence Low, Low+1, Low+2, ...
03724 static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
03725                                        unsigned Pos, unsigned Size, int Low) {
03726   for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
03727     if (!isUndefOrEqual(Mask[i], Low))
03728       return false;
03729   return true;
03730 }
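
// For example, with Mask = <4, -1, 6, 7>:
//   isSequentialOrUndefInRange(Mask, 0, 4, 4) -> true  (4, undef, 6, 7)
//   isSequentialOrUndefInRange(Mask, 0, 4, 0) -> false (element 0 is 4, not 0)
//   isUndefOrInRange(-1, 0, 4)                -> true  (undef matches anything)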
03731 
03732 /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
03733 /// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
03734 /// the second operand.
03735 static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT) {
03736   if (VT == MVT::v4f32 || VT == MVT::v4i32 )
03737     return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
03738   if (VT == MVT::v2f64 || VT == MVT::v2i64)
03739     return (Mask[0] < 2 && Mask[1] < 2);
03740   return false;
03741 }
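
// For example, on v4i32 the mask <2, 3, 0, 1> references only the first
// operand and is a valid PSHUFD mask, while <4, 1, 2, 3> pulls element 0 from
// the second operand and is rejected:
//   isPSHUFDMask({2, 3, 0, 1}, MVT::v4i32) -> true
//   isPSHUFDMask({4, 1, 2, 3}, MVT::v4i32) -> false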
03742 
03743 /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
03744 /// is suitable for input to PSHUFHW.
03745 static bool isPSHUFHWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
03746   if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
03747     return false;
03748 
03749   // Lower quadword copied in order or undef.
03750   if (!isSequentialOrUndefInRange(Mask, 0, 4, 0))
03751     return false;
03752 
03753   // Upper quadword shuffled.
03754   for (unsigned i = 4; i != 8; ++i)
03755     if (!isUndefOrInRange(Mask[i], 4, 8))
03756       return false;
03757 
03758   if (VT == MVT::v16i16) {
03759     // Lower quadword copied in order or undef.
03760     if (!isSequentialOrUndefInRange(Mask, 8, 4, 8))
03761       return false;
03762 
03763     // Upper quadword shuffled.
03764     for (unsigned i = 12; i != 16; ++i)
03765       if (!isUndefOrInRange(Mask[i], 12, 16))
03766         return false;
03767   }
03768 
03769   return true;
03770 }
03771 
03772 /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
03773 /// is suitable for input to PSHUFLW.
03774 static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
03775   if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
03776     return false;
03777 
03778   // Upper quadword copied in order.
03779   if (!isSequentialOrUndefInRange(Mask, 4, 4, 4))
03780     return false;
03781 
03782   // Lower quadword shuffled.
03783   for (unsigned i = 0; i != 4; ++i)
03784     if (!isUndefOrInRange(Mask[i], 0, 4))
03785       return false;
03786 
03787   if (VT == MVT::v16i16) {
03788     // Upper quadword copied in order.
03789     if (!isSequentialOrUndefInRange(Mask, 12, 4, 12))
03790       return false;
03791 
03792     // Lower quadword shuffled.
03793     for (unsigned i = 8; i != 12; ++i)
03794       if (!isUndefOrInRange(Mask[i], 8, 12))
03795         return false;
03796   }
03797 
03798   return true;
03799 }
03800 
03801 /// \brief Return true if the mask specifies a shuffle of elements that is
03802 /// suitable for input to intralane (palignr) or interlane (valign) vector
03803 /// right-shift.
03804 static bool isAlignrMask(ArrayRef<int> Mask, MVT VT, bool InterLane) {
03805   unsigned NumElts = VT.getVectorNumElements();
03806   unsigned NumLanes = InterLane ? 1: VT.getSizeInBits()/128;
03807   unsigned NumLaneElts = NumElts/NumLanes;
03808 
03809   // Do not handle 64-bit element shuffles with palignr.
03810   if (NumLaneElts == 2)
03811     return false;
03812 
03813   for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
03814     unsigned i;
03815     for (i = 0; i != NumLaneElts; ++i) {
03816       if (Mask[i+l] >= 0)
03817         break;
03818     }
03819 
03820     // Lane is all undef, go to next lane
03821     if (i == NumLaneElts)
03822       continue;
03823 
03824     int Start = Mask[i+l];
03825 
03826     // Make sure it's in this lane in one of the sources
03827     if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
03828         !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
03829       return false;
03830 
03831     // If not lane 0, then we must match lane 0
03832     if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
03833       return false;
03834 
03835     // Correct second source to be contiguous with first source
03836     if (Start >= (int)NumElts)
03837       Start -= NumElts - NumLaneElts;
03838 
03839     // Make sure we're shifting in the right direction.
03840     if (Start <= (int)(i+l))
03841       return false;
03842 
03843     Start -= i;
03844 
03845     // Check the rest of the elements to see if they are consecutive.
03846     for (++i; i != NumLaneElts; ++i) {
03847       int Idx = Mask[i+l];
03848 
03849       // Make sure it's in this lane
03850       if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
03851           !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
03852         return false;
03853 
03854       // If not lane 0, then we must match lane 0
03855       if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
03856         return false;
03857 
03858       if (Idx >= (int)NumElts)
03859         Idx -= NumElts - NumLaneElts;
03860 
03861       if (!isUndefOrEqual(Idx, Start+i))
03862         return false;
03863 
03864     }
03865   }
03866 
03867   return true;
03868 }
03869 
03870 /// \brief Return true if the node specifies a shuffle of elements that is
03871 /// suitable for input to PALIGNR.
03872 static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
03873                           const X86Subtarget *Subtarget) {
03874   if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
03875       (VT.is256BitVector() && !Subtarget->hasInt256()) ||
03876       VT.is512BitVector())
03877     // FIXME: Add AVX512BW.
03878     return false;
03879 
03880   return isAlignrMask(Mask, VT, false);
03881 }
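
// For example, on v16i8 with SSSE3 the mask <1, 2, ..., 15, 16> concatenates
// the two sources and shifts the pair right by one byte, so it is accepted
// here; the matching PALIGNR immediate (see getShufflePALIGNRImmediate below)
// is 1.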
03882 
03883 /// \brief Return true if the node specifies a shuffle of elements that is
03884 /// suitable for input to VALIGN.
03885 static bool isVALIGNMask(ArrayRef<int> Mask, MVT VT,
03886                           const X86Subtarget *Subtarget) {
03887   // FIXME: Add AVX512VL.
03888   if (!VT.is512BitVector() || !Subtarget->hasAVX512())
03889     return false;
03890   return isAlignrMask(Mask, VT, true);
03891 }
03892 
03893 /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
03894 /// the two vector operands have swapped position.
03895 static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
03896                                      unsigned NumElems) {
03897   for (unsigned i = 0; i != NumElems; ++i) {
03898     int idx = Mask[i];
03899     if (idx < 0)
03900       continue;
03901     else if (idx < (int)NumElems)
03902       Mask[i] = idx + NumElems;
03903     else
03904       Mask[i] = idx - NumElems;
03905   }
03906 }
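
// For example, with NumElems = 4 the mask <0, 5, 2, 7> becomes <4, 1, 6, 3>:
// references to the first operand now name the second and vice versa, while
// undef (-1) entries are left untouched.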
03907 
03908 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
03909 /// specifies a shuffle of elements that is suitable for input to 128/256-bit
03910 /// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
03911 /// reverse of what x86 shuffles want.
03912 static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool Commuted = false) {
03913 
03914   unsigned NumElems = VT.getVectorNumElements();
03915   unsigned NumLanes = VT.getSizeInBits()/128;
03916   unsigned NumLaneElems = NumElems/NumLanes;
03917 
03918   if (NumLaneElems != 2 && NumLaneElems != 4)
03919     return false;
03920 
03921   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
03922   bool symetricMaskRequired =
03923     (VT.getSizeInBits() >= 256) && (EltSize == 32);
03924 
03925   // VSHUFPSY divides the resulting vector into 4 chunks.
03926   // The sources are also split into 4 chunks, and each destination
03927   // chunk must come from a different source chunk.
03928   //
03929   //  SRC1 =>   X7    X6    X5    X4    X3    X2    X1    X0
03930   //  SRC2 =>   Y7    Y6    Y5    Y4    Y3    Y2    Y1    Y0
03931   //
03932   //  DST  =>  Y7..Y4,   Y7..Y4,   X7..X4,   X7..X4,
03933   //           Y3..Y0,   Y3..Y0,   X3..X0,   X3..X0
03934   //
03935   // VSHUFPDY divides the resulting vector into 4 chunks.
03936   // The sources are also split into 4 chunks, and each destination
03937   // chunk must come from a different source chunk.
03938   //
03939   //  SRC1 =>      X3       X2       X1       X0
03940   //  SRC2 =>      Y3       Y2       Y1       Y0
03941   //
03942   //  DST  =>  Y3..Y2,  X3..X2,  Y1..Y0,  X1..X0
03943   //
03944   SmallVector<int, 4> MaskVal(NumLaneElems, -1);
03945   unsigned HalfLaneElems = NumLaneElems/2;
03946   for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
03947     for (unsigned i = 0; i != NumLaneElems; ++i) {
03948       int Idx = Mask[i+l];
03949       unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0);
03950       if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems))
03951         return false;
03952       // For VSHUFPSY, the mask of the second half must be the same as the
03953       // first but with the appropriate offsets. This works in the same way as
03954       // VPERMILPS works with masks.
03955       if (!symetricMaskRequired || Idx < 0)
03956         continue;
03957       if (MaskVal[i] < 0) {
03958         MaskVal[i] = Idx - l;
03959         continue;
03960       }
03961       if ((signed)(Idx - l) != MaskVal[i])
03962         return false;
03963     }
03964   }
03965 
03966   return true;
03967 }
03968 
03969 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
03970 /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
03971 static bool isMOVHLPSMask(ArrayRef<int> Mask, MVT VT) {
03972   if (!VT.is128BitVector())
03973     return false;
03974 
03975   unsigned NumElems = VT.getVectorNumElements();
03976 
03977   if (NumElems != 4)
03978     return false;
03979 
03980   // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
03981   return isUndefOrEqual(Mask[0], 6) &&
03982          isUndefOrEqual(Mask[1], 7) &&
03983          isUndefOrEqual(Mask[2], 2) &&
03984          isUndefOrEqual(Mask[3], 3);
03985 }
03986 
03987 /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
03988 /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
03989 /// <2, 3, 2, 3>
03990 static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, MVT VT) {
03991   if (!VT.is128BitVector())
03992     return false;
03993 
03994   unsigned NumElems = VT.getVectorNumElements();
03995 
03996   if (NumElems != 4)
03997     return false;
03998 
03999   return isUndefOrEqual(Mask[0], 2) &&
04000          isUndefOrEqual(Mask[1], 3) &&
04001          isUndefOrEqual(Mask[2], 2) &&
04002          isUndefOrEqual(Mask[3], 3);
04003 }
04004 
04005 /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
04006 /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
04007 static bool isMOVLPMask(ArrayRef<int> Mask, MVT VT) {
04008   if (!VT.is128BitVector())
04009     return false;
04010 
04011   unsigned NumElems = VT.getVectorNumElements();
04012 
04013   if (NumElems != 2 && NumElems != 4)
04014     return false;
04015 
04016   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
04017     if (!isUndefOrEqual(Mask[i], i + NumElems))
04018       return false;
04019 
04020   for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
04021     if (!isUndefOrEqual(Mask[i], i))
04022       return false;
04023 
04024   return true;
04025 }
04026 
04027 /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
04028 /// specifies a shuffle of elements that is suitable for input to MOVLHPS.
04029 static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) {
04030   if (!VT.is128BitVector())
04031     return false;
04032 
04033   unsigned NumElems = VT.getVectorNumElements();
04034 
04035   if (NumElems != 2 && NumElems != 4)
04036     return false;
04037 
04038   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
04039     if (!isUndefOrEqual(Mask[i], i))
04040       return false;
04041 
04042   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
04043     if (!isUndefOrEqual(Mask[i + e], i + NumElems))
04044       return false;
04045 
04046   return true;
04047 }
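
// For example, on v4f32 the mask <0, 1, 4, 5> takes the low half of V1
// followed by the low half of V2, which is exactly what MOVLHPS produces:
//   isMOVLHPSMask({0, 1, 4, 5}, MVT::v4f32) -> true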
04048 
04049 /// isINSERTPSMask - Return true if the specified VECTOR_SHUFFLE operand
04050 /// specifies a shuffle of elements that is suitable for input to INSERTPS.
04051 /// i.e., if all but one element come from the same vector.
04052 static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) {
04053   // TODO: Deal with AVX's VINSERTPS
04054   if (!VT.is128BitVector() || (VT != MVT::v4f32 && VT != MVT::v4i32))
04055     return false;
04056 
04057   unsigned CorrectPosV1 = 0;
04058   unsigned CorrectPosV2 = 0;
04059   for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
04060     if (Mask[i] == -1) {
04061       ++CorrectPosV1;
04062       ++CorrectPosV2;
04063       continue;
04064     }
04065 
04066     if (Mask[i] == i)
04067       ++CorrectPosV1;
04068     else if (Mask[i] == i + 4)
04069       ++CorrectPosV2;
04070   }
04071 
04072   if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
04073     // We have 3 elements (undefs count as elements from any vector) from one
04074     // vector, and one from another.
04075     return true;
04076 
04077   return false;
04078 }
04079 
04080 //
04081 // Some special combinations that can be optimized.
04082 //
04083 static
04084 SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
04085                                SelectionDAG &DAG) {
04086   MVT VT = SVOp->getSimpleValueType(0);
04087   SDLoc dl(SVOp);
04088 
04089   if (VT != MVT::v8i32 && VT != MVT::v8f32)
04090     return SDValue();
04091 
04092   ArrayRef<int> Mask = SVOp->getMask();
04093 
04094   // These are the special masks that may be optimized.
04095   static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
04096   static const int MaskToOptimizeOdd[]  = {1, 9, 3, 11, 5, 13, 7, 15};
04097   bool MatchEvenMask = true;
04098   bool MatchOddMask  = true;
04099   for (int i=0; i<8; ++i) {
04100     if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
04101       MatchEvenMask = false;
04102     if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
04103       MatchOddMask = false;
04104   }
04105 
04106   if (!MatchEvenMask && !MatchOddMask)
04107     return SDValue();
04108 
04109   SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
04110 
04111   SDValue Op0 = SVOp->getOperand(0);
04112   SDValue Op1 = SVOp->getOperand(1);
04113 
04114   if (MatchEvenMask) {
04115     // Shift the second operand right to 32 bits.
04116     static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
04117     Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask);
04118   } else {
04119     // Shift the first operand left to 32 bits.
04120     static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
04121     Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask);
04122   }
04123   static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
04124   return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask);
04125 }
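
// Worked example: for the even mask, Op1 is pre-shuffled with
// <-1, 0, -1, 2, -1, 4, -1, 6> so its even elements land in odd positions, and
// the fixed blend <0, 9, 2, 11, 4, 13, 6, 15> then interleaves them with Op0's
// even elements, reproducing {Op0[0], Op1[0], Op0[2], Op1[2], ...} from two
// single-source shuffles plus a blend.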
04126 
04127 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
04128 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
04129 static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
04130                          bool HasInt256, bool V2IsSplat = false) {
04131 
04132   assert(VT.getSizeInBits() >= 128 &&
04133          "Unsupported vector type for unpckl");
04134 
04135   // AVX defines UNPCK* to operate independently on 128-bit lanes.
04136   unsigned NumLanes;
04137   unsigned NumOf256BitLanes;
04138   unsigned NumElts = VT.getVectorNumElements();
04139   if (VT.is256BitVector()) {
04140     if (NumElts != 4 && NumElts != 8 &&
04141         (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04142       return false;
04143     NumLanes = 2;
04144     NumOf256BitLanes = 1;
04145   } else if (VT.is512BitVector()) {
04146     assert(VT.getScalarType().getSizeInBits() >= 32 &&
04147            "Unsupported vector type for unpckh");
04148     NumLanes = 2;
04149     NumOf256BitLanes = 2;
04150   } else {
04151     NumLanes = 1;
04152     NumOf256BitLanes = 1;
04153   }
04154 
04155   unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
04156   unsigned NumLaneElts = NumEltsInStride/NumLanes;
04157 
04158   for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
04159     for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
04160       for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
04161         int BitI  = Mask[l256*NumEltsInStride+l+i];
04162         int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
04163         if (!isUndefOrEqual(BitI, j+l256*NumElts))
04164           return false;
04165         if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
04166           return false;
04167         if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
04168           return false;
04169       }
04170     }
04171   }
04172   return true;
04173 }
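
// For example, on v4i32 the interleave mask <0, 4, 1, 5> matches UNPCKL, and
// on v8i32 (AVX) the per-128-bit-lane form <0, 8, 1, 9, 4, 12, 5, 13> does too.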
04174 
04175 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
04176 /// specifies a shuffle of elements that is suitable for input to UNPCKH.
04177 static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
04178                          bool HasInt256, bool V2IsSplat = false) {
04179   assert(VT.getSizeInBits() >= 128 &&
04180          "Unsupported vector type for unpckh");
04181 
04182   // AVX defines UNPCK* to operate independently on 128-bit lanes.
04183   unsigned NumLanes;
04184   unsigned NumOf256BitLanes;
04185   unsigned NumElts = VT.getVectorNumElements();
04186   if (VT.is256BitVector()) {
04187     if (NumElts != 4 && NumElts != 8 &&
04188         (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04189       return false;
04190     NumLanes = 2;
04191     NumOf256BitLanes = 1;
04192   } else if (VT.is512BitVector()) {
04193     assert(VT.getScalarType().getSizeInBits() >= 32 &&
04194            "Unsupported vector type for unpckh");
04195     NumLanes = 2;
04196     NumOf256BitLanes = 2;
04197   } else {
04198     NumLanes = 1;
04199     NumOf256BitLanes = 1;
04200   }
04201 
04202   unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
04203   unsigned NumLaneElts = NumEltsInStride/NumLanes;
04204 
04205   for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
04206     for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
04207       for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
04208         int BitI  = Mask[l256*NumEltsInStride+l+i];
04209         int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
04210         if (!isUndefOrEqual(BitI, j+l256*NumElts))
04211           return false;
04212         if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
04213           return false;
04214         if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
04215           return false;
04216       }
04217     }
04218   }
04219   return true;
04220 }
04221 
04222 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
04223 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
04224 /// <0, 0, 1, 1>
04225 static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
04226   unsigned NumElts = VT.getVectorNumElements();
04227   bool Is256BitVec = VT.is256BitVector();
04228 
04229   if (VT.is512BitVector())
04230     return false;
04231   assert((VT.is128BitVector() || VT.is256BitVector()) &&
04232          "Unsupported vector type for unpckh");
04233 
04234   if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
04235       (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04236     return false;
04237 
04238   // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
04239   // FIXME: Need a better way to get rid of this, there's no latency difference
04240   // between UNPCKLPD and MOVDDUP; the latter should always be checked first and
04241   // the former later. We should also remove the "_undef" special mask.
04242   if (NumElts == 4 && Is256BitVec)
04243     return false;
04244 
04245   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
04246   // independently on 128-bit lanes.
04247   unsigned NumLanes = VT.getSizeInBits()/128;
04248   unsigned NumLaneElts = NumElts/NumLanes;
04249 
04250   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
04251     for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
04252       int BitI  = Mask[l+i];
04253       int BitI1 = Mask[l+i+1];
04254 
04255       if (!isUndefOrEqual(BitI, j))
04256         return false;
04257       if (!isUndefOrEqual(BitI1, j))
04258         return false;
04259     }
04260   }
04261 
04262   return true;
04263 }
04264 
04265 /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
04266 /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
04267 /// <2, 2, 3, 3>
04268 static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
04269   unsigned NumElts = VT.getVectorNumElements();
04270 
04271   if (VT.is512BitVector())
04272     return false;
04273 
04274   assert((VT.is128BitVector() || VT.is256BitVector()) &&
04275          "Unsupported vector type for unpckh");
04276 
04277   if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
04278       (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04279     return false;
04280 
04281   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
04282   // independently on 128-bit lanes.
04283   unsigned NumLanes = VT.getSizeInBits()/128;
04284   unsigned NumLaneElts = NumElts/NumLanes;
04285 
04286   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
04287     for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
04288       int BitI  = Mask[l+i];
04289       int BitI1 = Mask[l+i+1];
04290       if (!isUndefOrEqual(BitI, j))
04291         return false;
04292       if (!isUndefOrEqual(BitI1, j))
04293         return false;
04294     }
04295   }
04296   return true;
04297 }
04298 
04299 // Match for INSERTI64x4/INSERTF64x4 instructions that produce (src0[0], src1[0])
04300 // or (src1[0], src0[1]), i.e. manipulation of 256-bit sub-vectors.
04301 static bool isINSERT64x4Mask(ArrayRef<int> Mask, MVT VT, unsigned int *Imm) {
04302   if (!VT.is512BitVector())
04303     return false;
04304 
04305   unsigned NumElts = VT.getVectorNumElements();
04306   unsigned HalfSize = NumElts/2;
04307   if (isSequentialOrUndefInRange(Mask, 0, HalfSize, 0)) {
04308     if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, NumElts)) {
04309       *Imm = 1;
04310       return true;
04311     }
04312   }
04313   if (isSequentialOrUndefInRange(Mask, 0, HalfSize, NumElts)) {
04314     if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, HalfSize)) {
04315       *Imm = 0;
04316       return true;
04317     }
04318   }
04319   return false;
04320 }
04321 
04322 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
04323 /// specifies a shuffle of elements that is suitable for input to MOVSS,
04324 /// MOVSD, and MOVD, i.e. setting the lowest element.
04325 static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
04326   if (VT.getVectorElementType().getSizeInBits() < 32)
04327     return false;
04328   if (!VT.is128BitVector())
04329     return false;
04330 
04331   unsigned NumElts = VT.getVectorNumElements();
04332 
04333   if (!isUndefOrEqual(Mask[0], NumElts))
04334     return false;
04335 
04336   for (unsigned i = 1; i != NumElts; ++i)
04337     if (!isUndefOrEqual(Mask[i], i))
04338       return false;
04339 
04340   return true;
04341 }
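
// For example, on v4i32 the mask <4, 1, 2, 3> takes element 0 from V2 and the
// remaining elements from V1 in order, the pattern MOVSS/MOVSD/MOVD expect:
//   isMOVLMask({4, 1, 2, 3}, MVT::v4i32) -> true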
04342 
04343 /// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
04344 /// as permutations between 128-bit chunks or halves. As an example, the
04345 /// shuffle below:
04346 ///   vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
04347 /// takes the first half of the result from the second half of V1 and the
04348 /// second half from the second half of V2.
04349 static bool isVPERM2X128Mask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
04350   if (!HasFp256 || !VT.is256BitVector())
04351     return false;
04352 
04353   // The shuffle result is divided into half A and half B. In total the two
04354   // sources have 4 halves, namely: C, D, E, F. The final values of A and
04355   // B must come from C, D, E or F.
04356   unsigned HalfSize = VT.getVectorNumElements()/2;
04357   bool MatchA = false, MatchB = false;
04358 
04359   // Check if A comes from one of C, D, E, F.
04360   for (unsigned Half = 0; Half != 4; ++Half) {
04361     if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
04362       MatchA = true;
04363       break;
04364     }
04365   }
04366 
04367   // Check if B comes from one of C, D, E, F.
04368   for (unsigned Half = 0; Half != 4; ++Half) {
04369     if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
04370       MatchB = true;
04371       break;
04372     }
04373   }
04374 
04375   return MatchA && MatchB;
04376 }
04377 
04378 /// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
04379 /// the specified VECTOR_SHUFFLE mask with VPERM2F128/VPERM2I128 instructions.
04380 static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
04381   MVT VT = SVOp->getSimpleValueType(0);
04382 
04383   unsigned HalfSize = VT.getVectorNumElements()/2;
04384 
04385   unsigned FstHalf = 0, SndHalf = 0;
04386   for (unsigned i = 0; i < HalfSize; ++i) {
04387     if (SVOp->getMaskElt(i) > 0) {
04388       FstHalf = SVOp->getMaskElt(i)/HalfSize;
04389       break;
04390     }
04391   }
04392   for (unsigned i = HalfSize; i < HalfSize*2; ++i) {
04393     if (SVOp->getMaskElt(i) > 0) {
04394       SndHalf = SVOp->getMaskElt(i)/HalfSize;
04395       break;
04396     }
04397   }
04398 
04399   return (FstHalf | (SndHalf << 4));
04400 }
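
// For the example mask <4, 5, 6, 7, 12, 13, 14, 15> on v8i32 mentioned above,
// the low half of the result is half 1 (upper half of V1) and the high half is
// half 3 (upper half of V2), giving the immediate 1 | (3 << 4) = 0x31.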
04401 
04402 // Symmetric in-lane mask. Each lane has 4 elements (for imm8).
04403 static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
04404   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
04405   if (EltSize < 32)
04406     return false;
04407 
04408   unsigned NumElts = VT.getVectorNumElements();
04409   Imm8 = 0;
04410   if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
04411     for (unsigned i = 0; i != NumElts; ++i) {
04412       if (Mask[i] < 0)
04413         continue;
04414       Imm8 |= Mask[i] << (i*2);
04415     }
04416     return true;
04417   }
04418 
04419   unsigned LaneSize = 4;
04420   SmallVector<int, 4> MaskVal(LaneSize, -1);
04421 
04422   for (unsigned l = 0; l != NumElts; l += LaneSize) {
04423     for (unsigned i = 0; i != LaneSize; ++i) {
04424       if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
04425         return false;
04426       if (Mask[i+l] < 0)
04427         continue;
04428       if (MaskVal[i] < 0) {
04429         MaskVal[i] = Mask[i+l] - l;
04430         Imm8 |= MaskVal[i] << (i*2);
04431         continue;
04432       }
04433       if (Mask[i+l] != (signed)(MaskVal[i]+l))
04434         return false;
04435     }
04436   }
04437   return true;
04438 }
04439 
04440 /// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
04441 /// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
04442 /// Note that VPERMIL mask matching differs depending on whether the underlying
04443 /// element type is 32 or 64 bits. For VPERMILPS the high half of the mask must
04444 /// select the same positions as the low half, but from the higher half of the
04445 /// source. For VPERMILPD the two lanes may be shuffled independently, with the
04446 /// same restriction that lanes can't be crossed. Also handles PSHUFDY.
04447 static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT) {
04448   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
04449   if (VT.getSizeInBits() < 256 || EltSize < 32)
04450     return false;
04451   bool symetricMaskRequired = (EltSize == 32);
04452   unsigned NumElts = VT.getVectorNumElements();
04453 
04454   unsigned NumLanes = VT.getSizeInBits()/128;
04455   unsigned LaneSize = NumElts/NumLanes;
04456   // 2 or 4 elements in one lane
04457 
04458   SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
04459   for (unsigned l = 0; l != NumElts; l += LaneSize) {
04460     for (unsigned i = 0; i != LaneSize; ++i) {
04461       if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
04462         return false;
04463       if (symetricMaskRequired) {
04464         if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
04465           ExpectedMaskVal[i] = Mask[i+l] - l;
04466           continue;
04467         }
04468         if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l))
04469           return false;
04470       }
04471     }
04472   }
04473   return true;
04474 }
04475 
04476 /// isCommutedMOVLMask - Returns true if the shuffle mask is exactly the reverse
04477 /// of what x86 movss wants: movss requires the lowest element to be the lowest
04478 /// element of vector 2 and the other elements to come from vector 1 in order.
04479 static bool isCommutedMOVLMask(ArrayRef<int> Mask, MVT VT,
04480                                bool V2IsSplat = false, bool V2IsUndef = false) {
04481   if (!VT.is128BitVector())
04482     return false;
04483 
04484   unsigned NumOps = VT.getVectorNumElements();
04485   if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
04486     return false;
04487 
04488   if (!isUndefOrEqual(Mask[0], 0))
04489     return false;
04490 
04491   for (unsigned i = 1; i != NumOps; ++i)
04492     if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
04493           (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
04494           (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
04495       return false;
04496 
04497   return true;
04498 }
04499 
04500 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
04501 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
04502 /// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
04503 static bool isMOVSHDUPMask(ArrayRef<int> Mask, MVT VT,
04504                            const X86Subtarget *Subtarget) {
04505   if (!Subtarget->hasSSE3())
04506     return false;
04507 
04508   unsigned NumElems = VT.getVectorNumElements();
04509 
04510   if ((VT.is128BitVector() && NumElems != 4) ||
04511       (VT.is256BitVector() && NumElems != 8) ||
04512       (VT.is512BitVector() && NumElems != 16))
04513     return false;
04514 
04515   // "i+1" is the value the indexed mask element must have
04516   for (unsigned i = 0; i != NumElems; i += 2)
04517     if (!isUndefOrEqual(Mask[i], i+1) ||
04518         !isUndefOrEqual(Mask[i+1], i+1))
04519       return false;
04520 
04521   return true;
04522 }
04523 
04524 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
04525 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
04526 /// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
04527 static bool isMOVSLDUPMask(ArrayRef<int> Mask, MVT VT,
04528                            const X86Subtarget *Subtarget) {
04529   if (!Subtarget->hasSSE3())
04530     return false;
04531 
04532   unsigned NumElems = VT.getVectorNumElements();
04533 
04534   if ((VT.is128BitVector() && NumElems != 4) ||
04535       (VT.is256BitVector() && NumElems != 8) ||
04536       (VT.is512BitVector() && NumElems != 16))
04537     return false;
04538 
04539   // "i" is the value the indexed mask element must have
04540   for (unsigned i = 0; i != NumElems; i += 2)
04541     if (!isUndefOrEqual(Mask[i], i) ||
04542         !isUndefOrEqual(Mask[i+1], i))
04543       return false;
04544 
04545   return true;
04546 }
04547 
04548 /// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
04549 /// specifies a shuffle of elements that is suitable for input to 256-bit
04550 /// version of MOVDDUP.
04551 static bool isMOVDDUPYMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
04552   if (!HasFp256 || !VT.is256BitVector())
04553     return false;
04554 
04555   unsigned NumElts = VT.getVectorNumElements();
04556   if (NumElts != 4)
04557     return false;
04558 
04559   for (unsigned i = 0; i != NumElts/2; ++i)
04560     if (!isUndefOrEqual(Mask[i], 0))
04561       return false;
04562   for (unsigned i = NumElts/2; i != NumElts; ++i)
04563     if (!isUndefOrEqual(Mask[i], NumElts/2))
04564       return false;
04565   return true;
04566 }
04567 
04568 /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
04569 /// specifies a shuffle of elements that is suitable for input to 128-bit
04570 /// version of MOVDDUP.
04571 static bool isMOVDDUPMask(ArrayRef<int> Mask, MVT VT) {
04572   if (!VT.is128BitVector())
04573     return false;
04574 
04575   unsigned e = VT.getVectorNumElements() / 2;
04576   for (unsigned i = 0; i != e; ++i)
04577     if (!isUndefOrEqual(Mask[i], i))
04578       return false;
04579   for (unsigned i = 0; i != e; ++i)
04580     if (!isUndefOrEqual(Mask[e+i], i))
04581       return false;
04582   return true;
04583 }
04584 
04585 /// isVEXTRACTIndex - Return true if the specified
04586 /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
04587 /// suitable for instruction that extract 128 or 256 bit vectors
04588 static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
04589   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
04590   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
04591     return false;
04592 
04593   // The index should be aligned on a vecWidth-bit boundary.
04594   uint64_t Index =
04595     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
04596 
04597   MVT VT = N->getSimpleValueType(0);
04598   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
04599   bool Result = (Index * ElSize) % vecWidth == 0;
04600 
04601   return Result;
04602 }
04603 
04604 /// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR
04605 /// operand specifies a subvector insert that is suitable for input to
04606 /// insertion of 128 or 256-bit subvectors
04607 static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
04608   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
04609   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
04610     return false;
04611   // The index should be aligned on a vecWidth-bit boundary.
04612   uint64_t Index =
04613     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
04614 
04615   MVT VT = N->getSimpleValueType(0);
04616   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
04617   bool Result = (Index * ElSize) % vecWidth == 0;
04618 
04619   return Result;
04620 }
04621 
04622 bool X86::isVINSERT128Index(SDNode *N) {
04623   return isVINSERTIndex(N, 128);
04624 }
04625 
04626 bool X86::isVINSERT256Index(SDNode *N) {
04627   return isVINSERTIndex(N, 256);
04628 }
04629 
04630 bool X86::isVEXTRACT128Index(SDNode *N) {
04631   return isVEXTRACTIndex(N, 128);
04632 }
04633 
04634 bool X86::isVEXTRACT256Index(SDNode *N) {
04635   return isVEXTRACTIndex(N, 256);
04636 }
04637 
04638 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
04639 /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
04640 /// Handles 128-bit and 256-bit.
04641 static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
04642   MVT VT = N->getSimpleValueType(0);
04643 
04644   assert((VT.getSizeInBits() >= 128) &&
04645          "Unsupported vector type for PSHUF/SHUFP");
04646 
04647   // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
04648   // independently on 128-bit lanes.
04649   unsigned NumElts = VT.getVectorNumElements();
04650   unsigned NumLanes = VT.getSizeInBits()/128;
04651   unsigned NumLaneElts = NumElts/NumLanes;
04652 
04653   assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
04654          "Only supports 2, 4 or 8 elements per lane");
04655 
04656   unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
04657   unsigned Mask = 0;
04658   for (unsigned i = 0; i != NumElts; ++i) {
04659     int Elt = N->getMaskElt(i);
04660     if (Elt < 0) continue;
04661     Elt &= NumLaneElts - 1;
04662     unsigned ShAmt = (i << Shift) % 8;
04663     Mask |= Elt << ShAmt;
04664   }
04665 
04666   return Mask;
04667 }
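
// For example, the v4i32 reversal mask <3, 2, 1, 0> encodes as
// 3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1B, the immediate PSHUFD uses to
// reverse a <4 x i32> vector.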
04668 
04669 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
04670 /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
04671 static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
04672   MVT VT = N->getSimpleValueType(0);
04673 
04674   assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
04675          "Unsupported vector type for PSHUFHW");
04676 
04677   unsigned NumElts = VT.getVectorNumElements();
04678 
04679   unsigned Mask = 0;
04680   for (unsigned l = 0; l != NumElts; l += 8) {
04681     // 8 nodes per lane, but we only care about the last 4.
04682     for (unsigned i = 0; i < 4; ++i) {
04683       int Elt = N->getMaskElt(l+i+4);
04684       if (Elt < 0) continue;
04685       Elt &= 0x3; // only 2-bits.
04686       Mask |= Elt << (i * 2);
04687     }
04688   }
04689 
04690   return Mask;
04691 }
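
// For example, the v8i16 mask <0, 1, 2, 3, 7, 6, 5, 4> leaves the low quadword
// in place and reverses the high one; only elements 4-7 are encoded, giving
// 3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1B.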
04692 
04693 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
04694 /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
04695 static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
04696   MVT VT = N->getSimpleValueType(0);
04697 
04698   assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
04699          "Unsupported vector type for PSHUFHW");
04700 
04701   unsigned NumElts = VT.getVectorNumElements();
04702 
04703   unsigned Mask = 0;
04704   for (unsigned l = 0; l != NumElts; l += 8) {
04705     // 8 nodes per lane, but we only care about the first 4.
04706     for (unsigned i = 0; i < 4; ++i) {
04707       int Elt = N->getMaskElt(l+i);
04708       if (Elt < 0) continue;
04709       Elt &= 0x3; // only 2-bits
04710       Mask |= Elt << (i * 2);
04711     }
04712   }
04713 
04714   return Mask;
04715 }
04716 
04717 /// \brief Return the appropriate immediate to shuffle the specified
04718 /// VECTOR_SHUFFLE mask with the PALIGNR (if InterLane is false) or with
04719 /// VALIGN (if Interlane is true) instructions.
04720 static unsigned getShuffleAlignrImmediate(ShuffleVectorSDNode *SVOp,
04721                                            bool InterLane) {
04722   MVT VT = SVOp->getSimpleValueType(0);
04723   unsigned EltSize = InterLane ? 1 :
04724     VT.getVectorElementType().getSizeInBits() >> 3;
04725 
04726   unsigned NumElts = VT.getVectorNumElements();
04727   unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
04728   unsigned NumLaneElts = NumElts/NumLanes;
04729 
04730   int Val = 0;
04731   unsigned i;
04732   for (i = 0; i != NumElts; ++i) {
04733     Val = SVOp->getMaskElt(i);
04734     if (Val >= 0)
04735       break;
04736   }
04737   if (Val >= (int)NumElts)
04738     Val -= NumElts - NumLaneElts;
04739 
04740   assert(Val - i > 0 && "PALIGNR imm should be positive");
04741   return (Val - i) * EltSize;
04742 }
04743 
04744 /// \brief Return the appropriate immediate to shuffle the specified
04745 /// VECTOR_SHUFFLE mask with the PALIGNR instruction.
04746 static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
04747   return getShuffleAlignrImmediate(SVOp, false);
04748 }
04749 
04750 /// \brief Return the appropriate immediate to shuffle the specified
04751 /// VECTOR_SHUFFLE mask with the VALIGN instruction.
04752 static unsigned getShuffleVALIGNImmediate(ShuffleVectorSDNode *SVOp) {
04753   return getShuffleAlignrImmediate(SVOp, true);
04754 }
04755 
04756 
04757 static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
04758   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
04759   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
04760     llvm_unreachable("Illegal extract subvector for VEXTRACT");
04761 
04762   uint64_t Index =
04763     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
04764 
04765   MVT VecVT = N->getOperand(0).getSimpleValueType();
04766   MVT ElVT = VecVT.getVectorElementType();
04767 
04768   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
04769   return Index / NumElemsPerChunk;
04770 }
04771 
04772 static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
04773   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
04774   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
04775     llvm_unreachable("Illegal insert subvector for VINSERT");
04776 
04777   uint64_t Index =
04778     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
04779 
04780   MVT VecVT = N->getSimpleValueType(0);
04781   MVT ElVT = VecVT.getVectorElementType();
04782 
04783   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
04784   return Index / NumElemsPerChunk;
04785 }
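
// For example, extracting the v4f32 subvector at element index 4 of a v8f32
// (vecWidth = 128) is 128-bit aligned, and with 4 elements per 128-bit chunk
// the VEXTRACTF128 immediate is 4 / 4 = 1; inserting a v4f32 at element index
// 4 of a v8f32 yields the same VINSERTF128 immediate.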
04786 
04787 /// getExtractVEXTRACT128Immediate - Return the appropriate immediate
04788 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
04789 /// and VEXTRACTI128 instructions.
04790 unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
04791   return getExtractVEXTRACTImmediate(N, 128);
04792 }
04793 
04794 /// getExtractVEXTRACT256Immediate - Return the appropriate immediate
04795 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4
04796 /// and VEXTRACTI64x4 instructions.
04797 unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
04798   return getExtractVEXTRACTImmediate(N, 256);
04799 }
04800 
04801 /// getInsertVINSERT128Immediate - Return the appropriate immediate
04802 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
04803 /// and VINSERTI128 instructions.
04804 unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
04805   return getInsertVINSERTImmediate(N, 128);
04806 }
04807 
04808 /// getInsertVINSERT256Immediate - Return the appropriate immediate
04809 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF64x4
04810 /// and VINSERTI64x4 instructions.
04811 unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
04812   return getInsertVINSERTImmediate(N, 256);
04813 }
04814 
04815 /// isZero - Returns true if V is a constant integer zero
04816 static bool isZero(SDValue V) {
04817   ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
04818   return C && C->isNullValue();
04819 }
04820 
04821 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
04822 /// constant +0.0.
04823 bool X86::isZeroNode(SDValue Elt) {
04824   if (isZero(Elt))
04825     return true;
04826   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
04827     return CFP->getValueAPF().isPosZero();
04828   return false;
04829 }
04830 
04831 /// ShouldXformToMOVHLPS - Return true if the node should be transformed to
04832 /// match movhlps. The lower half elements should come from the upper half of
04833 /// V1 (and in order), and the upper half elements should come from the upper
04834 /// half of V2 (and in order).
04835 static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, MVT VT) {
04836   if (!VT.is128BitVector())
04837     return false;
04838   if (VT.getVectorNumElements() != 4)
04839     return false;
04840   for (unsigned i = 0, e = 2; i != e; ++i)
04841     if (!isUndefOrEqual(Mask[i], i+2))
04842       return false;
04843   for (unsigned i = 2; i != 4; ++i)
04844     if (!isUndefOrEqual(Mask[i], i+4))
04845       return false;
04846   return true;
04847 }
04848 
04849 /// isScalarLoadToVector - Returns true if the node is a scalar load that
04850 /// is promoted to a vector. It also returns the LoadSDNode by reference if
04851 /// required.
04852 static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = nullptr) {
04853   if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
04854     return false;
04855   N = N->getOperand(0).getNode();
04856   if (!ISD::isNON_EXTLoad(N))
04857     return false;
04858   if (LD)
04859     *LD = cast<LoadSDNode>(N);
04860   return true;
04861 }
04862 
04863 // Test whether the given value is a vector value which will be legalized
04864 // into a load.
04865 static bool WillBeConstantPoolLoad(SDNode *N) {
04866   if (N->getOpcode() != ISD::BUILD_VECTOR)
04867     return false;
04868 
04869   // Check for any non-constant elements.
04870   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
04871     switch (N->getOperand(i).getNode()->getOpcode()) {
04872     case ISD::UNDEF:
04873     case ISD::ConstantFP:
04874     case ISD::Constant:
04875       break;
04876     default:
04877       return false;
04878     }
04879 
04880   // Vectors of all-zeros and all-ones are materialized with special
04881   // instructions rather than being loaded.
04882   return !ISD::isBuildVectorAllZeros(N) &&
04883          !ISD::isBuildVectorAllOnes(N);
04884 }
04885 
04886 /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
04887 /// match movlp{s|d}. The lower half elements should come from the lower half of
04888 /// V1 (and in order), and the upper half elements should come from the upper
04889 /// half of V2 (and in order). Since V1 will become the source of the MOVLP, it
04890 /// must be either a vector load or a scalar load to vector.
04891 static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
04892                                ArrayRef<int> Mask, MVT VT) {
04893   if (!VT.is128BitVector())
04894     return false;
04895 
04896   if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
04897     return false;
04898   // If V2 is a vector load, don't do this transformation. We will try to use a
04899   // load-folding shufps op instead.
04900   if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2))
04901     return false;
04902 
04903   unsigned NumElems = VT.getVectorNumElements();
04904 
04905   if (NumElems != 2 && NumElems != 4)
04906     return false;
04907   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
04908     if (!isUndefOrEqual(Mask[i], i))
04909       return false;
04910   for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
04911     if (!isUndefOrEqual(Mask[i], i+NumElems))
04912       return false;
04913   return true;
04914 }
04915 
04916 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
04917 /// to a zero vector.
04918 /// FIXME: move to dag combiner / method on ShuffleVectorSDNode
04919 static bool isZeroShuffle(ShuffleVectorSDNode *N) {
04920   SDValue V1 = N->getOperand(0);
04921   SDValue V2 = N->getOperand(1);
04922   unsigned NumElems = N->getValueType(0).getVectorNumElements();
04923   for (unsigned i = 0; i != NumElems; ++i) {
04924     int Idx = N->getMaskElt(i);
04925     if (Idx >= (int)NumElems) {
04926       unsigned Opc = V2.getOpcode();
04927       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
04928         continue;
04929       if (Opc != ISD::BUILD_VECTOR ||
04930           !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
04931         return false;
04932     } else if (Idx >= 0) {
04933       unsigned Opc = V1.getOpcode();
04934       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
04935         continue;
04936       if (Opc != ISD::BUILD_VECTOR ||
04937           !X86::isZeroNode(V1.getOperand(Idx)))
04938         return false;
04939     }
04940   }
04941   return true;
04942 }
04943 
04944 /// getZeroVector - Returns a vector of specified type with all zero elements.
04945 ///
04946 static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
04947                              SelectionDAG &DAG, SDLoc dl) {
04948   assert(VT.isVector() && "Expected a vector type");
04949 
04950   // Always build SSE zero vectors as <4 x i32> bitcasted
04951   // to their dest type. This ensures they get CSE'd.
04952   SDValue Vec;
04953   if (VT.is128BitVector()) {  // SSE
04954     if (Subtarget->hasSSE2()) {  // SSE2
04955       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
04956       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
04957     } else { // SSE1
04958       SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
04959       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
04960     }
04961   } else if (VT.is256BitVector()) { // AVX
04962     if (Subtarget->hasInt256()) { // AVX2
04963       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
04964       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04965       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
04966     } else {
04967       // 256-bit logic and arithmetic instructions in AVX are all
04968       // floating-point, no support for integer ops. Emit fp zeroed vectors.
04969       SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
04970       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04971       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
04972     }
04973   } else if (VT.is512BitVector()) { // AVX-512
04974       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
04975       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
04976                         Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04977       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
04978   } else if (VT.getScalarType() == MVT::i1) {
04979     assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
04980     SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
04981     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
04982     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
04983   } else
04984     llvm_unreachable("Unexpected vector type");
04985 
04986   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
04987 }
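// Editor's note (illustrative): because every 128-bit zero is built as the same
// canonical v4i32 (or v4f32 on SSE1) BUILD_VECTOR and only the final BITCAST
// differs, requests for zeros of different types share one underlying node:
//
//   SDValue Z1 = getZeroVector(MVT::v2f64, Subtarget, DAG, dl);
//   SDValue Z2 = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
//   // Both bitcast the identical all-zero v4i32 BUILD_VECTOR, so the
//   // SelectionDAG CSE map keeps a single copy of the zero constant.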
04988 
04989 /// getOnesVector - Returns a vector of specified type with all bits set.
04990 /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
04991 /// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
04992 /// Then bitcast to their original type, ensuring they get CSE'd.
04993 static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
04994                              SDLoc dl) {
04995   assert(VT.isVector() && "Expected a vector type");
04996 
04997   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
04998   SDValue Vec;
04999   if (VT.is256BitVector()) {
05000     if (HasInt256) { // AVX2
05001       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
05002       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
05003     } else { // AVX
05004       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
05005       Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
05006     }
05007   } else if (VT.is128BitVector()) {
05008     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
05009   } else
05010     llvm_unreachable("Unexpected vector type");
05011 
05012   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
05013 }
05014 
05015 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
05016 /// that point to V2 point to its first element.
05017 static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) {
05018   for (unsigned i = 0; i != NumElems; ++i) {
05019     if (Mask[i] > (int)NumElems) {
05020       Mask[i] = NumElems;
05021     }
05022   }
05023 }
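// Editor's note (worked example): with NumElems == 4 and a splat V2, a mask
// such as <0, 5, 2, 7> is rewritten to <0, 4, 2, 4>: indices greater than
// NumElems are redirected to NumElems (V2's element 0), while an index equal
// to NumElems already means V2[0] and is left alone. Since V2 is a splat, all
// of its elements are identical, so the rewrite preserves the shuffle's value.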
05024 
05025 /// getMOVL - Returns a vector_shuffle node for a movs{s|d}, movd
05026 /// operation of the specified width.
05027 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
05028                        SDValue V2) {
05029   unsigned NumElems = VT.getVectorNumElements();
05030   SmallVector<int, 8> Mask;
05031   Mask.push_back(NumElems);
05032   for (unsigned i = 1; i != NumElems; ++i)
05033     Mask.push_back(i);
05034   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
05035 }
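// Editor's note (illustrative): for a 4-element type this builds the mask
// <4, 1, 2, 3>, i.e. element 0 of the result is taken from V2 and the remaining
// elements from V1. This is the MOVSS/MOVSD-style "replace the low element"
// pattern, the same layout getTargetShuffleMask decodes for X86ISD::MOVSS and
// X86ISD::MOVSD below.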
05036 
05037 /// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
05038 static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
05039                           SDValue V2) {
05040   unsigned NumElems = VT.getVectorNumElements();
05041   SmallVector<int, 8> Mask;
05042   for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
05043     Mask.push_back(i);
05044     Mask.push_back(i + NumElems);
05045   }
05046   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
05047 }
05048 
05049 /// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
05050 static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
05051                           SDValue V2) {
05052   unsigned NumElems = VT.getVectorNumElements();
05053   SmallVector<int, 8> Mask;
05054   for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
05055     Mask.push_back(i + Half);
05056     Mask.push_back(i + NumElems + Half);
05057   }
05058   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
05059 }
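// Editor's note (illustrative): for a 4-element type getUnpackl builds the mask
// <0, 4, 1, 5> (interleave the low halves of V1 and V2) and getUnpackh builds
// <2, 6, 3, 7> (interleave the high halves), matching the PUNPCKL*/PUNPCKH*
// and UNPCKLPS/UNPCKHPS instruction semantics.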
05060 
05061 // PromoteSplati8i16 - i16 and i8 vector types can't be used directly by a
05062 // generic shuffle instruction because the target has no such instructions.
05063 // Generate shuffles which repeat i16 and i8 several times until they can be
05064 // represented by v4f32 and then be manipulated by target supported shuffles.
05065 static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
05066   MVT VT = V.getSimpleValueType();
05067   int NumElems = VT.getVectorNumElements();
05068   SDLoc dl(V);
05069 
05070   while (NumElems > 4) {
05071     if (EltNo < NumElems/2) {
05072       V = getUnpackl(DAG, dl, VT, V, V);
05073     } else {
05074       V = getUnpackh(DAG, dl, VT, V, V);
05075       EltNo -= NumElems/2;
05076     }
05077     NumElems >>= 1;
05078   }
05079   return V;
05080 }
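// Editor's note (worked example): splatting element 11 of a v16i8 value V:
//  - NumElems = 16, EltNo = 11 >= 8, so unpackh(V, V) duplicates bytes 8..15;
//    byte 11 now occupies bytes 6 and 7, and EltNo becomes 3 (of 8 groups).
//  - NumElems = 8, EltNo = 3 < 4, so unpackl duplicates again; byte 11 now
//    fills bytes 12..15, i.e. one full 32-bit lane (group 3 of 4).
// The caller (PromoteSplat) can then finish with a v4f32 splat of lane 3 via
// getLegalSplat.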
05081 
05082 /// getLegalSplat - Generate a legal splat with supported x86 shuffles
05083 static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
05084   MVT VT = V.getSimpleValueType();
05085   SDLoc dl(V);
05086 
05087   if (VT.is128BitVector()) {
05088     V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
05089     int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
05090     V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
05091                              &SplatMask[0]);
05092   } else if (VT.is256BitVector()) {
05093     // To use VPERMILPS to splat scalars, the second half of indices must
05094     // refer to the higher part, which is a duplication of the lower one,
05095     // because VPERMILPS can only handle in-lane permutations.
05096     int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
05097                          EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
05098 
05099     V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
05100     V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
05101                              &SplatMask[0]);
05102   } else
05103     llvm_unreachable("Vector size not supported");
05104 
05105   return DAG.getNode(ISD::BITCAST, dl, VT, V);
05106 }
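// Editor's note (illustrative): for a 256-bit value with EltNo == 1 the mask
// built above is <1, 1, 1, 1, 5, 5, 5, 5>. VPERMILPS only permutes within each
// 128-bit lane, so this splat relies on PromoteSplat having already placed an
// identical copy of the source 128-bit half in both lanes.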
05107 
05108 /// PromoteSplat - Splat is promoted to target supported vector shuffles.
05109 static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
05110   MVT SrcVT = SV->getSimpleValueType(0);
05111   SDValue V1 = SV->getOperand(0);
05112   SDLoc dl(SV);
05113 
05114   int EltNo = SV->getSplatIndex();
05115   int NumElems = SrcVT.getVectorNumElements();
05116   bool Is256BitVec = SrcVT.is256BitVector();
05117 
05118   assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
05119          "Unknown how to promote splat for type");
05120 
05121   // Extract the 128-bit part containing the splat element and update
05122   // the splat element index when it refers to the higher register.
05123   if (Is256BitVec) {
05124     V1 = Extract128BitVector(V1, EltNo, DAG, dl);
05125     if (EltNo >= NumElems/2)
05126       EltNo -= NumElems/2;
05127   }
05128 
05129   // i16 and i8 vector types can't be used directly by a generic shuffle
05130   // instruction because the target has no such instruction. Generate shuffles
05131   // which repeat i16 and i8 several times until they fit in i32, and then can
05132   // be manipulated by target supported shuffles.
05133   MVT EltVT = SrcVT.getVectorElementType();
05134   if (EltVT == MVT::i8 || EltVT == MVT::i16)
05135     V1 = PromoteSplati8i16(V1, DAG, EltNo);
05136 
05137   // Recreate the 256-bit vector and place the same 128-bit vector
05138   // into the low and high part. This is necessary because we want
05139   // to use VPERM* to shuffle the vectors
05140   if (Is256BitVec) {
05141     V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
05142   }
05143 
05144   return getLegalSplat(DAG, V1, EltNo);
05145 }
05146 
05147 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
05148 /// vector and a zero or undef vector.  This produces a shuffle where the low
05149 /// element of V2 is swizzled into the zero/undef vector, landing at element
05150 /// Idx.  This produces a shuffle mask like 4,1,2,3 (idx=0) or  0,1,2,4 (idx=3).
05151 static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
05152                                            bool IsZero,
05153                                            const X86Subtarget *Subtarget,
05154                                            SelectionDAG &DAG) {
05155   MVT VT = V2.getSimpleValueType();
05156   SDValue V1 = IsZero
05157     ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
05158   unsigned NumElems = VT.getVectorNumElements();
05159   SmallVector<int, 16> MaskVec;
05160   for (unsigned i = 0; i != NumElems; ++i)
05161     // If this is the insertion idx, put the low elt of V2 here.
05162     MaskVec.push_back(i == Idx ? NumElems : i);
05163   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
05164 }
05165 
05166 /// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
05167 /// target specific opcode. Returns true if the Mask could be calculated. Sets
05168 /// IsUnary to true if it only uses one source. Note that this will set IsUnary for
05169 /// shuffles which use a single input multiple times, and in those cases it will
05170 /// adjust the mask to only have indices within that single input.
05171 static bool getTargetShuffleMask(SDNode *N, MVT VT,
05172                                  SmallVectorImpl<int> &Mask, bool &IsUnary) {
05173   unsigned NumElems = VT.getVectorNumElements();
05174   SDValue ImmN;
05175 
05176   IsUnary = false;
05177   bool IsFakeUnary = false;
05178   switch(N->getOpcode()) {
05179   case X86ISD::SHUFP:
05180     ImmN = N->getOperand(N->getNumOperands()-1);
05181     DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05182     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05183     break;
05184   case X86ISD::UNPCKH:
05185     DecodeUNPCKHMask(VT, Mask);
05186     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05187     break;
05188   case X86ISD::UNPCKL:
05189     DecodeUNPCKLMask(VT, Mask);
05190     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05191     break;
05192   case X86ISD::MOVHLPS:
05193     DecodeMOVHLPSMask(NumElems, Mask);
05194     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05195     break;
05196   case X86ISD::MOVLHPS:
05197     DecodeMOVLHPSMask(NumElems, Mask);
05198     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05199     break;
05200   case X86ISD::PALIGNR:
05201     ImmN = N->getOperand(N->getNumOperands()-1);
05202     DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05203     break;
05204   case X86ISD::PSHUFD:
05205   case X86ISD::VPERMILP:
05206     ImmN = N->getOperand(N->getNumOperands()-1);
05207     DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05208     IsUnary = true;
05209     break;
05210   case X86ISD::PSHUFHW:
05211     ImmN = N->getOperand(N->getNumOperands()-1);
05212     DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05213     IsUnary = true;
05214     break;
05215   case X86ISD::PSHUFLW:
05216     ImmN = N->getOperand(N->getNumOperands()-1);
05217     DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05218     IsUnary = true;
05219     break;
05220   case X86ISD::PSHUFB: {
05221     IsUnary = true;
05222     SDValue MaskNode = N->getOperand(1);
05223     while (MaskNode->getOpcode() == ISD::BITCAST)
05224       MaskNode = MaskNode->getOperand(0);
05225 
05226     if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
05227       // If we have a build-vector, then things are easy.
05228       EVT VT = MaskNode.getValueType();
05229       assert(VT.isVector() &&
05230              "Can't produce a non-vector with a build_vector!");
05231       if (!VT.isInteger())
05232         return false;
05233 
05234       int NumBytesPerElement = VT.getVectorElementType().getSizeInBits() / 8;
05235 
05236       SmallVector<uint64_t, 32> RawMask;
05237       for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
05238         auto *CN = dyn_cast<ConstantSDNode>(MaskNode->getOperand(i));
05239         if (!CN)
05240           return false;
05241         APInt MaskElement = CN->getAPIntValue();
05242 
05243         // We now have to decode the element which could be any integer size and
05244         // extract each byte of it.
05245         for (int j = 0; j < NumBytesPerElement; ++j) {
05246           // Note that this is x86 and so always little endian: the low byte is
05247           // the first byte of the mask.
05248           RawMask.push_back(MaskElement.getLoBits(8).getZExtValue());
05249           MaskElement = MaskElement.lshr(8);
05250         }
05251       }
05252       DecodePSHUFBMask(RawMask, Mask);
05253       break;
05254     }
05255 
05256     auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
05257     if (!MaskLoad)
05258       return false;
05259 
05260     SDValue Ptr = MaskLoad->getBasePtr();
05261     if (Ptr->getOpcode() == X86ISD::Wrapper)
05262       Ptr = Ptr->getOperand(0);
05263 
05264     auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
05265     if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
05266       return false;
05267 
05268     if (auto *C = dyn_cast<ConstantDataSequential>(MaskCP->getConstVal())) {
05269       // FIXME: Support AVX-512 here.
05270       if (!C->getType()->isVectorTy() ||
05271           (C->getNumElements() != 16 && C->getNumElements() != 32))
05272         return false;
05273 
05274       assert(C->getType()->isVectorTy() && "Expected a vector constant.");
05275       DecodePSHUFBMask(C, Mask);
05276       break;
05277     }
05278 
05279     return false;
05280   }
05281   case X86ISD::VPERMI:
05282     ImmN = N->getOperand(N->getNumOperands()-1);
05283     DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05284     IsUnary = true;
05285     break;
05286   case X86ISD::MOVSS:
05287   case X86ISD::MOVSD: {
05288     // The index 0 always comes from the first element of the second source;
05289     // this is why MOVSS and MOVSD are used in the first place. The other
05290     // elements come from the other positions of the first source vector.
05291     Mask.push_back(NumElems);
05292     for (unsigned i = 1; i != NumElems; ++i) {
05293       Mask.push_back(i);
05294     }
05295     break;
05296   }
05297   case X86ISD::VPERM2X128:
05298     ImmN = N->getOperand(N->getNumOperands()-1);
05299     DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05300     if (Mask.empty()) return false;
05301     break;
05302   case X86ISD::MOVDDUP:
05303   case X86ISD::MOVLHPD:
05304   case X86ISD::MOVLPD:
05305   case X86ISD::MOVLPS:
05306   case X86ISD::MOVSHDUP:
05307   case X86ISD::MOVSLDUP:
05308     // Not yet implemented
05309     return false;
05310   default: llvm_unreachable("unknown target shuffle node");
05311   }
05312 
05313   // If we have a fake unary shuffle, the shuffle mask is spread across two
05314   // inputs that are actually the same node. Re-map the mask to always point
05315   // into the first input.
05316   if (IsFakeUnary)
05317     for (int &M : Mask)
05318       if (M >= (int)Mask.size())
05319         M -= Mask.size();
05320 
05321   return true;
05322 }
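// Editor's note (illustrative): for an X86ISD::MOVSS node with v4f32 operands
// this returns Mask = <4, 1, 2, 3> with IsUnary == false, while an
// X86ISD::UNPCKL whose two operands are the same node first decodes to
// <0, 4, 1, 5> and is then remapped by the IsFakeUnary fixup above to
// <0, 0, 1, 1> with IsUnary == true.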
05323 
05324 /// getShuffleScalarElt - Returns the scalar element that will make up the ith
05325 /// element of the result of the vector shuffle.
05326 static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
05327                                    unsigned Depth) {
05328   if (Depth == 6)
05329     return SDValue();  // Limit search depth.
05330 
05331   SDValue V = SDValue(N, 0);
05332   EVT VT = V.getValueType();
05333   unsigned Opcode = V.getOpcode();
05334 
05335   // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
05336   if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
05337     int Elt = SV->getMaskElt(Index);
05338 
05339     if (Elt < 0)
05340       return DAG.getUNDEF(VT.getVectorElementType());
05341 
05342     unsigned NumElems = VT.getVectorNumElements();
05343     SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
05344                                          : SV->getOperand(1);
05345     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
05346   }
05347 
05348   // Recurse into target specific vector shuffles to find scalars.
05349   if (isTargetShuffle(Opcode)) {
05350     MVT ShufVT = V.getSimpleValueType();
05351     unsigned NumElems = ShufVT.getVectorNumElements();
05352     SmallVector<int, 16> ShuffleMask;
05353     bool IsUnary;
05354 
05355     if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
05356       return SDValue();
05357 
05358     int Elt = ShuffleMask[Index];
05359     if (Elt < 0)
05360       return DAG.getUNDEF(ShufVT.getVectorElementType());
05361 
05362     SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
05363                                          : N->getOperand(1);
05364     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
05365                                Depth+1);
05366   }
05367 
05368   // Actual nodes that may contain scalar elements
05369   if (Opcode == ISD::BITCAST) {
05370     V = V.getOperand(0);
05371     EVT SrcVT = V.getValueType();
05372     unsigned NumElems = VT.getVectorNumElements();
05373 
05374     if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
05375       return SDValue();
05376   }
05377 
05378   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
05379     return (Index == 0) ? V.getOperand(0)
05380                         : DAG.getUNDEF(VT.getVectorElementType());
05381 
05382   if (V.getOpcode() == ISD::BUILD_VECTOR)
05383     return V.getOperand(Index);
05384 
05385   return SDValue();
05386 }
05387 
05388 /// getNumOfConsecutiveZeros - Return the number of elements of a vector
05389 /// shuffle operation which consecutively evaluate to zero. The
05390 /// search can start in two different directions, from left or right.
05391 /// We count undefs as zeros until PreferredNum is reached.
05392 static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp,
05393                                          unsigned NumElems, bool ZerosFromLeft,
05394                                          SelectionDAG &DAG,
05395                                          unsigned PreferredNum = -1U) {
05396   unsigned NumZeros = 0;
05397   for (unsigned i = 0; i != NumElems; ++i) {
05398     unsigned Index = ZerosFromLeft ? i : NumElems - i - 1;
05399     SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
05400     if (!Elt.getNode())
05401       break;
05402 
05403     if (X86::isZeroNode(Elt))
05404       ++NumZeros;
05405     else if (Elt.getOpcode() == ISD::UNDEF) // Undef as zero up to PreferredNum.
05406       NumZeros = std::min(NumZeros + 1, PreferredNum);
05407     else
05408       break;
05409   }
05410 
05411   return NumZeros;
05412 }
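// Editor's note (worked example): for a v4i32 shuffle of (V1, zero-vector) with
// mask <u, u, 4, 0>, scanning with ZerosFromLeft == true sees the two undefs and
// then element 4 (a zero taken from the second operand), so it returns 3
// (assuming PreferredNum is at least 2, so the undefs may count as zeros, and
// that element 0 of V1 is a defined, non-zero scalar); scanning from the right
// stops immediately at that element of V1 and returns 0.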
05413 
05414 /// isShuffleMaskConsecutive - Check if the shuffle mask indices [MaskI, MaskE)
05415 /// correspond consecutively to elements from one of the vector operands,
05416 /// starting from its index OpIdx. Also set OpNum to the source vector operand used.
05417 static
05418 bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
05419                               unsigned MaskI, unsigned MaskE, unsigned OpIdx,
05420                               unsigned NumElems, unsigned &OpNum) {
05421   bool SeenV1 = false;
05422   bool SeenV2 = false;
05423 
05424   for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
05425     int Idx = SVOp->getMaskElt(i);
05426     // Ignore undef indices
05427     if (Idx < 0)
05428       continue;
05429 
05430     if (Idx < (int)NumElems)
05431       SeenV1 = true;
05432     else
05433       SeenV2 = true;
05434 
05435     // Only accept consecutive elements from the same vector
05436     if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
05437       return false;
05438   }
05439 
05440   OpNum = SeenV1 ? 0 : 1;
05441   return true;
05442 }
05443 
05444 /// isVectorShiftRight - Returns true if the shuffle can be implemented as a
05445 /// logical right shift of a vector.
05446 static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
05447                                bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
05448   unsigned NumElems =
05449     SVOp->getSimpleValueType(0).getVectorNumElements();
05450   unsigned NumZeros = getNumOfConsecutiveZeros(
05451       SVOp, NumElems, false /* check zeros from right */, DAG,
05452       SVOp->getMaskElt(0));
05453   unsigned OpSrc;
05454 
05455   if (!NumZeros)
05456     return false;
05457 
05458   // Considering the elements in the mask that are not consecutive zeros,
05459   // check if they consecutively come from only one of the source vectors.
05460   //
05461   //               V1 = {X, A, B, C}     0
05462   //                         \  \  \    /
05463   //   vector_shuffle V1, V2 <1, 2, 3, X>
05464   //
05465   if (!isShuffleMaskConsecutive(SVOp,
05466             0,                   // Mask Start Index
05467             NumElems-NumZeros,   // Mask End Index(exclusive)
05468             NumZeros,            // Where to start looking in the src vector
05469             NumElems,            // Number of elements in vector
05470             OpSrc))              // Which source operand ?
05471     return false;
05472 
05473   isLeft = false;
05474   ShAmt = NumZeros;
05475   ShVal = SVOp->getOperand(OpSrc);
05476   return true;
05477 }
05478 
05479 /// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
05480 /// logical left shift of a vector.
05481 static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
05482                               bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
05483   unsigned NumElems =
05484     SVOp->getSimpleValueType(0).getVectorNumElements();
05485   unsigned NumZeros = getNumOfConsecutiveZeros(
05486       SVOp, NumElems, true /* check zeros from left */, DAG,
05487       NumElems - SVOp->getMaskElt(NumElems - 1) - 1);
05488   unsigned OpSrc;
05489 
05490   if (!NumZeros)
05491     return false;
05492 
05493   // Considering the elements in the mask that are not consecutive zeros,
05494   // check if they consecutively come from only one of the source vectors.
05495   //
05496   //                           0    { A, B, X, X } = V2
05497   //                          / \    /  /
05498   //   vector_shuffle V1, V2 <X, X, 4, 5>
05499   //
05500   if (!isShuffleMaskConsecutive(SVOp,
05501             NumZeros,     // Mask Start Index
05502             NumElems,     // Mask End Index(exclusive)
05503             0,            // Where to start looking in the src vector
05504             NumElems,     // Number of elements in vector
05505             OpSrc))       // Which source operand ?
05506     return false;
05507 
05508   isLeft = true;
05509   ShAmt = NumZeros;
05510   ShVal = SVOp->getOperand(OpSrc);
05511   return true;
05512 }
05513 
05514 /// isVectorShift - Returns true if the shuffle can be implemented as a
05515 /// logical left or right shift of a vector.
05516 static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
05517                           bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
05518   // Although the logic below supports any bitwidth size, there are no
05519   // shift instructions which handle more than 128-bit vectors.
05520   if (!SVOp->getSimpleValueType(0).is128BitVector())
05521     return false;
05522 
05523   if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
05524       isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
05525     return true;
05526 
05527   return false;
05528 }
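// Editor's note (worked example): a v4i32 vector_shuffle (V1, zero-vector) with
// mask <1, 2, 3, 4> is recognized by isVectorShiftRight (assuming V1's elements
// are not themselves zero or undef): element 4 is a zero coming in from the
// right, and elements <1, 2, 3> are consecutive elements of V1 starting at
// index 1, so ShVal = V1, ShAmt = 1, and the whole shuffle can be emitted as a
// PSRLDQ-style byte shift via getVShift below.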
05529 
05530 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
05531 ///
05532 static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
05533                                        unsigned NumNonZero, unsigned NumZero,
05534                                        SelectionDAG &DAG,
05535                                        const X86Subtarget* Subtarget,
05536                                        const TargetLowering &TLI) {
05537   if (NumNonZero > 8)
05538     return SDValue();
05539 
05540   SDLoc dl(Op);
05541   SDValue V;
05542   bool First = true;
05543   for (unsigned i = 0; i < 16; ++i) {
05544     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
05545     if (ThisIsNonZero && First) {
05546       if (NumZero)
05547         V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
05548       else
05549         V = DAG.getUNDEF(MVT::v8i16);
05550       First = false;
05551     }
05552 
05553     if ((i & 1) != 0) {
05554       SDValue ThisElt, LastElt;
05555       bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
05556       if (LastIsNonZero) {
05557         LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
05558                               MVT::i16, Op.getOperand(i-1));
05559       }
05560       if (ThisIsNonZero) {
05561         ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
05562         ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
05563                               ThisElt, DAG.getConstant(8, MVT::i8));
05564         if (LastIsNonZero)
05565           ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
05566       } else
05567         ThisElt = LastElt;
05568 
05569       if (ThisElt.getNode())
05570         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
05571                         DAG.getIntPtrConstant(i/2));
05572     }
05573   }
05574 
05575   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
05576 }
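// Editor's note (illustrative): the loop above packs adjacent byte pairs into
// i16 lanes; for odd i, bytes (i-1, i) are combined as
//   (zext_i16(elt[i]) << 8) | zext_i16(elt[i-1])
// and inserted into lane i/2 of a v8i16, which is finally bitcast back to
// v16i8. Pairs that are entirely zero are skipped, because the accumulator was
// started as a zero (or undef) v8i16.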
05577 
05578 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
05579 ///
05580 static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
05581                                      unsigned NumNonZero, unsigned NumZero,
05582                                      SelectionDAG &DAG,
05583                                      const X86Subtarget* Subtarget,
05584                                      const TargetLowering &TLI) {
05585   if (NumNonZero > 4)
05586     return SDValue();
05587 
05588   SDLoc dl(Op);
05589   SDValue V;
05590   bool First = true;
05591   for (unsigned i = 0; i < 8; ++i) {
05592     bool isNonZero = (NonZeros & (1 << i)) != 0;
05593     if (isNonZero) {
05594       if (First) {
05595         if (NumZero)
05596           V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
05597         else
05598           V = DAG.getUNDEF(MVT::v8i16);
05599         First = false;
05600       }
05601       V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
05602                       MVT::v8i16, V, Op.getOperand(i),
05603                       DAG.getIntPtrConstant(i));
05604     }
05605   }
05606 
05607   return V;
05608 }
05609 
05610 /// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
05611 static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
05612                                      unsigned NonZeros, unsigned NumNonZero,
05613                                      unsigned NumZero, SelectionDAG &DAG,
05614                                      const X86Subtarget *Subtarget,
05615                                      const TargetLowering &TLI) {
05616   // We know there's at least one non-zero element
05617   unsigned FirstNonZeroIdx = 0;
05618   SDValue FirstNonZero = Op->getOperand(FirstNonZeroIdx);
05619   while (FirstNonZero.getOpcode() == ISD::UNDEF ||
05620          X86::isZeroNode(FirstNonZero)) {
05621     ++FirstNonZeroIdx;
05622     FirstNonZero = Op->getOperand(FirstNonZeroIdx);
05623   }
05624 
05625   if (FirstNonZero.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
05626       !isa<ConstantSDNode>(FirstNonZero.getOperand(1)))
05627     return SDValue();
05628 
05629   SDValue V = FirstNonZero.getOperand(0);
05630   MVT VVT = V.getSimpleValueType();
05631   if (!Subtarget->hasSSE41() || (VVT != MVT::v4f32 && VVT != MVT::v4i32))
05632     return SDValue();
05633 
05634   unsigned FirstNonZeroDst =
05635       cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
05636   unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx;
05637   unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx;
05638   unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst;
05639 
05640   for (unsigned Idx = FirstNonZeroIdx + 1; Idx < NumElems; ++Idx) {
05641     SDValue Elem = Op.getOperand(Idx);
05642     if (Elem.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elem))
05643       continue;
05644 
05645     // TODO: What else can be here? Deal with it.
05646     if (Elem.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
05647       return SDValue();
05648 
05649     // TODO: Some optimizations are still possible here
05650     // ex: Getting one element from a vector, and the rest from another.
05651     if (Elem.getOperand(0) != V)
05652       return SDValue();
05653 
05654     unsigned Dst = cast<ConstantSDNode>(Elem.getOperand(1))->getZExtValue();
05655     if (Dst == Idx)
05656       ++CorrectIdx;
05657     else if (IncorrectIdx == -1U) {
05658       IncorrectIdx = Idx;
05659       IncorrectDst = Dst;
05660     } else
05661       // There was already one element with an incorrect index.
05662       // We can't optimize this case to an insertps.
05663       return SDValue();
05664   }
05665 
05666   if (NumNonZero == CorrectIdx || NumNonZero == CorrectIdx + 1) {
05667     SDLoc dl(Op);
05668     EVT VT = Op.getSimpleValueType();
05669     unsigned ElementMoveMask = 0;
05670     if (IncorrectIdx == -1U)
05671       ElementMoveMask = FirstNonZeroIdx << 6 | FirstNonZeroIdx << 4;
05672     else
05673       ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4;
05674 
05675     SDValue InsertpsMask =
05676         DAG.getIntPtrConstant(ElementMoveMask | (~NonZeros & 0xf));
05677     return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask);
05678   }
05679 
05680   return SDValue();
05681 }
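// Editor's note (illustrative): the insertps immediate assembled above follows
// the SSE4.1 encoding: bits [7:6] select the source element of the second
// operand, bits [5:4] select the destination lane, and bits [3:0] are a zero
// mask. For example, moving element 2 of V into lane 1 while zeroing lane 3
// would use (2 << 6) | (1 << 4) | 0x8.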
05682 
05683 /// getVShift - Return a vector logical shift node.
05684 ///
05685 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
05686                          unsigned NumBits, SelectionDAG &DAG,
05687                          const TargetLowering &TLI, SDLoc dl) {
05688   assert(VT.is128BitVector() && "Unknown type for VShift");
05689   EVT ShVT = MVT::v2i64;
05690   unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
05691   SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
05692   return DAG.getNode(ISD::BITCAST, dl, VT,
05693                      DAG.getNode(Opc, dl, ShVT, SrcOp,
05694                              DAG.getConstant(NumBits,
05695                                   TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
05696 }
05697 
05698 static SDValue
05699 LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
05700 
05701   // Check if the scalar load can be widened into a vector load. And if
05702   // the address is "base + cst" see if the cst can be "absorbed" into
05703   // the shuffle mask.
05704   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
05705     SDValue Ptr = LD->getBasePtr();
05706     if (!ISD::isNormalLoad(LD) || LD->isVolatile())
05707       return SDValue();
05708     EVT PVT = LD->getValueType(0);
05709     if (PVT != MVT::i32 && PVT != MVT::f32)
05710       return SDValue();
05711 
05712     int FI = -1;
05713     int64_t Offset = 0;
05714     if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
05715       FI = FINode->getIndex();
05716       Offset = 0;
05717     } else if (DAG.isBaseWithConstantOffset(Ptr) &&
05718                isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
05719       FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
05720       Offset = Ptr.getConstantOperandVal(1);
05721       Ptr = Ptr.getOperand(0);
05722     } else {
05723       return SDValue();
05724     }
05725 
05726     // FIXME: 256-bit vector instructions don't require a strict alignment,
05727     // improve this code to support it better.
05728     unsigned RequiredAlign = VT.getSizeInBits()/8;
05729     SDValue Chain = LD->getChain();
05730     // Make sure the stack object alignment is at least 16 or 32.
05731     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
05732     if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
05733       if (MFI->isFixedObjectIndex(FI)) {
05734         // Can't change the alignment. FIXME: It's possible to compute
05735         // the exact stack offset and reference FI + adjust offset instead.
05736         // If someone *really* cares about this. That's the way to implement it.
05737         return SDValue();
05738       } else {
05739         MFI->setObjectAlignment(FI, RequiredAlign);
05740       }
05741     }
05742 
05743     // (Offset % RequiredAlign) must be a multiple of 4. The address is then
05744     // Ptr + (Offset & ~(RequiredAlign-1)).
05745     if (Offset < 0)
05746       return SDValue();
05747     if ((Offset % RequiredAlign) & 3)
05748       return SDValue();
05749     int64_t StartOffset = Offset & ~(RequiredAlign-1);
05750     if (StartOffset)
05751       Ptr = DAG.getNode(ISD::ADD, SDLoc(Ptr), Ptr.getValueType(),
05752                         Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
05753 
05754     int EltNo = (Offset - StartOffset) >> 2;
05755     unsigned NumElems = VT.getVectorNumElements();
05756 
05757     EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
05758     SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
05759                              LD->getPointerInfo().getWithOffset(StartOffset),
05760                              false, false, false, 0);
05761 
05762     SmallVector<int, 8> Mask;
05763     for (unsigned i = 0; i != NumElems; ++i)
05764       Mask.push_back(EltNo);
05765 
05766     return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
05767   }
05768 
05769   return SDValue();
05770 }
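// Editor's note (worked example): a 4-byte scalar load from a stack slot at
// offset +8, with the slot aligned (or re-alignable) to 16 bytes, is widened
// here to a full v4i32/v4f32 load of the slot (StartOffset = 0,
// EltNo = 8 >> 2 = 2) followed by the splat shuffle <2, 2, 2, 2>, so the splat
// costs one vector load plus one shuffle instead of a scalar load,
// scalar_to_vector and shuffle.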
05771 
05772 /// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
05773 /// vector of type 'VT', see if the elements can be replaced by a single large
05774 /// load which has the same value as a build_vector whose operands are 'elts'.
05775 ///
05776 /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
05777 ///
05778 /// FIXME: we'd also like to handle the case where the last elements are zero
05779 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
05780 /// There's even a handy isZeroNode for that purpose.
05781 static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
05782                                         SDLoc &DL, SelectionDAG &DAG,
05783                                         bool isAfterLegalize) {
05784   EVT EltVT = VT.getVectorElementType();
05785   unsigned NumElems = Elts.size();
05786 
05787   LoadSDNode *LDBase = nullptr;
05788   unsigned LastLoadedElt = -1U;
05789 
05790   // For each element in the initializer, see if we've found a load or an undef.
05791   // If we don't find an initial load element, or later load elements are
05792   // non-consecutive, bail out.
05793   for (unsigned i = 0; i < NumElems; ++i) {
05794     SDValue Elt = Elts[i];
05795 
05796     if (!Elt.getNode() ||
05797         (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
05798       return SDValue();
05799     if (!LDBase) {
05800       if (Elt.getNode()->getOpcode() == ISD::UNDEF)
05801         return SDValue();
05802       LDBase = cast<LoadSDNode>(Elt.getNode());
05803       LastLoadedElt = i;
05804       continue;
05805     }
05806     if (Elt.getOpcode() == ISD::UNDEF)
05807       continue;
05808 
05809     LoadSDNode *LD = cast<LoadSDNode>(Elt);
05810     if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
05811       return SDValue();
05812     LastLoadedElt = i;
05813   }
05814 
05815   // If we have found an entire vector of loads and undefs, then return a large
05816   // load of the entire vector width starting at the base pointer.  If we found
05817   // consecutive loads for the low half, generate a vzext_load node.
05818   if (LastLoadedElt == NumElems - 1) {
05819 
05820     if (isAfterLegalize &&
05821         !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
05822       return SDValue();
05823 
05824     SDValue NewLd = SDValue();
05825 
05826     if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
05827       NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
05828                           LDBase->getPointerInfo(),
05829                           LDBase->isVolatile(), LDBase->isNonTemporal(),
05830                           LDBase->isInvariant(), 0);
05831     else
05831       NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
05832                           LDBase->getPointerInfo(),
05833                           LDBase->isVolatile(), LDBase->isNonTemporal(),
05834                           LDBase->isInvariant(), LDBase->getAlignment());
05835 
05836     if (LDBase->hasAnyUseOfValue(1)) {
05837       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
05838                                      SDValue(LDBase, 1),
05839                                      SDValue(NewLd.getNode(), 1));
05840       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
05841       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
05842                              SDValue(NewLd.getNode(), 1));
05843     }
05844 
05845     return NewLd;
05846   }
05847   if (NumElems == 4 && LastLoadedElt == 1 &&
05848       DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
05849     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
05850     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
05851     SDValue ResNode =
05852         DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64,
05853                                 LDBase->getPointerInfo(),
05854                                 LDBase->getAlignment(),
05855                                 false/*isVolatile*/, true/*ReadMem*/,
05856                                 false/*WriteMem*/);
05857 
05858     // Make sure the newly-created LOAD is in the same position as LDBase in
05859     // terms of dependency. We create a TokenFactor for LDBase and ResNode, and
05860     // update uses of LDBase's output chain to use the TokenFactor.
05861     if (LDBase->hasAnyUseOfValue(1)) {
05862       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
05863                              SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
05864       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
05865       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
05866                              SDValue(ResNode.getNode(), 1));
05867     }
05868 
05869     return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
05870   }
05871   return SDValue();
05872 }
05873 
05874 /// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
05875 /// to generate a splat value for the following cases:
05876 /// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
05877 /// 2. A splat shuffle which uses a scalar_to_vector node which comes from
05878 /// a scalar load, or a constant.
05879 /// The VBROADCAST node is returned when a pattern is found,
05880 /// or SDValue() otherwise.
05881 static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
05882                                     SelectionDAG &DAG) {
05883   if (!Subtarget->hasFp256())
05884     return SDValue();
05885 
05886   MVT VT = Op.getSimpleValueType();
05887   SDLoc dl(Op);
05888 
05889   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
05890          "Unsupported vector type for broadcast.");
05891 
05892   SDValue Ld;
05893   bool ConstSplatVal;
05894 
05895   switch (Op.getOpcode()) {
05896     default:
05897       // Unknown pattern found.
05898       return SDValue();
05899 
05900     case ISD::BUILD_VECTOR: {
05901       auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
05902       BitVector UndefElements;
05903       SDValue Splat = BVOp->getSplatValue(&UndefElements);
05904 
05905       // We need a splat of a single value to use broadcast, and it doesn't
05906       // make any sense if the value is only in one element of the vector.
05907       if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
05908         return SDValue();
05909 
05910       Ld = Splat;
05911       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
05912                        Ld.getOpcode() == ISD::ConstantFP);
05913 
05914       // Make sure that all of the users of a non-constant load are from the
05915       // BUILD_VECTOR node.
05916       if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
05917         return SDValue();
05918       break;
05919     }
05920 
05921     case ISD::VECTOR_SHUFFLE: {
05922       ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
05923 
05924       // Shuffles must have a splat mask where the first element is
05925       // broadcasted.
05926       if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
05927         return SDValue();
05928 
05929       SDValue Sc = Op.getOperand(0);
05930       if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
05931           Sc.getOpcode() != ISD::BUILD_VECTOR) {
05932 
05933         if (!Subtarget->hasInt256())
05934           return SDValue();
05935 
05936         // Use the register form of the broadcast instruction available on AVX2.
05937         if (VT.getSizeInBits() >= 256)
05938           Sc = Extract128BitVector(Sc, 0, DAG, dl);
05939         return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
05940       }
05941 
05942       Ld = Sc.getOperand(0);
05943       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
05944                        Ld.getOpcode() == ISD::ConstantFP);
05945 
05946       // The scalar_to_vector node and the suspected
05947       // load node must have exactly one user.
05948       // Constants may have multiple users.
05949 
05950       // AVX-512 has register version of the broadcast
05951       bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
05952         Ld.getValueType().getSizeInBits() >= 32;
05953       if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
05954           !hasRegVer))
05955         return SDValue();
05956       break;
05957     }
05958   }
05959 
05960   bool IsGE256 = (VT.getSizeInBits() >= 256);
05961 
05962   // Handle broadcasting a single constant scalar from the constant pool
05963   // into a vector. On Sandybridge it is still better to load a constant vector
05964   // from the constant pool and not to broadcast it from a scalar.
05965   if (ConstSplatVal && Subtarget->hasInt256()) {
05966     EVT CVT = Ld.getValueType();
05967     assert(!CVT.isVector() && "Must not broadcast a vector type");
05968     unsigned ScalarSize = CVT.getSizeInBits();
05969 
05970     if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) {
05971       const Constant *C = nullptr;
05972       if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
05973         C = CI->getConstantIntValue();
05974       else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
05975         C = CF->getConstantFPValue();
05976 
05977       assert(C && "Invalid constant type");
05978 
05979       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
05980       SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
05981       unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
05982       Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
05983                        MachinePointerInfo::getConstantPool(),
05984                        false, false, false, Alignment);
05985 
05986       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
05987     }
05988   }
05989 
05990   bool IsLoad = ISD::isNormalLoad(Ld.getNode());
05991   unsigned ScalarSize = Ld.getValueType().getSizeInBits();
05992 
05993   // Handle AVX2 in-register broadcasts.
05994   if (!IsLoad && Subtarget->hasInt256() &&
05995       (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
05996     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
05997 
05998   // The scalar source must be a normal load.
05999   if (!IsLoad)
06000     return SDValue();
06001 
06002   if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))
06003     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
06004 
06005   // The integer check is needed for the 64-bit into 128-bit case, so that it
06006   // doesn't match double, since there is no vbroadcastsd xmm.
06007   if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
06008     if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
06009       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
06010   }
06011 
06012   // Unsupported broadcast.
06013   return SDValue();
06014 }
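// Editor's note (illustrative): on AVX2, a BUILD_VECTOR v8f32 whose operands
// all splat one scalar load hits the "ScalarSize == 32" path above and becomes
// a single X86ISD::VBROADCAST of the load (a vbroadcastss from memory); a
// non-load splat of the same width uses the AVX2 in-register form instead. On
// plain AVX only the load-based 32-bit (and, for 256-bit vectors, 64-bit)
// broadcasts are formed here.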
06015 
06016 /// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
06017 /// underlying vector and index.
06018 ///
06019 /// Modifies \p ExtractedFromVec to the real vector and returns the real
06020 /// index.
06021 static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
06022                                          SDValue ExtIdx) {
06023   int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
06024   if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
06025     return Idx;
06026 
06027   // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
06028   // lowered this:
06029   //   (extract_vector_elt (v8f32 %vreg1), Constant<6>)
06030   // to:
06031   //   (extract_vector_elt (vector_shuffle<2,u,u,u>
06032   //                           (extract_subvector (v8f32 %vreg0), Constant<4>),
06033   //                           undef)
06034   //                       Constant<0>)
06035   // In this case the vector is the extract_subvector expression and the index
06036   // is 2, as specified by the shuffle.
06037   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
06038   SDValue ShuffleVec = SVOp->getOperand(0);
06039   MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
06040   assert(ShuffleVecVT.getVectorElementType() ==
06041          ExtractedFromVec.getSimpleValueType().getVectorElementType());
06042 
06043   int ShuffleIdx = SVOp->getMaskElt(Idx);
06044   if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
06045     ExtractedFromVec = ShuffleVec;
06046     return ShuffleIdx;
06047   }
06048   return Idx;
06049 }
06050 
06051 static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
06052   MVT VT = Op.getSimpleValueType();
06053 
06054   // Skip if insert_vec_elt is not supported.
06055   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
06056   if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
06057     return SDValue();
06058 
06059   SDLoc DL(Op);
06060   unsigned NumElems = Op.getNumOperands();
06061 
06062   SDValue VecIn1;
06063   SDValue VecIn2;
06064   SmallVector<unsigned, 4> InsertIndices;
06065   SmallVector<int, 8> Mask(NumElems, -1);
06066 
06067   for (unsigned i = 0; i != NumElems; ++i) {
06068     unsigned Opc = Op.getOperand(i).getOpcode();
06069 
06070     if (Opc == ISD::UNDEF)
06071       continue;
06072 
06073     if (Opc != ISD::EXTRACT_VECTOR_ELT) {
06074       // Quit if more than 1 element needs inserting.
06075       if (InsertIndices.size() > 1)
06076         return SDValue();
06077 
06078       InsertIndices.push_back(i);
06079       continue;
06080     }
06081 
06082     SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
06083     SDValue ExtIdx = Op.getOperand(i).getOperand(1);
06084     // Quit if non-constant index.
06085     if (!isa<ConstantSDNode>(ExtIdx))
06086       return SDValue();
06087     int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
06088 
06089     // Quit if extracted from vector of different type.
06090     if (ExtractedFromVec.getValueType() != VT)
06091       return SDValue();
06092 
06093     if (!VecIn1.getNode())
06094       VecIn1 = ExtractedFromVec;
06095     else if (VecIn1 != ExtractedFromVec) {
06096       if (!VecIn2.getNode())
06097         VecIn2 = ExtractedFromVec;
06098       else if (VecIn2 != ExtractedFromVec)
06099         // Quit if more than 2 vectors to shuffle
06100         return SDValue();
06101     }
06102 
06103     if (ExtractedFromVec == VecIn1)
06104       Mask[i] = Idx;
06105     else if (ExtractedFromVec == VecIn2)
06106       Mask[i] = Idx + NumElems;
06107   }
06108 
06109   if (!VecIn1.getNode())
06110     return SDValue();
06111 
06112   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
06113   SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
06114   for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
06115     unsigned Idx = InsertIndices[i];
06116     NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
06117                      DAG.getIntPtrConstant(Idx));
06118   }
06119 
06120   return NV;
06121 }
06122 
06123 // Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
06124 SDValue
06125 X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
06126 
06127   MVT VT = Op.getSimpleValueType();
06128   assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
06129          "Unexpected type in LowerBUILD_VECTORvXi1!");
06130 
06131   SDLoc dl(Op);
06132   if (ISD::isBuildVectorAllZeros(Op.getNode())) {
06133     SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
06134     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
06135     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
06136   }
06137 
06138   if (ISD::isBuildVectorAllOnes(Op.getNode())) {
06139     SDValue Cst = DAG.getTargetConstant(1, MVT::i1);
06140     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
06141     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
06142   }
06143 
06144   bool AllConstants = true;
06145   uint64_t Immediate = 0;
06146   int NonConstIdx = -1;
06147   bool IsSplat = true;
06148   unsigned NumNonConsts = 0;
06149   unsigned NumConsts = 0;
06150   for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
06151     SDValue In = Op.getOperand(idx);
06152     if (In.getOpcode() == ISD::UNDEF)
06153       continue;
06154     if (!isa<ConstantSDNode>(In)) {
06155       AllConstants = false;
06156       NonConstIdx = idx;
06157       NumNonConsts++;
06158     }
06159     else {
06160       NumConsts++;
06161       if (cast<ConstantSDNode>(In)->getZExtValue())
06162         Immediate |= (1ULL << idx);
06163     }
06164     if (In != Op.getOperand(0))
06165       IsSplat = false;
06166   }
06167 
06168   if (AllConstants) {
06169     SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
06170       DAG.getConstant(Immediate, MVT::i16));
06171     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
06172                        DAG.getIntPtrConstant(0));
06173   }
06174 
06175   if (NumNonConsts == 1 && NonConstIdx != 0) {
06176     SDValue DstVec;
06177     if (NumConsts) {
06178       SDValue VecAsImm = DAG.getConstant(Immediate,
06179                                          MVT::getIntegerVT(VT.getSizeInBits()));
06180       DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
06181     }
06182     else 
06183       DstVec = DAG.getUNDEF(VT);
06184     return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
06185                        Op.getOperand(NonConstIdx),
06186                        DAG.getIntPtrConstant(NonConstIdx));
06187   }
06188   if (!IsSplat && (NonConstIdx != 0))
06189     llvm_unreachable("Unsupported BUILD_VECTOR operation");
06190   MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
06191   SDValue Select;
06192   if (IsSplat)
06193     Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
06194                           DAG.getConstant(-1, SelectVT),
06195                           DAG.getConstant(0, SelectVT));
06196   else
06197     Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
06198                          DAG.getConstant((Immediate | 1), SelectVT),
06199                          DAG.getConstant(Immediate, SelectVT));
06200   return DAG.getNode(ISD::BITCAST, dl, VT, Select);
06201 }
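// Editor's note (worked example): an all-constant v8i1 <1,0,1,1,0,0,0,0> takes
// the AllConstants path above with Immediate = 0b00001101 = 13; it is emitted
// as a bitcast of the i16 constant 13 to v16i1 followed by an
// extract_subvector of the low 8 mask bits.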
06202 
06203 /// \brief Return true if \p N implements a horizontal binop and return the
06204 /// operands for the horizontal binop into V0 and V1.
06205 /// 
06206 /// This is a helper function of PerformBUILD_VECTORCombine.
06207 /// This function checks that the build_vector \p N in input implements a
06208 /// horizontal operation. Parameter \p Opcode defines the kind of horizontal
06209 /// operation to match.
06210 /// For example, if \p Opcode is equal to ISD::ADD, then this function
06211 /// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
06212 /// is equal to ISD::SUB, then this function checks if this is a horizontal
06213 /// arithmetic sub.
06214 ///
06215 /// This function only analyzes elements of \p N whose indices are
06216 /// in range [BaseIdx, LastIdx).
06217 static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
06218                               SelectionDAG &DAG,
06219                               unsigned BaseIdx, unsigned LastIdx,
06220                               SDValue &V0, SDValue &V1) {
06221   EVT VT = N->getValueType(0);
06222 
06223   assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
06224   assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
06225          "Invalid Vector in input!");
06226   
06227   bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
06228   bool CanFold = true;
06229   unsigned ExpectedVExtractIdx = BaseIdx;
06230   unsigned NumElts = LastIdx - BaseIdx;
06231   V0 = DAG.getUNDEF(VT);
06232   V1 = DAG.getUNDEF(VT);
06233 
06234   // Check if N implements a horizontal binop.
06235   for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
06236     SDValue Op = N->getOperand(i + BaseIdx);
06237 
06238     // Skip UNDEFs.
06239     if (Op->getOpcode() == ISD::UNDEF) {
06240       // Update the expected vector extract index.
06241       if (i * 2 == NumElts)
06242         ExpectedVExtractIdx = BaseIdx;
06243       ExpectedVExtractIdx += 2;
06244       continue;
06245     }
06246 
06247     CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
06248 
06249     if (!CanFold)
06250       break;
06251 
06252     SDValue Op0 = Op.getOperand(0);
06253     SDValue Op1 = Op.getOperand(1);
06254 
06255     // Try to match the following pattern:
06256     // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
06257     CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
06258         Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
06259         Op0.getOperand(0) == Op1.getOperand(0) &&
06260         isa<ConstantSDNode>(Op0.getOperand(1)) &&
06261         isa<ConstantSDNode>(Op1.getOperand(1)));
06262     if (!CanFold)
06263       break;
06264 
06265     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
06266     unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
06267 
06268     if (i * 2 < NumElts) {
06269       if (V0.getOpcode() == ISD::UNDEF)
06270         V0 = Op0.getOperand(0);
06271     } else {
06272       if (V1.getOpcode() == ISD::UNDEF)
06273         V1 = Op0.getOperand(0);
06274       if (i * 2 == NumElts)
06275         ExpectedVExtractIdx = BaseIdx;
06276     }
06277 
06278     SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
06279     if (I0 == ExpectedVExtractIdx)
06280       CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
06281     else if (IsCommutable && I1 == ExpectedVExtractIdx) {
06282       // Try to match the following dag sequence:
06283       // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
06284       CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
06285     } else
06286       CanFold = false;
06287 
06288     ExpectedVExtractIdx += 2;
06289   }
06290 
06291   return CanFold;
06292 }
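// A minimal, self-contained sketch of the index pattern required above (not
// LLVM code; names are made up for illustration). It covers only the simple
// case BaseIdx == 0 with no UNDEFs and no commuted operands: element i of
// the first half must combine lanes (2*i, 2*i+1) of input 0, and element i
// of the second half lanes (2*i, 2*i+1) of input 1.
#include <cstddef>
#include <utility>
#include <vector>

static bool hasHorizontalIndexPattern(
    const std::vector<std::pair<unsigned, unsigned>> &ExtractIdx,
    const std::vector<unsigned> &SourceVec) {
  std::size_t NumElts = ExtractIdx.size();
  if (SourceVec.size() != NumElts || NumElts == 0 || NumElts % 2 != 0)
    return false;
  unsigned Expected = 0;
  for (std::size_t i = 0; i != NumElts; ++i) {
    if (i * 2 == NumElts)
      Expected = 0;                           // second half restarts at lane 0
    unsigned WantSrc = (i * 2 < NumElts) ? 0u : 1u;
    if (SourceVec[i] != WantSrc || ExtractIdx[i].first != Expected ||
        ExtractIdx[i].second != Expected + 1)
      return false;
    Expected += 2;
  }
  return true;
}
// Example: <a0+a1, a2+a3, b0+b1, b2+b3> gives pairs {0,1},{2,3},{0,1},{2,3}
// drawn from sources {0,0,1,1}, which satisfies the pattern (this is haddps).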
06293 
06294 /// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
06295 /// a concat_vector. 
06296 ///
06297 /// This is a helper function of PerformBUILD_VECTORCombine.
06298 /// This function expects two 256-bit vectors called V0 and V1.
06299 /// At first, each vector is split into two separate 128-bit vectors.
06300 /// Then, the resulting 128-bit vectors are used to implement two
06301 /// horizontal binary operations. 
06302 ///
06303 /// The kind of horizontal binary operation is defined by \p X86Opcode.
06304 ///
06305 /// \p Mode specifies how the 128-bit parts of V0 and V1 are passed as input to
06306 /// the two new horizontal binops.
06307 /// When Mode is set, the first horizontal binop dag node takes as input
06308 /// the lower 128 bits of V0 and the upper 128 bits of V0. The second
06309 /// horizontal binop dag node takes as input the lower 128 bits of V1
06310 /// and the upper 128 bits of V1.
06311 ///   Example:
06312 ///     HADD V0_LO, V0_HI
06313 ///     HADD V1_LO, V1_HI
06314 ///
06315 /// Otherwise, the first horizontal binop dag node takes as input the lower
06316 /// 128 bits of V0 and the lower 128 bits of V1, and the second horizontal binop
06317 /// dag node takes the upper 128 bits of V0 and the upper 128 bits of V1.
06318 ///   Example:
06319 ///     HADD V0_LO, V1_LO
06320 ///     HADD V0_HI, V1_HI
06321 ///
06322 /// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
06323 /// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
06324 /// the upper 128-bits of the result.
06325 static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
06326                                      SDLoc DL, SelectionDAG &DAG,
06327                                      unsigned X86Opcode, bool Mode,
06328                                      bool isUndefLO, bool isUndefHI) {
06329   EVT VT = V0.getValueType();
06330   assert(VT.is256BitVector() && VT == V1.getValueType() &&
06331          "Invalid nodes in input!");
06332 
06333   unsigned NumElts = VT.getVectorNumElements();
06334   SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL);
06335   SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL);
06336   SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL);
06337   SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL);
06338   EVT NewVT = V0_LO.getValueType();
06339 
06340   SDValue LO = DAG.getUNDEF(NewVT);
06341   SDValue HI = DAG.getUNDEF(NewVT);
06342 
06343   if (Mode) {
06344     // Don't emit a horizontal binop if the result is expected to be UNDEF.
06345     if (!isUndefLO && V0->getOpcode() != ISD::UNDEF)
06346       LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
06347     if (!isUndefHI && V1->getOpcode() != ISD::UNDEF)
06348       HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
06349   } else {
06350     // Don't emit a horizontal binop if the result is expected to be UNDEF.
06351     if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF ||
06352                        V1_LO->getOpcode() != ISD::UNDEF))
06353       LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
06354 
06355     if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF ||
06356                        V1_HI->getOpcode() != ISD::UNDEF))
06357       HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
06358   }
06359 
06360   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
06361 }
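// A minimal sketch of the two modes, using plain float arrays instead of
// SelectionDAG nodes (not LLVM code; hadd128 is made up for illustration).
// hadd128 models a single 128-bit horizontal add of v4f32 halves:
#include <array>

static std::array<float, 4> hadd128(const std::array<float, 4> &A,
                                    const std::array<float, 4> &B) {
  return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
}
// Mode == true : LO = hadd128(V0_LO, V0_HI), HI = hadd128(V1_LO, V1_HI)
// Mode == false: LO = hadd128(V0_LO, V1_LO), HI = hadd128(V0_HI, V1_HI)
// and the final 256-bit result is the concatenation (LO, HI).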
06362 
06363 /// \brief Try to fold a build_vector that performs an 'addsub' into the
06364 /// sequence of 'vadd + vsub + blendi'.
06365 static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
06366                            const X86Subtarget *Subtarget) {
06367   SDLoc DL(BV);
06368   EVT VT = BV->getValueType(0);
06369   unsigned NumElts = VT.getVectorNumElements();
06370   SDValue InVec0 = DAG.getUNDEF(VT);
06371   SDValue InVec1 = DAG.getUNDEF(VT);
06372 
06373   assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
06374           VT == MVT::v2f64) && "build_vector with an invalid type found!");
06375 
06376   // Don't try to emit a VSELECT that cannot be lowered into a blend.
06377   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
06378   if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
06379     return SDValue();
06380 
06381   // Odd-numbered elements in the input build vector are obtained from
06382   // adding two integer/float elements.
06383   // Even-numbered elements in the input build vector are obtained from
06384   // subtracting two integer/float elements.
06385   unsigned ExpectedOpcode = ISD::FSUB;
06386   unsigned NextExpectedOpcode = ISD::FADD;
06387   bool AddFound = false;
06388   bool SubFound = false;
06389 
06390   for (unsigned i = 0, e = NumElts; i != e; i++) {
06391     SDValue Op = BV->getOperand(i);
06392       
06393     // Skip 'undef' values.
06394     unsigned Opcode = Op.getOpcode();
06395     if (Opcode == ISD::UNDEF) {
06396       std::swap(ExpectedOpcode, NextExpectedOpcode);
06397       continue;
06398     }
06399       
06400     // Early exit if we found an unexpected opcode.
06401     if (Opcode != ExpectedOpcode)
06402       return SDValue();
06403 
06404     SDValue Op0 = Op.getOperand(0);
06405     SDValue Op1 = Op.getOperand(1);
06406 
06407     // Try to match the following pattern:
06408     // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
06409     // Early exit if we cannot match that sequence.
06410     if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
06411         Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
06412         !isa<ConstantSDNode>(Op0.getOperand(1)) ||
06413         !isa<ConstantSDNode>(Op1.getOperand(1)) ||
06414         Op0.getOperand(1) != Op1.getOperand(1))
06415       return SDValue();
06416 
06417     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
06418     if (I0 != i)
06419       return SDValue();
06420 
06421     // We found a valid add/sub node. Update the information accordingly.
06422     if (i & 1)
06423       AddFound = true;
06424     else
06425       SubFound = true;
06426 
06427     // Update InVec0 and InVec1.
06428     if (InVec0.getOpcode() == ISD::UNDEF)
06429       InVec0 = Op0.getOperand(0);
06430     if (InVec1.getOpcode() == ISD::UNDEF)
06431       InVec1 = Op1.getOperand(0);
06432 
06433     // Make sure that the operands of each add/sub node always
06434     // come from the same pair of vectors.
06435     if (InVec0 != Op0.getOperand(0)) {
06436       if (ExpectedOpcode == ISD::FSUB)
06437         return SDValue();
06438 
06439       // FADD is commutable. Try to commute the operands
06440       // and then test again.
06441       std::swap(Op0, Op1);
06442       if (InVec0 != Op0.getOperand(0))
06443         return SDValue();
06444     }
06445 
06446     if (InVec1 != Op1.getOperand(0))
06447       return SDValue();
06448 
06449     // Update the pair of expected opcodes.
06450     std::swap(ExpectedOpcode, NextExpectedOpcode);
06451   }
06452 
06453   // Only fold this into a VSELECT if we saw both an add and a sub and
06454   // neither input vector is UNDEF (i.e. not too many UNDEF operands).
06455   if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
06456       InVec1.getOpcode() != ISD::UNDEF) {
06457     // Emit a sequence of vector add and sub followed by a VSELECT.
06458     // The new VSELECT will be lowered into a BLENDI.
06459     // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI'
06460     // and emit a single ADDSUB instruction.
06461     SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1);
06462     SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1);
06463 
06464     // Construct the VSELECT mask.
06465     EVT MaskVT = VT.changeVectorElementTypeToInteger();
06466     EVT SVT = MaskVT.getVectorElementType();
06467     unsigned SVTBits = SVT.getSizeInBits();
06468     SmallVector<SDValue, 8> Ops;
06469 
06470     for (unsigned i = 0, e = NumElts; i != e; ++i) {
06471       APInt Value = i & 1 ? APInt::getNullValue(SVTBits) :
06472                             APInt::getAllOnesValue(SVTBits);
06473       SDValue Constant = DAG.getConstant(Value, SVT);
06474       Ops.push_back(Constant);
06475     }
06476 
06477     SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops);
06478     return DAG.getSelect(DL, VT, Mask, Sub, Add);
06479   }
06480   
06481   return SDValue();
06482 }
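// A minimal scalar sketch of the lane pattern matched above (not LLVM code;
// addsub4 is made up for illustration). For v4f32 the build_vector must be
//   result[i] = (i & 1) ? A[i] + B[i] : A[i] - B[i]
// which later selects to a single ADDSUBPS via the add + sub + blend.
#include <array>

static std::array<float, 4> addsub4(const std::array<float, 4> &A,
                                    const std::array<float, 4> &B) {
  std::array<float, 4> R{};
  for (int i = 0; i < 4; ++i)
    R[i] = (i & 1) ? A[i] + B[i]   // odd lanes add
                   : A[i] - B[i];  // even lanes subtract
  return R;
}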
06483 
06484 static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
06485                                           const X86Subtarget *Subtarget) {
06486   SDLoc DL(N);
06487   EVT VT = N->getValueType(0);
06488   unsigned NumElts = VT.getVectorNumElements();
06489   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
06490   SDValue InVec0, InVec1;
06491 
06492   // Try to match an ADDSUB.
06493   if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
06494       (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
06495     SDValue Value = matchAddSub(BV, DAG, Subtarget);
06496     if (Value.getNode())
06497       return Value;
06498   }
06499 
06500   // Try to match horizontal ADD/SUB.
06501   unsigned NumUndefsLO = 0;
06502   unsigned NumUndefsHI = 0;
06503   unsigned Half = NumElts/2;
06504 
06505   // Count the number of UNDEF operands in the input build_vector.
06506   for (unsigned i = 0, e = Half; i != e; ++i)
06507     if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
06508       NumUndefsLO++;
06509 
06510   for (unsigned i = Half, e = NumElts; i != e; ++i)
06511     if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
06512       NumUndefsHI++;
06513 
06514   // Early exit if this is either a build_vector of all UNDEFs or if all the
06515   // operands but one are UNDEF.
06516   if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
06517     return SDValue();
06518 
06519   if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
06520     // Try to match an SSE3 float HADD/HSUB.
06521     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
06522       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
06523     
06524     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
06525       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
06526   } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
06527     // Try to match an SSSE3 integer HADD/HSUB.
06528     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
06529       return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
06530     
06531     if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
06532       return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
06533   }
06534   
06535   if (!Subtarget->hasAVX())
06536     return SDValue();
06537 
06538   if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
06539     // Try to match an AVX horizontal add/sub of packed single/double
06540     // precision floating point values from 256-bit vectors.
06541     SDValue InVec2, InVec3;
06542     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
06543         isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
06544         ((InVec0.getOpcode() == ISD::UNDEF ||
06545           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06546         ((InVec1.getOpcode() == ISD::UNDEF ||
06547           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06548       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
06549 
06550     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
06551         isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
06552         ((InVec0.getOpcode() == ISD::UNDEF ||
06553           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06554         ((InVec1.getOpcode() == ISD::UNDEF ||
06555           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06556       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
06557   } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
06558     // Try to match an AVX2 horizontal add/sub of signed integers.
06559     SDValue InVec2, InVec3;
06560     unsigned X86Opcode;
06561     bool CanFold = true;
06562 
06563     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
06564         isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
06565         ((InVec0.getOpcode() == ISD::UNDEF ||
06566           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06567         ((InVec1.getOpcode() == ISD::UNDEF ||
06568           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06569       X86Opcode = X86ISD::HADD;
06570     else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
06571         isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
06572         ((InVec0.getOpcode() == ISD::UNDEF ||
06573           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06574         ((InVec1.getOpcode() == ISD::UNDEF ||
06575           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06576       X86Opcode = X86ISD::HSUB;
06577     else
06578       CanFold = false;
06579 
06580     if (CanFold) {
06581       // Fold this build_vector into a single horizontal add/sub.
06582       // Do this only if the target has AVX2.
06583       if (Subtarget->hasAVX2())
06584         return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
06585  
06586       // Do not try to expand this build_vector into a pair of horizontal
06587       // add/sub if we can emit a pair of scalar add/sub.
06588       if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
06589         return SDValue();
06590 
06591       // Convert this build_vector into a pair of horizontal binop followed by
06592       // a concat vector.
06593       bool isUndefLO = NumUndefsLO == Half;
06594       bool isUndefHI = NumUndefsHI == Half;
06595       return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
06596                                    isUndefLO, isUndefHI);
06597     }
06598   }
06599 
06600   if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
06601        VT == MVT::v16i16) && Subtarget->hasAVX()) {
06602     unsigned X86Opcode;
06603     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
06604       X86Opcode = X86ISD::HADD;
06605     else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
06606       X86Opcode = X86ISD::HSUB;
06607     else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
06608       X86Opcode = X86ISD::FHADD;
06609     else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
06610       X86Opcode = X86ISD::FHSUB;
06611     else
06612       return SDValue();
06613 
06614     // Don't try to expand this build_vector into a pair of horizontal add/sub
06615     // if we can simply emit a pair of scalar add/sub.
06616     if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
06617       return SDValue();
06618 
06619     // Convert this build_vector into two horizontal add/sub followed by
06620     // a concat vector.
06621     bool isUndefLO = NumUndefsLO == Half;
06622     bool isUndefHI = NumUndefsHI == Half;
06623     return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
06624                                  isUndefLO, isUndefHI);
06625   }
06626 
06627   return SDValue();
06628 }
06629 
06630 SDValue
06631 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
06632   SDLoc dl(Op);
06633 
06634   MVT VT = Op.getSimpleValueType();
06635   MVT ExtVT = VT.getVectorElementType();
06636   unsigned NumElems = Op.getNumOperands();
06637 
06638   // BUILD_VECTORs of i1 predicate elements are lowered through a dedicated path.
06639   if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512())
06640     return LowerBUILD_VECTORvXi1(Op, DAG);
06641 
06642   // Vectors containing all zeros can be matched by pxor and xorps later
06643   if (ISD::isBuildVectorAllZeros(Op.getNode())) {
06644     // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
06645     // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
06646     if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
06647       return Op;
06648 
06649     return getZeroVector(VT, Subtarget, DAG, dl);
06650   }
06651 
06652   // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
06653   // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
06654   // vpcmpeqd on 256-bit vectors.
06655   if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
06656     if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
06657       return Op;
06658 
06659     if (!VT.is512BitVector())
06660       return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
06661   }
06662 
06663   SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
06664   if (Broadcast.getNode())
06665     return Broadcast;
06666 
06667   unsigned EVTBits = ExtVT.getSizeInBits();
06668 
06669   unsigned NumZero  = 0;
06670   unsigned NumNonZero = 0;
06671   unsigned NonZeros = 0;
06672   bool IsAllConstants = true;
06673   SmallSet<SDValue, 8> Values;
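  // One pass over the operands records the distinct values, whether every
  // defined element is a constant, how many elements are known zero, and a
  // bit mask (NonZeros) of the lanes holding a non-zero element.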
06674   for (unsigned i = 0; i < NumElems; ++i) {
06675     SDValue Elt = Op.getOperand(i);
06676     if (Elt.getOpcode() == ISD::UNDEF)
06677       continue;
06678     Values.insert(Elt);
06679     if (Elt.getOpcode() != ISD::Constant &&
06680         Elt.getOpcode() != ISD::ConstantFP)
06681       IsAllConstants = false;
06682     if (X86::isZeroNode(Elt))
06683       NumZero++;
06684     else {
06685       NonZeros |= (1 << i);
06686       NumNonZero++;
06687     }
06688   }
06689 
06690   // All-undef vector. Return an UNDEF. All-zero vectors were handled above.
06691   if (NumNonZero == 0)
06692     return DAG.getUNDEF(VT);
06693 
06694   // Special case for single non-zero, non-undef, element.
06695   if (NumNonZero == 1) {
06696     unsigned Idx = countTrailingZeros(NonZeros);
06697     SDValue Item = Op.getOperand(Idx);
06698 
06699     // If this is an insertion of an i64 value on x86-32, and if the top bits of
06700     // the value are obviously zero, truncate the value to i32 and do the
06701     // insertion that way.  Only do this if the value is non-constant or if the
06702     // value is a constant being inserted into element 0.  It is cheaper to do
06703     // a constant pool load than it is to do a movd + shuffle.
06704     if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
06705         (!IsAllConstants || Idx == 0)) {
06706       if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
06707         // Handle SSE only.
06708         assert(VT == MVT::v2i64 && "Expected an SSE value type!");
06709         EVT VecVT = MVT::v4i32;
06710         unsigned VecElts = 4;
06711 
06712         // Truncate the value (which may itself be a constant) to i32, and
06713         // convert it to a vector with movd (S2V+shuffle to zero extend).
06714         Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
06715         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
06716         Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
06717 
06718         // Now we have our 32-bit value zero extended in the low element of
06719         // a vector.  If Idx != 0, swizzle it into place.
06720         if (Idx != 0) {
06721           SmallVector<int, 4> Mask;
06722           Mask.push_back(Idx);
06723           for (unsigned i = 1; i != VecElts; ++i)
06724             Mask.push_back(i);
06725           Item = DAG.getVectorShuffle(VecVT, dl, Item, DAG.getUNDEF(VecVT),
06726                                       &Mask[0]);
06727         }
06728         return DAG.getNode(ISD::BITCAST, dl, VT, Item);
06729       }
06730     }
06731 
06732     // If we have a constant or non-constant insertion into the low element of
06733     // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
06734     // the rest of the elements.  This will be matched as movd/movq/movss/movsd
06735     // depending on what the source datatype is.
06736     if (Idx == 0) {
06737       if (NumZero == 0)
06738         return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
06739 
06740       if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
06741           (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
06742         if (VT.is256BitVector() || VT.is512BitVector()) {
06743           SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
06744           return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
06745                              Item, DAG.getIntPtrConstant(0));
06746         }
06747         assert(VT.is128BitVector() && "Expected an SSE value type!");
06748         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
06749         // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
06750         return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
06751       }
06752 
06753       if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
06754         Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
06755         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
06756         if (VT.is256BitVector()) {
06757           SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
06758           Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
06759         } else {
06760           assert(VT.is128BitVector() && "Expected an SSE value type!");
06761           Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
06762         }
06763         return DAG.getNode(ISD::BITCAST, dl, VT, Item);
06764       }
06765     }
06766 
06767     // Is it a vector logical left shift?
06768     if (NumElems == 2 && Idx == 1 &&
06769         X86::isZeroNode(Op.getOperand(0)) &&
06770         !X86::isZeroNode(Op.getOperand(1))) {
06771       unsigned NumBits = VT.getSizeInBits();
06772       return getVShift(true, VT,
06773                        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
06774                                    VT, Op.getOperand(1)),
06775                        NumBits/2, DAG, *this, dl);
06776     }
06777 
06778     if (IsAllConstants) // Otherwise, it's better to do a constpool load.
06779       return SDValue();
06780 
06781     // Otherwise, if this is a vector with i32 or f32 elements, and the element
06782     // is a non-constant being inserted into an element other than the low one,
06783     // we can't use a constant pool load.  Instead, use SCALAR_TO_VECTOR (aka
06784     // movd/movss) to move this into the low element, then shuffle it into
06785     // place.
06786     if (EVTBits == 32) {
06787       Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
06788 
06789       // Turn it into a shuffle of zero and zero-extended scalar to vector.
06790       Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
06791       SmallVector<int, 8> MaskVec;
06792       for (unsigned i = 0; i != NumElems; ++i)
06793         MaskVec.push_back(i == Idx ? 0 : 1);
06794       return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
06795     }
06796   }
06797 
06798   // Splat is obviously ok. Let legalizer expand it to a shuffle.
06799   if (Values.size() == 1) {
06800     if (EVTBits == 32) {
06801       // Instead of a shuffle like this:
06802       // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
06803       // Check if it's possible to issue this instead.
06804       // shuffle (vload ptr), undef, <1, 1, 1, 1>
06805       unsigned Idx = countTrailingZeros(NonZeros);
06806       SDValue Item = Op.getOperand(Idx);
06807       if (Op.getNode()->isOnlyUserOf(Item.getNode()))
06808         return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
06809     }
06810     return SDValue();
06811   }
06812 
06813   // A vector full of immediates; various special cases are already
06814   // handled, so this is best done with a single constant-pool load.
06815   if (IsAllConstants)
06816     return SDValue();
06817 
06818   // For AVX-length vectors, build the individual 128-bit pieces and use
06819   // shuffles to put them in place.
06820   if (VT.is256BitVector() || VT.is512BitVector()) {
06821     SmallVector<SDValue, 64> V;
06822     for (unsigned i = 0; i != NumElems; ++i)
06823       V.push_back(Op.getOperand(i));
06824 
06825     EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
06826 
06827     // Build both the lower and upper subvector.
06828     SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
06829                                 makeArrayRef(&V[0], NumElems/2));
06830     SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
06831                                 makeArrayRef(&V[NumElems / 2], NumElems/2));
06832 
06833     // Recreate the wider vector with the lower and upper part.
06834     if (VT.is256BitVector())
06835       return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
06836     return Concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
06837   }
06838 
06839   // Let legalizer expand 2-wide build_vectors.
06840   if (EVTBits == 64) {
06841     if (NumNonZero == 1) {
06842       // One half is zero or undef.
06843       unsigned Idx = countTrailingZeros(NonZeros);
06844       SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
06845                                  Op.getOperand(Idx));
06846       return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
06847     }
06848     return SDValue();
06849   }
06850 
06851   // If element VT is < 32 bits, convert it to inserts into a zero vector.
06852   if (EVTBits == 8 && NumElems == 16) {
06853     SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
06854                                         Subtarget, *this);
06855     if (V.getNode()) return V;
06856   }
06857 
06858   if (EVTBits == 16 && NumElems == 8) {
06859     SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
06860                                       Subtarget, *this);
06861     if (V.getNode()) return V;
06862   }
06863 
06864   // If the element VT is 32 bits and there are 4 elements, try to generate an INSERTPS.
06865   if (EVTBits == 32 && NumElems == 4) {
06866     SDValue V = LowerBuildVectorv4x32(Op, NumElems, NonZeros, NumNonZero,
06867                                       NumZero, DAG, Subtarget, *this);
06868     if (V.getNode())
06869       return V;
06870   }
06871 
06872   // If the element VT is 32 bits, turn it into a number of shuffles.
06873   SmallVector<SDValue, 8> V(NumElems);
06874   if (NumElems == 4 && NumZero > 0) {
06875     for (unsigned i = 0; i < 4; ++i) {
06876       bool isZero = !(NonZeros & (1 << i));
06877       if (isZero)
06878         V[i] = getZeroVector(VT, Subtarget, DAG, dl);
06879       else
06880         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
06881     }
06882 
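    // Merge the four single-element vectors pairwise. For pair i, the two
    // bits (NonZeros >> (i*2)) & 0x3 describe which of its lanes are
    // non-zero and select between the zero vector, a MOVL, or an unpckl.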
06883     for (unsigned i = 0; i < 2; ++i) {
06884       switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
06885         default: break;
06886         case 0:
06887           V[i] = V[i*2];  // Must be a zero vector.
06888           break;
06889         case 1:
06890           V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
06891           break;
06892         case 2:
06893           V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
06894           break;
06895         case 3:
06896           V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
06897           break;
06898       }
06899     }
06900 
06901     bool Reverse1 = (NonZeros & 0x3) == 2;
06902     bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
06903     int MaskVec[] = {
06904       Reverse1 ? 1 : 0,
06905       Reverse1 ? 0 : 1,
06906       static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
06907       static_cast<int>(Reverse2 ? NumElems   : NumElems+1)
06908     };
06909     return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
06910   }
06911 
06912   if (Values.size() > 1 && VT.is128BitVector()) {
06913     // Check for a build vector of consecutive loads.
06914     for (unsigned i = 0; i < NumElems; ++i)
06915       V[i] = Op.getOperand(i);
06916 
06917     // Check for elements which are consecutive loads.
06918     SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
06919     if (LD.getNode())
06920       return LD;
06921 
06922     // Check for a build vector from mostly shuffle plus few inserting.
06923     SDValue Sh = buildFromShuffleMostly(Op, DAG);
06924     if (Sh.getNode())
06925       return Sh;
06926 
06927     // For SSE 4.1, use insertps to put the high elements into the low element.
06928     if (getSubtarget()->hasSSE41()) {
06929       SDValue Result;
06930       if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
06931         Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
06932       else
06933         Result = DAG.getUNDEF(VT);
06934 
06935       for (unsigned i = 1; i < NumElems; ++i) {
06936         if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
06937         Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
06938                              Op.getOperand(i), DAG.getIntPtrConstant(i));
06939       }
06940       return Result;
06941     }
06942 
06943     // Otherwise, expand into a number of unpckl*, start by extending each of
06944     // our (non-undef) elements to the full vector width with the element in the
06945     // bottom slot of the vector (which generates no code for SSE).
06946     for (unsigned i = 0; i < NumElems; ++i) {
06947       if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
06948         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
06949       else
06950         V[i] = DAG.getUNDEF(VT);
06951     }
06952 
06953     // Next, we iteratively mix elements, e.g. for v4f32:
06954     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
06955     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
06956     //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
06957     unsigned EltStride = NumElems >> 1;
06958     while (EltStride != 0) {
06959       for (unsigned i = 0; i < EltStride; ++i) {
06960         // If V[i+EltStride] is undef and this is the first round of mixing,
06961         // then it is safe to just drop this shuffle: V[i] is already in the
06962         // right place, the one element (since it's the first round) being
06963         // inserted as undef can be dropped.  This isn't safe for successive
06964         // rounds because they will permute elements within both vectors.
06965         if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
06966             EltStride == NumElems/2)
06967           continue;
06968 
06969         V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
06970       }
06971       EltStride >>= 1;
06972     }
06973     return V[0];
06974   }
06975   return SDValue();
06976 }
06977 
06978 // LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
06979 // to create 256-bit vectors from two other 128-bit ones.
06980 static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
06981   SDLoc dl(Op);
06982   MVT ResVT = Op.getSimpleValueType();
06983 
06984   assert((ResVT.is256BitVector() ||
06985           ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
06986 
06987   SDValue V1 = Op.getOperand(0);
06988   SDValue V2 = Op.getOperand(1);
06989   unsigned NumElems = ResVT.getVectorNumElements();
06990   if (ResVT.is256BitVector())
06991     return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
06992 
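  // A 512-bit result may be given as four 128-bit operands: build each
  // 256-bit half from two 128-bit pieces, then concatenate the halves.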
06993   if (Op.getNumOperands() == 4) {
06994     MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
06995                                 ResVT.getVectorNumElements()/2);
06996     SDValue V3 = Op.getOperand(2);
06997     SDValue V4 = Op.getOperand(3);
06998     return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl),
06999       Concat128BitVectors(V3, V4, HalfVT, NumElems/2, DAG, dl), ResVT, NumElems, DAG, dl);
07000   }
07001   return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
07002 }
07003 
07004 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
07005   MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType();
07006   assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
07007          (VT.is512BitVector() && (Op.getNumOperands() == 2 ||
07008           Op.getNumOperands() == 4)));
07009 
07010   // AVX can use the vinsertf128 instruction to create 256-bit vectors
07011   // from two other 128-bit ones.
07012 
07013   // A 512-bit vector may be built from 2 256-bit vectors or 4 128-bit vectors.
07014   return LowerAVXCONCAT_VECTORS(Op, DAG);
07015 }
07016 
07017 
07018 //===----------------------------------------------------------------------===//
07019 // Vector shuffle lowering
07020 //
07021 // This is an experimental code path for lowering vector shuffles on x86. It is
07022 // designed to handle arbitrary vector shuffles and blends, gracefully
07023 // degrading performance as necessary. It works hard to recognize idiomatic
07024 // shuffles and lower them to optimal instruction patterns without leaving
07025 // a framework that allows reasonably efficient handling of all vector shuffle
07026 // patterns.
07027 //===----------------------------------------------------------------------===//
07028 
07029 /// \brief Tiny helper function to identify a no-op mask.
07030 ///
07031 /// This is a somewhat boring predicate function. It checks whether the mask
07032 /// array input, which is assumed to be a single-input shuffle mask of the kind
07033 /// used by the X86 shuffle instructions (not a fully general
07034 /// ShuffleVectorSDNode mask) requires any shuffles to occur. Both undef and an
07035 /// in-place shuffle are 'no-op's.
07036 static bool isNoopShuffleMask(ArrayRef<int> Mask) {
07037   for (int i = 0, Size = Mask.size(); i < Size; ++i)
07038     if (Mask[i] != -1 && Mask[i] != i)
07039       return false;
07040   return true;
07041 }
07042 
07043 /// \brief Helper function to classify a mask as a single-input mask.
07044 ///
07045 /// This isn't a generic single-input test because in the vector shuffle
07046 /// lowering we canonicalize single inputs to be the first input operand. This
07047 /// means we can more quickly test for a single input by only checking whether
07048 /// an input from the second operand exists. We also assume that the size of
07049 /// the mask corresponds to the size of the input vectors, which isn't true in
07050 /// the fully general case.
07051 static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
07052   for (int M : Mask)
07053     if (M >= (int)Mask.size())
07054       return false;
07055   return true;
07056 }
07057 
07058 // Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
07059 // 2013 will allow us to use it as a non-type template parameter.
07060 namespace {
07061 
07062 /// \brief Implementation of the \c isShuffleEquivalent variadic functor.
07063 ///
07064 /// See its documentation for details.
07065 bool isShuffleEquivalentImpl(ArrayRef<int> Mask, ArrayRef<const int *> Args) {
07066   if (Mask.size() != Args.size())
07067     return false;
07068   for (int i = 0, e = Mask.size(); i < e; ++i) {
07069     assert(*Args[i] >= 0 && "Arguments must be positive integers!");
07070     assert(*Args[i] < (int)Args.size() * 2 &&
07071            "Argument outside the range of possible shuffle inputs!");
07072     if (Mask[i] != -1 && Mask[i] != *Args[i])
07073       return false;
07074   }
07075   return true;
07076 }
07077 
07078 } // namespace
07079 
07080 /// \brief Checks whether a shuffle mask is equivalent to an explicit list of
07081 /// arguments.
07082 ///
07083 /// This is a fast way to test a shuffle mask against a fixed pattern:
07084 ///
07085 ///   if (isShuffleEquivalent(Mask, 3, 2, 1, 0)) { ... }
07086 ///
07087 /// It returns true if the mask is exactly as wide as the argument list, and
07088 /// each element of the mask is either -1 (signifying undef) or the value given
07089 /// in the argument.
07090 static const VariadicFunction1<
07091     bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
07092 
07093 /// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
07094 ///
07095 /// This helper function produces an 8-bit shuffle immediate corresponding to
07096 /// the ubiquitous shuffle encoding scheme used in x86 instructions for
07097 /// shuffling 4 lanes. It can be used with most of the PSHUF instructions for
07098 /// example.
07099 ///
07100 /// NB: We rely heavily on "undef" masks preserving the input lane.
07101 static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask,
07102                                           SelectionDAG &DAG) {
07103   assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
07104   assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
07105   assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
07106   assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
07107   assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");
07108 
07109   unsigned Imm = 0;
07110   Imm |= (Mask[0] == -1 ? 0 : Mask[0]) << 0;
07111   Imm |= (Mask[1] == -1 ? 1 : Mask[1]) << 2;
07112   Imm |= (Mask[2] == -1 ? 2 : Mask[2]) << 4;
07113   Imm |= (Mask[3] == -1 ? 3 : Mask[3]) << 6;
07114   return DAG.getConstant(Imm, MVT::i8);
07115 }
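// A minimal sketch of the 2-bits-per-lane encoding produced above (not LLVM
// code; shuffleImm8 is made up for illustration). Undef lanes (-1) default
// to their own position, preserving the input lane as noted above.
#include <array>

static unsigned shuffleImm8(const std::array<int, 4> &Mask) {
  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i) {
    int Lane = Mask[i] == -1 ? i : Mask[i]; // undef keeps the input lane
    Imm |= unsigned(Lane) << (2 * i);
  }
  return Imm;
}
// Example: the full reversal {3, 2, 1, 0} encodes as 0b00011011 == 0x1B,
// the familiar "shufps $0x1b" immediate.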
07116 
07117 /// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
07118 ///
07119 /// This is the basis function for the 2-lane 64-bit shuffles as we have full
07120 /// support for floating point shuffles but not integer shuffles. These
07121 /// instructions will incur a domain crossing penalty on some chips though so
07122 /// it is better to avoid lowering through this for integer vectors where
07123 /// possible.
07124 static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
07125                                        const X86Subtarget *Subtarget,
07126                                        SelectionDAG &DAG) {
07127   SDLoc DL(Op);
07128   assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!");
07129   assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
07130   assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
07131   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
07132   ArrayRef<int> Mask = SVOp->getMask();
07133   assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
07134 
07135   if (isSingleInputShuffleMask(Mask)) {
07136     // Straight shuffle of a single input vector. Simulate this by using the
07137     // single input as both of the "inputs" to this instruction.
07138     unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
07139     return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V1,
07140                        DAG.getConstant(SHUFPDMask, MVT::i8));
07141   }
07142   assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
07143   assert(Mask[1] >= 2 && "Non-canonicalized blend!");
07144 
07145   // Use dedicated unpack instructions for masks that match their pattern.
07146   if (isShuffleEquivalent(Mask, 0, 2))
07147     return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
07148   if (isShuffleEquivalent(Mask, 1, 3))
07149     return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
07150 
07151   unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
07152   return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
07153                      DAG.getConstant(SHUFPDMask, MVT::i8));
07154 }
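// A minimal sketch of the two-bit SHUFPD immediate computed above (not LLVM
// code; shufpdImm is made up for illustration). Bit 0 picks which element of
// the first operand lands in lane 0, bit 1 which element of the second
// operand lands in lane 1; Mask[1] is in shuffle-mask numbering (2 or 3),
// hence the -2.
static unsigned shufpdImm(int Mask0, int Mask1) {
  return unsigned(Mask0 == 1) | (unsigned((Mask1 - 2) == 1) << 1);
}
// Example: a mask of <1, 3> (high half of V1, high half of V2) encodes 0b11.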
07155 
07156 /// \brief Handle lowering of 2-lane 64-bit integer shuffles.
07157 ///
07158 /// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by
07159 /// the integer unit to minimize domain crossing penalties. However, for blends
07160 /// it falls back to the floating point shuffle operation with appropriate bit
07161 /// casting.
07162 static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
07163                                        const X86Subtarget *Subtarget,
07164                                        SelectionDAG &DAG) {
07165   SDLoc DL(Op);
07166   assert(Op.