X86ISelLowering.cpp

00001 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines the interfaces that X86 uses to lower LLVM code into a
00011 // selection DAG.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "X86ISelLowering.h"
00016 #include "Utils/X86ShuffleDecode.h"
00017 #include "X86CallingConv.h"
00018 #include "X86InstrBuilder.h"
00019 #include "X86MachineFunctionInfo.h"
00020 #include "X86TargetMachine.h"
00021 #include "X86TargetObjectFile.h"
00022 #include "llvm/ADT/SmallBitVector.h"
00023 #include "llvm/ADT/SmallSet.h"
00024 #include "llvm/ADT/Statistic.h"
00025 #include "llvm/ADT/StringExtras.h"
00026 #include "llvm/ADT/StringSwitch.h"
00027 #include "llvm/ADT/VariadicFunction.h"
00028 #include "llvm/CodeGen/IntrinsicLowering.h"
00029 #include "llvm/CodeGen/MachineFrameInfo.h"
00030 #include "llvm/CodeGen/MachineFunction.h"
00031 #include "llvm/CodeGen/MachineInstrBuilder.h"
00032 #include "llvm/CodeGen/MachineJumpTableInfo.h"
00033 #include "llvm/CodeGen/MachineModuleInfo.h"
00034 #include "llvm/CodeGen/MachineRegisterInfo.h"
00035 #include "llvm/IR/CallSite.h"
00036 #include "llvm/IR/CallingConv.h"
00037 #include "llvm/IR/Constants.h"
00038 #include "llvm/IR/DerivedTypes.h"
00039 #include "llvm/IR/Function.h"
00040 #include "llvm/IR/GlobalAlias.h"
00041 #include "llvm/IR/GlobalVariable.h"
00042 #include "llvm/IR/Instructions.h"
00043 #include "llvm/IR/Intrinsics.h"
00044 #include "llvm/MC/MCAsmInfo.h"
00045 #include "llvm/MC/MCContext.h"
00046 #include "llvm/MC/MCExpr.h"
00047 #include "llvm/MC/MCSymbol.h"
00048 #include "llvm/Support/CommandLine.h"
00049 #include "llvm/Support/Debug.h"
00050 #include "llvm/Support/ErrorHandling.h"
00051 #include "llvm/Support/MathExtras.h"
00052 #include "llvm/Target/TargetOptions.h"
00053 #include "X86IntrinsicsInfo.h"
00054 #include <bitset>
00055 #include <numeric>
00056 #include <cctype>
00057 using namespace llvm;
00058 
00059 #define DEBUG_TYPE "x86-isel"
00060 
00061 STATISTIC(NumTailCalls, "Number of tail calls");
00062 
00063 static cl::opt<bool> ExperimentalVectorWideningLegalization(
00064     "x86-experimental-vector-widening-legalization", cl::init(false),
00065     cl::desc("Enable an experimental vector type legalization through widening "
00066              "rather than promotion."),
00067     cl::Hidden);
00068 
00069 static cl::opt<bool> ExperimentalVectorShuffleLowering(
00070     "x86-experimental-vector-shuffle-lowering", cl::init(false),
00071     cl::desc("Enable an experimental vector shuffle lowering code path."),
00072     cl::Hidden);
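
// Both flags above are hidden cl::opt switches; as a usage sketch (hypothetical
// invocation, assuming a plain llc run), they can be toggled on the command
// line, e.g.:
//
//   llc -mtriple=x86_64-unknown-linux-gnu \
//       -x86-experimental-vector-shuffle-lowering foo.ll -o foo.s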
00073 
00074 // Forward declarations.
00075 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
00076                        SDValue V2);
00077 
00078 static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
00079                                 SelectionDAG &DAG, SDLoc dl,
00080                                 unsigned vectorWidth) {
00081   assert((vectorWidth == 128 || vectorWidth == 256) &&
00082          "Unsupported vector width");
00083   EVT VT = Vec.getValueType();
00084   EVT ElVT = VT.getVectorElementType();
00085   unsigned Factor = VT.getSizeInBits()/vectorWidth;
00086   EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
00087                                   VT.getVectorNumElements()/Factor);
00088 
00089   // Extract from UNDEF is UNDEF.
00090   if (Vec.getOpcode() == ISD::UNDEF)
00091     return DAG.getUNDEF(ResultVT);
00092 
00093   // Extract the relevant vectorWidth bits.  Generate an EXTRACT_SUBVECTOR.
00094   unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
00095 
00096   // This is the index of the first element of the vectorWidth-bit chunk
00097   // we want.
00098   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
00099                                * ElemsPerChunk);
00100 
00101   // If the input is a buildvector just emit a smaller one.
00102   if (Vec.getOpcode() == ISD::BUILD_VECTOR)
00103     return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
00104                        makeArrayRef(Vec->op_begin()+NormalizedIdxVal,
00105                                     ElemsPerChunk));
00106 
00107   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
00108   SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
00109                                VecIdx);
00110 
00111   return Result;
00112 
00113 }
00114 /// Generate a DAG to grab 128-bits from a vector > 128 bits.  This
00115 /// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
00116 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
00117 /// instructions or a simple subregister reference. Idx is an index in the
00118 /// 128 bits we want.  It need not be aligned to a 128-bit boundary.  That makes
00119 /// lowering EXTRACT_VECTOR_ELT operations easier.
00120 static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
00121                                    SelectionDAG &DAG, SDLoc dl) {
00122   assert((Vec.getValueType().is256BitVector() ||
00123           Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
00124   return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
00125 }
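
// Illustrative sketch of the index normalization in ExtractSubVector above:
// assuming a 256-bit v8i32 source and IdxVal == 5, ElVT is i32, so
// ElemsPerChunk == 128 / 32 == 4 and
// NormalizedIdxVal == ((5 * 32) / 128) * 4 == 4, i.e. the extract starts at
// element 4 (the upper 128-bit half):
//
//   SDValue Hi = Extract128BitVector(Vec, 5, DAG, dl); // elements 4..7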
00126 
00127 /// Generate a DAG to grab 256-bits from a 512-bit vector.
00128 static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
00129                                    SelectionDAG &DAG, SDLoc dl) {
00130   assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
00131   return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
00132 }
00133 
00134 static SDValue InsertSubVector(SDValue Result, SDValue Vec,
00135                                unsigned IdxVal, SelectionDAG &DAG,
00136                                SDLoc dl, unsigned vectorWidth) {
00137   assert((vectorWidth == 128 || vectorWidth == 256) &&
00138          "Unsupported vector width");
00139   // Inserting UNDEF into Result just returns Result.
00140   if (Vec.getOpcode() == ISD::UNDEF)
00141     return Result;
00142   EVT VT = Vec.getValueType();
00143   EVT ElVT = VT.getVectorElementType();
00144   EVT ResultVT = Result.getValueType();
00145 
00146   // Insert the relevant vectorWidth bits.
00147   unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
00148 
00149   // This is the index of the first element of the vectorWidth-bit chunk
00150   // we want.
00151   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
00152                                * ElemsPerChunk);
00153 
00154   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
00155   return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
00156                      VecIdx);
00157 }
00158 /// Generate a DAG to put 128-bits into a vector > 128 bits.  This
00159 /// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
00160 /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
00161 /// simple superregister reference.  Idx is an index in the 128 bits
00162 /// we want.  It need not be aligned to a 128-bit boundary.  That makes
00163 /// lowering INSERT_VECTOR_ELT operations easier.
00164 static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
00165                                   unsigned IdxVal, SelectionDAG &DAG,
00166                                   SDLoc dl) {
00167   assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
00168   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
00169 }
00170 
00171 static SDValue Insert256BitVector(SDValue Result, SDValue Vec,
00172                                   unsigned IdxVal, SelectionDAG &DAG,
00173                                   SDLoc dl) {
00174   assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
00175   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
00176 }
00177 
00178 /// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
00179 /// instructions. This is used because creating CONCAT_VECTORS nodes of
00180 /// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
00181 /// large BUILD_VECTORS.
00182 static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
00183                                    unsigned NumElems, SelectionDAG &DAG,
00184                                    SDLoc dl) {
00185   SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
00186   return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
00187 }
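
// Usage sketch (hypothetical values): concatenating two v4i32 halves into a
// v8i32 result is just two 128-bit insertions into an undef vector:
//
//   SDValue Cat = Concat128BitVectors(Lo, Hi, MVT::v8i32, 8, DAG, dl);
//   // Cat = <Lo[0..3], Hi[0..3]>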
00188 
00189 static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
00190                                    unsigned NumElems, SelectionDAG &DAG,
00191                                    SDLoc dl) {
00192   SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
00193   return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
00194 }
00195 
00196 static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
00197   if (TT.isOSBinFormatMachO()) {
00198     if (TT.getArch() == Triple::x86_64)
00199       return new X86_64MachoTargetObjectFile();
00200     return new TargetLoweringObjectFileMachO();
00201   }
00202 
00203   if (TT.isOSLinux())
00204     return new X86LinuxTargetObjectFile();
00205   if (TT.isOSBinFormatELF())
00206     return new TargetLoweringObjectFileELF();
00207   if (TT.isKnownWindowsMSVCEnvironment())
00208     return new X86WindowsTargetObjectFile();
00209   if (TT.isOSBinFormatCOFF())
00210     return new TargetLoweringObjectFileCOFF();
00211   llvm_unreachable("unknown subtarget type");
00212 }
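
// For illustration, a triple of x86_64-apple-darwin selects
// X86_64MachoTargetObjectFile, i686-pc-linux-gnu selects
// X86LinuxTargetObjectFile, and i686-pc-windows-msvc selects
// X86WindowsTargetObjectFile.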
00213 
00214 // FIXME: This should stop caching the target machine as soon as
00215 // we can remove resetOperationActions et al.
00216 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM)
00217     : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
00218   Subtarget = &TM.getSubtarget<X86Subtarget>();
00219   X86ScalarSSEf64 = Subtarget->hasSSE2();
00220   X86ScalarSSEf32 = Subtarget->hasSSE1();
00221   TD = getDataLayout();
00222 
00223   resetOperationActions();
00224 }
00225 
00226 void X86TargetLowering::resetOperationActions() {
00227   const TargetMachine &TM = getTargetMachine();
00228   static bool FirstTimeThrough = true;
00229 
00230   // If none of the target options have changed, then we don't need to reset the
00231   // operation actions.
00232   if (!FirstTimeThrough && TO == TM.Options) return;
00233 
00234   if (!FirstTimeThrough) {
00235     // Reinitialize the actions.
00236     initActions();
00237     FirstTimeThrough = false;
00238   }
00239 
00240   TO = TM.Options;
00241 
00242   // Set up the TargetLowering object.
00243   static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
00244 
00245   // X86 is weird; it always uses i8 for shift amounts and setcc results.
00246   setBooleanContents(ZeroOrOneBooleanContent);
00247   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
00248   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
00249 
00250   // For 64-bit, since we have so many registers, use the ILP scheduler; for
00251   // 32-bit code, use the register pressure specific scheduling.
00252   // For Atom, always use ILP scheduling.
00253   if (Subtarget->isAtom())
00254     setSchedulingPreference(Sched::ILP);
00255   else if (Subtarget->is64Bit())
00256     setSchedulingPreference(Sched::ILP);
00257   else
00258     setSchedulingPreference(Sched::RegPressure);
00259   const X86RegisterInfo *RegInfo =
00260       TM.getSubtarget<X86Subtarget>().getRegisterInfo();
00261   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
00262 
00263   // Bypass expensive divides on Atom when compiling with O2
00264   if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
00265     addBypassSlowDiv(32, 8);
00266     if (Subtarget->is64Bit())
00267       addBypassSlowDiv(64, 16);
00268   }
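
  // Rough sketch of the effect of addBypassSlowDiv(32, 8): a 32-bit divide
  // whose operands both fit in 8 bits takes a fast path, conceptually
  //
  //   if (((X | Y) & 0xffffff00) == 0)
  //     Q = (uint8_t)X / (uint8_t)Y;   // cheap 8-bit divide
  //   else
  //     Q = X / Y;                     // full 32-bit divide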
00269 
00270   if (Subtarget->isTargetKnownWindowsMSVC()) {
00271     // Setup Windows compiler runtime calls.
00272     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
00273     setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
00274     setLibcallName(RTLIB::SREM_I64, "_allrem");
00275     setLibcallName(RTLIB::UREM_I64, "_aullrem");
00276     setLibcallName(RTLIB::MUL_I64, "_allmul");
00277     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
00278     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
00279     setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
00280     setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
00281     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
00282 
00283     // The _ftol2 runtime function has an unusual calling conv, which
00284     // is modeled by a special pseudo-instruction.
00285     setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
00286     setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
00287     setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
00288     setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
00289   }
00290 
00291   if (Subtarget->isTargetDarwin()) {
00292     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
00293     setUseUnderscoreSetJmp(false);
00294     setUseUnderscoreLongJmp(false);
00295   } else if (Subtarget->isTargetWindowsGNU()) {
00296     // MS runtime is weird: it exports _setjmp, but only plain longjmp!
00297     setUseUnderscoreSetJmp(true);
00298     setUseUnderscoreLongJmp(false);
00299   } else {
00300     setUseUnderscoreSetJmp(true);
00301     setUseUnderscoreLongJmp(true);
00302   }
00303 
00304   // Set up the register classes.
00305   addRegisterClass(MVT::i8, &X86::GR8RegClass);
00306   addRegisterClass(MVT::i16, &X86::GR16RegClass);
00307   addRegisterClass(MVT::i32, &X86::GR32RegClass);
00308   if (Subtarget->is64Bit())
00309     addRegisterClass(MVT::i64, &X86::GR64RegClass);
00310 
00311   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
00312 
00313   // We don't accept any truncstore of integer registers.
00314   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
00315   setTruncStoreAction(MVT::i64, MVT::i16, Expand);
00316   setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
00317   setTruncStoreAction(MVT::i32, MVT::i16, Expand);
00318   setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
00319   setTruncStoreAction(MVT::i16, MVT::i8,  Expand);
00320 
00321   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
00322 
00323   // SETOEQ and SETUNE require checking two conditions.
00324   setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
00325   setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
00326   setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
00327   setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
00328   setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
00329   setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
00330 
00331   // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
00332   // operation.
00333   setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
00334   setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
00335   setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
00336 
00337   if (Subtarget->is64Bit()) {
00338     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
00339     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
00340   } else if (!TM.Options.UseSoftFloat) {
00341     // We have an algorithm for SSE2->double, and we turn this into a
00342     // 64-bit FILD followed by conditional FADD for other targets.
00343     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
00344     // We have an algorithm for SSE2, and we turn this into a 64-bit
00345     // FILD for other targets.
00346     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
00347   }
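
  // Sketch of the promotion above: an unsigned i8/i16 source is zero-extended
  // to a wider type and converted with the signed form, which is safe because
  // the zero-extended value is never negative, e.g.
  // (f64 uint_to_fp i16 X) becomes (f64 sint_to_fp (i32 zero_extend X)).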
00348 
00349   // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
00350   // this operation.
00351   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
00352   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
00353 
00354   if (!TM.Options.UseSoftFloat) {
00355     // SSE has no i16 to fp conversion, only i32
00356     if (X86ScalarSSEf32) {
00357       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00358       // f32 and f64 cases are Legal, f80 case is not
00359       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00360     } else {
00361       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
00362       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00363     }
00364   } else {
00365     setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00366     setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Promote);
00367   }
00368 
00369   // In 32-bit mode these are custom lowered.  In 64-bit mode f32 and f64
00370   // are Legal; f80 is custom lowered.
00371   setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
00372   setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
00373 
00374   // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
00375   // this operation.
00376   setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
00377   setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
00378 
00379   if (X86ScalarSSEf32) {
00380     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
00381     // f32 and f64 cases are Legal, f80 case is not
00382     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00383   } else {
00384     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
00385     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00386   }
00387 
00388   // Handle FP_TO_UINT by promoting the destination to a larger signed
00389   // conversion.
00390   setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
00391   setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
00392   setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
00393 
00394   if (Subtarget->is64Bit()) {
00395     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
00396     setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
00397   } else if (!TM.Options.UseSoftFloat) {
00398     // Since AVX is a superset of SSE3, only check for SSE here.
00399     if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
00400       // Expand FP_TO_UINT into a select.
00401       // FIXME: We would like to use a Custom expander here eventually to do
00402       // the optimal thing for SSE vs. the default expansion in the legalizer.
00403       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
00404     else
00405       // With SSE3 we can use fisttpll to convert to a signed i64; without
00406       // SSE, we're stuck with a fistpll.
00407       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
00408   }
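
  // Sketch of the FP_TO_UINT promotion above: a narrow unsigned result is
  // produced with a wider signed conversion and then truncated, which is exact
  // because the unsigned range fits in the wider signed type, e.g.
  // (i16 fp_to_uint f64 X) becomes (i16 truncate (i32 fp_to_sint f64 X)).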
00409 
00410   if (isTargetFTOL()) {
00411     // Use the _ftol2 runtime function, which has a pseudo-instruction
00412     // to handle its weird calling convention.
00413     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Custom);
00414   }
00415 
00416   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
00417   if (!X86ScalarSSEf64) {
00418     setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
00419     setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
00420     if (Subtarget->is64Bit()) {
00421       setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
00422       // Without SSE, i64->f64 goes through memory.
00423       setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
00424     }
00425   }
00426 
00427   // Scalar integer divide and remainder are lowered to use operations that
00428   // produce two results, to match the available instructions. This exposes
00429   // the two-result form to trivial CSE, which is able to combine x/y and x%y
00430   // into a single instruction.
00431   //
00432   // Scalar integer multiply-high is also lowered to use two-result
00433   // operations, to match the available instructions. However, plain multiply
00434   // (low) operations are left as Legal, as there are single-result
00435   // instructions for this in x86. Using the two-result multiply instructions
00436   // when both high and low results are needed must be arranged by dagcombine.
00437   for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
00438     MVT VT = IntVTs[i];
00439     setOperationAction(ISD::MULHS, VT, Expand);
00440     setOperationAction(ISD::MULHU, VT, Expand);
00441     setOperationAction(ISD::SDIV, VT, Expand);
00442     setOperationAction(ISD::UDIV, VT, Expand);
00443     setOperationAction(ISD::SREM, VT, Expand);
00444     setOperationAction(ISD::UREM, VT, Expand);
00445 
00446     // Add/Sub overflow ops with MVT::Glue are lowered to EFLAGS dependences.
00447     setOperationAction(ISD::ADDC, VT, Custom);
00448     setOperationAction(ISD::ADDE, VT, Custom);
00449     setOperationAction(ISD::SUBC, VT, Custom);
00450     setOperationAction(ISD::SUBE, VT, Custom);
00451   }
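
  // Illustration of the two-result lowering described above: source code that
  // computes both results, e.g.
  //
  //   int Q = X / Y;
  //   int R = X % Y;
  //
  // is combined by CSE into a single divide node and ultimately a single
  // x86 div/idiv, which produces the quotient and remainder together.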
00452 
00453   setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
00454   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
00455   setOperationAction(ISD::BR_CC            , MVT::f32,   Expand);
00456   setOperationAction(ISD::BR_CC            , MVT::f64,   Expand);
00457   setOperationAction(ISD::BR_CC            , MVT::f80,   Expand);
00458   setOperationAction(ISD::BR_CC            , MVT::i8,    Expand);
00459   setOperationAction(ISD::BR_CC            , MVT::i16,   Expand);
00460   setOperationAction(ISD::BR_CC            , MVT::i32,   Expand);
00461   setOperationAction(ISD::BR_CC            , MVT::i64,   Expand);
00462   setOperationAction(ISD::SELECT_CC        , MVT::f32,   Expand);
00463   setOperationAction(ISD::SELECT_CC        , MVT::f64,   Expand);
00464   setOperationAction(ISD::SELECT_CC        , MVT::f80,   Expand);
00465   setOperationAction(ISD::SELECT_CC        , MVT::i8,    Expand);
00466   setOperationAction(ISD::SELECT_CC        , MVT::i16,   Expand);
00467   setOperationAction(ISD::SELECT_CC        , MVT::i32,   Expand);
00468   setOperationAction(ISD::SELECT_CC        , MVT::i64,   Expand);
00469   if (Subtarget->is64Bit())
00470     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
00471   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
00472   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
00473   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
00474   setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
00475   setOperationAction(ISD::FREM             , MVT::f32  , Expand);
00476   setOperationAction(ISD::FREM             , MVT::f64  , Expand);
00477   setOperationAction(ISD::FREM             , MVT::f80  , Expand);
00478   setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
00479 
00480   // Promote the i8 variants and force them up to i32, which has a shorter
00481   // encoding.
00482   setOperationAction(ISD::CTTZ             , MVT::i8   , Promote);
00483   AddPromotedToType (ISD::CTTZ             , MVT::i8   , MVT::i32);
00484   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , Promote);
00485   AddPromotedToType (ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , MVT::i32);
00486   if (Subtarget->hasBMI()) {
00487     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , Expand);
00488     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Expand);
00489     if (Subtarget->is64Bit())
00490       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
00491   } else {
00492     setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
00493     setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
00494     if (Subtarget->is64Bit())
00495       setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
00496   }
00497 
00498   if (Subtarget->hasLZCNT()) {
00499     // When promoting the i8 variants, force them to i32 for a shorter
00500     // encoding.
00501     setOperationAction(ISD::CTLZ           , MVT::i8   , Promote);
00502     AddPromotedToType (ISD::CTLZ           , MVT::i8   , MVT::i32);
00503     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Promote);
00504     AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
00505     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Expand);
00506     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Expand);
00507     if (Subtarget->is64Bit())
00508       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
00509   } else {
00510     setOperationAction(ISD::CTLZ           , MVT::i8   , Custom);
00511     setOperationAction(ISD::CTLZ           , MVT::i16  , Custom);
00512     setOperationAction(ISD::CTLZ           , MVT::i32  , Custom);
00513     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Custom);
00514     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Custom);
00515     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Custom);
00516     if (Subtarget->is64Bit()) {
00517       setOperationAction(ISD::CTLZ         , MVT::i64  , Custom);
00518       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
00519     }
00520   }
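
  // Sketch of the Custom CTLZ lowering used when LZCNT is unavailable: for a
  // non-zero i32 input the count is computed as 31 - BSR(X), i.e. a BSR
  // followed by an XOR with 31, with extra code to handle a zero input in the
  // non-ZERO_UNDEF case.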
00521 
00522   // Special handling for half-precision floating point conversions.
00523   // If we don't have F16C support, then lower half float conversions
00524   // into library calls.
00525   if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
00526     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
00527     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
00528   }
00529 
00530   // There's never any support for operations beyond MVT::f32.
00531   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
00532   setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
00533   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
00534   setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
00535 
00536   setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
00537   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
00538   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
00539   setTruncStoreAction(MVT::f80, MVT::f16, Expand);
00540 
00541   if (Subtarget->hasPOPCNT()) {
00542     setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
00543   } else {
00544     setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
00545     setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
00546     setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
00547     if (Subtarget->is64Bit())
00548       setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
00549   }
00550 
00551   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
00552 
00553   if (!Subtarget->hasMOVBE())
00554     setOperationAction(ISD::BSWAP          , MVT::i16  , Expand);
00555 
00556   // These should be promoted to a larger select which is supported.
00557   setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
00558   // X86 wants to expand cmov itself.
00559   setOperationAction(ISD::SELECT          , MVT::i8   , Custom);
00560   setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
00561   setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
00562   setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
00563   setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
00564   setOperationAction(ISD::SELECT          , MVT::f80  , Custom);
00565   setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
00566   setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
00567   setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
00568   setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
00569   setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
00570   setOperationAction(ISD::SETCC           , MVT::f80  , Custom);
00571   if (Subtarget->is64Bit()) {
00572     setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
00573     setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
00574   }
00575   setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
00576   // NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
00577   // handling here; they are a light-weight setjmp/longjmp replacement used to
00578   // support continuations, user-level threading, etc. As a result, no other
00579   // SjLj exception interfaces are implemented, so please don't build your own
00580   // exception handling on top of them.
00581   // LLVM/Clang supports zero-cost DWARF exception handling.
00582   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
00583   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
00584 
00585   // Darwin ABI issue.
00586   setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
00587   setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
00588   setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
00589   setOperationAction(ISD::GlobalTLSAddress, MVT::i32  , Custom);
00590   if (Subtarget->is64Bit())
00591     setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
00592   setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
00593   setOperationAction(ISD::BlockAddress    , MVT::i32  , Custom);
00594   if (Subtarget->is64Bit()) {
00595     setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
00596     setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
00597     setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
00598     setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
00599     setOperationAction(ISD::BlockAddress  , MVT::i64  , Custom);
00600   }
00601   // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
00602   setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
00603   setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
00604   setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
00605   if (Subtarget->is64Bit()) {
00606     setOperationAction(ISD::SHL_PARTS     , MVT::i64  , Custom);
00607     setOperationAction(ISD::SRA_PARTS     , MVT::i64  , Custom);
00608     setOperationAction(ISD::SRL_PARTS     , MVT::i64  , Custom);
00609   }
00610 
00611   if (Subtarget->hasSSE1())
00612     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
00613 
00614   setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);
00615 
00616   // Expand certain atomics
00617   for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
00618     MVT VT = IntVTs[i];
00619     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
00620     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
00621     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
00622   }
00623 
00624   if (Subtarget->hasCmpxchg16b()) {
00625     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
00626   }
00627 
00628   // FIXME - use subtarget debug flags
00629   if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
00630       !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
00631     setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
00632   }
00633 
00634   if (Subtarget->is64Bit()) {
00635     setExceptionPointerRegister(X86::RAX);
00636     setExceptionSelectorRegister(X86::RDX);
00637   } else {
00638     setExceptionPointerRegister(X86::EAX);
00639     setExceptionSelectorRegister(X86::EDX);
00640   }
00641   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
00642   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
00643 
00644   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
00645   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
00646 
00647   setOperationAction(ISD::TRAP, MVT::Other, Legal);
00648   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
00649 
00650   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
00651   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
00652   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
00653   if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
00654     // TargetInfo::X86_64ABIBuiltinVaList
00655     setOperationAction(ISD::VAARG           , MVT::Other, Custom);
00656     setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
00657   } else {
00658     // TargetInfo::CharPtrBuiltinVaList
00659     setOperationAction(ISD::VAARG           , MVT::Other, Expand);
00660     setOperationAction(ISD::VACOPY          , MVT::Other, Expand);
00661   }
00662 
00663   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
00664   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
00665 
00666   setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
00667 
00668   if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
00669     // f32 and f64 use SSE.
00670     // Set up the FP register classes.
00671     addRegisterClass(MVT::f32, &X86::FR32RegClass);
00672     addRegisterClass(MVT::f64, &X86::FR64RegClass);
00673 
00674     // Use ANDPD to simulate FABS.
00675     setOperationAction(ISD::FABS , MVT::f64, Custom);
00676     setOperationAction(ISD::FABS , MVT::f32, Custom);
00677 
00678     // Use XORP to simulate FNEG.
00679     setOperationAction(ISD::FNEG , MVT::f64, Custom);
00680     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00681 
00682     // Use ANDPD and ORPD to simulate FCOPYSIGN.
00683     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
00684     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00685 
00686     // Lower this to FGETSIGNx86 plus an AND.
00687     setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
00688     setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
00689 
00690     // We don't support sin/cos/fmod
00691     setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00692     setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00693     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00694     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00695     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00696     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00697 
00698     // Expand FP immediates into loads from the stack, except for the special
00699     // cases we handle.
00700     addLegalFPImmediate(APFloat(+0.0)); // xorpd
00701     addLegalFPImmediate(APFloat(+0.0f)); // xorps
00702   } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
00703     // Use SSE for f32, x87 for f64.
00704     // Set up the FP register classes.
00705     addRegisterClass(MVT::f32, &X86::FR32RegClass);
00706     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
00707 
00708     // Use ANDPS to simulate FABS.
00709     setOperationAction(ISD::FABS , MVT::f32, Custom);
00710 
00711     // Use XORP to simulate FNEG.
00712     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00713 
00714     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
00715 
00716     // Use ANDPS and ORPS to simulate FCOPYSIGN.
00717     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00718     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00719 
00720     // We don't support sin/cos/fmod
00721     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00722     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00723     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00724 
00725     // Special cases we handle for FP constants.
00726     addLegalFPImmediate(APFloat(+0.0f)); // xorps
00727     addLegalFPImmediate(APFloat(+0.0)); // FLD0
00728     addLegalFPImmediate(APFloat(+1.0)); // FLD1
00729     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
00730     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
00731 
00732     if (!TM.Options.UnsafeFPMath) {
00733       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00734       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00735       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00736     }
00737   } else if (!TM.Options.UseSoftFloat) {
00738     // f32 and f64 in x87.
00739     // Set up the FP register classes.
00740     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
00741     addRegisterClass(MVT::f32, &X86::RFP32RegClass);
00742 
00743     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
00744     setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
00745     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00746     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
00747 
00748     if (!TM.Options.UnsafeFPMath) {
00749       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00750       setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00751       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00752       setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00753       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00754       setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00755     }
00756     addLegalFPImmediate(APFloat(+0.0)); // FLD0
00757     addLegalFPImmediate(APFloat(+1.0)); // FLD1
00758     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
00759     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
00760     addLegalFPImmediate(APFloat(+0.0f)); // FLD0
00761     addLegalFPImmediate(APFloat(+1.0f)); // FLD1
00762     addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
00763     addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
00764   }
00765 
00766   // We don't support FMA.
00767   setOperationAction(ISD::FMA, MVT::f64, Expand);
00768   setOperationAction(ISD::FMA, MVT::f32, Expand);
00769 
00770   // Long double always uses X87.
00771   if (!TM.Options.UseSoftFloat) {
00772     addRegisterClass(MVT::f80, &X86::RFP80RegClass);
00773     setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
00774     setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
00775     {
00776       APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
00777       addLegalFPImmediate(TmpFlt);  // FLD0
00778       TmpFlt.changeSign();
00779       addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
00780 
00781       bool ignored;
00782       APFloat TmpFlt2(+1.0);
00783       TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
00784                       &ignored);
00785       addLegalFPImmediate(TmpFlt2);  // FLD1
00786       TmpFlt2.changeSign();
00787       addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
00788     }
00789 
00790     if (!TM.Options.UnsafeFPMath) {
00791       setOperationAction(ISD::FSIN   , MVT::f80, Expand);
00792       setOperationAction(ISD::FCOS   , MVT::f80, Expand);
00793       setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
00794     }
00795 
00796     setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
00797     setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
00798     setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
00799     setOperationAction(ISD::FRINT,  MVT::f80, Expand);
00800     setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
00801     setOperationAction(ISD::FMA, MVT::f80, Expand);
00802   }
00803 
00804   // Always use a library call for pow.
00805   setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
00806   setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
00807   setOperationAction(ISD::FPOW             , MVT::f80  , Expand);
00808 
00809   setOperationAction(ISD::FLOG, MVT::f80, Expand);
00810   setOperationAction(ISD::FLOG2, MVT::f80, Expand);
00811   setOperationAction(ISD::FLOG10, MVT::f80, Expand);
00812   setOperationAction(ISD::FEXP, MVT::f80, Expand);
00813   setOperationAction(ISD::FEXP2, MVT::f80, Expand);
00814 
00815   // First set operation action for all vector types to either promote
00816   // (for widening) or expand (for scalarization). Then we will selectively
00817   // turn on ones that can be effectively codegen'd.
00818   for (int i = MVT::FIRST_VECTOR_VALUETYPE;
00819            i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
00820     MVT VT = (MVT::SimpleValueType)i;
00821     setOperationAction(ISD::ADD , VT, Expand);
00822     setOperationAction(ISD::SUB , VT, Expand);
00823     setOperationAction(ISD::FADD, VT, Expand);
00824     setOperationAction(ISD::FNEG, VT, Expand);
00825     setOperationAction(ISD::FSUB, VT, Expand);
00826     setOperationAction(ISD::MUL , VT, Expand);
00827     setOperationAction(ISD::FMUL, VT, Expand);
00828     setOperationAction(ISD::SDIV, VT, Expand);
00829     setOperationAction(ISD::UDIV, VT, Expand);
00830     setOperationAction(ISD::FDIV, VT, Expand);
00831     setOperationAction(ISD::SREM, VT, Expand);
00832     setOperationAction(ISD::UREM, VT, Expand);
00833     setOperationAction(ISD::LOAD, VT, Expand);
00834     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
00835     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
00836     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
00837     setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
00838     setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
00839     setOperationAction(ISD::FABS, VT, Expand);
00840     setOperationAction(ISD::FSIN, VT, Expand);
00841     setOperationAction(ISD::FSINCOS, VT, Expand);
00842     setOperationAction(ISD::FCOS, VT, Expand);
00843     setOperationAction(ISD::FSINCOS, VT, Expand);
00844     setOperationAction(ISD::FREM, VT, Expand);
00845     setOperationAction(ISD::FMA,  VT, Expand);
00846     setOperationAction(ISD::FPOWI, VT, Expand);
00847     setOperationAction(ISD::FSQRT, VT, Expand);
00848     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
00849     setOperationAction(ISD::FFLOOR, VT, Expand);
00850     setOperationAction(ISD::FCEIL, VT, Expand);
00851     setOperationAction(ISD::FTRUNC, VT, Expand);
00852     setOperationAction(ISD::FRINT, VT, Expand);
00853     setOperationAction(ISD::FNEARBYINT, VT, Expand);
00854     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
00855     setOperationAction(ISD::MULHS, VT, Expand);
00856     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
00857     setOperationAction(ISD::MULHU, VT, Expand);
00858     setOperationAction(ISD::SDIVREM, VT, Expand);
00859     setOperationAction(ISD::UDIVREM, VT, Expand);
00860     setOperationAction(ISD::FPOW, VT, Expand);
00861     setOperationAction(ISD::CTPOP, VT, Expand);
00862     setOperationAction(ISD::CTTZ, VT, Expand);
00863     setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
00864     setOperationAction(ISD::CTLZ, VT, Expand);
00865     setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
00866     setOperationAction(ISD::SHL, VT, Expand);
00867     setOperationAction(ISD::SRA, VT, Expand);
00868     setOperationAction(ISD::SRL, VT, Expand);
00869     setOperationAction(ISD::ROTL, VT, Expand);
00870     setOperationAction(ISD::ROTR, VT, Expand);
00871     setOperationAction(ISD::BSWAP, VT, Expand);
00872     setOperationAction(ISD::SETCC, VT, Expand);
00873     setOperationAction(ISD::FLOG, VT, Expand);
00874     setOperationAction(ISD::FLOG2, VT, Expand);
00875     setOperationAction(ISD::FLOG10, VT, Expand);
00876     setOperationAction(ISD::FEXP, VT, Expand);
00877     setOperationAction(ISD::FEXP2, VT, Expand);
00878     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
00879     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
00880     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
00881     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
00882     setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
00883     setOperationAction(ISD::TRUNCATE, VT, Expand);
00884     setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
00885     setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
00886     setOperationAction(ISD::ANY_EXTEND, VT, Expand);
00887     setOperationAction(ISD::VSELECT, VT, Expand);
00888     setOperationAction(ISD::SELECT_CC, VT, Expand);
00889     for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
00890              InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
00891       setTruncStoreAction(VT,
00892                           (MVT::SimpleValueType)InnerVT, Expand);
00893     setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
00894     setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
00895 
00896     // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like types;
00897     // we have to deal with them whether we ask for Expansion or not. Setting
00898     // Expand causes its own optimisation problems though, so leave them legal.
00899     if (VT.getVectorElementType() == MVT::i1)
00900       setLoadExtAction(ISD::EXTLOAD, VT, Expand);
00901   }
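
  // The blocks below selectively re-enable operations on the vector types the
  // subtarget actually supports; for example, with SSE2 available, integer
  // vector adds become legal again via
  //
  //   setOperationAction(ISD::ADD, MVT::v4i32, Legal);
  //
  // while anything still marked Expand is split or scalarized by the
  // legalizer.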
00902 
00903   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
00904   // with -msoft-float, disable use of MMX as well.
00905   if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
00906     addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
00907     // No operations on x86mmx supported, everything uses intrinsics.
00908   }
00909 
00910   // MMX-sized vectors (other than x86mmx) are expected to be expanded
00911   // into smaller operations.
00912   setOperationAction(ISD::MULHS,              MVT::v8i8,  Expand);
00913   setOperationAction(ISD::MULHS,              MVT::v4i16, Expand);
00914   setOperationAction(ISD::MULHS,              MVT::v2i32, Expand);
00915   setOperationAction(ISD::MULHS,              MVT::v1i64, Expand);
00916   setOperationAction(ISD::AND,                MVT::v8i8,  Expand);
00917   setOperationAction(ISD::AND,                MVT::v4i16, Expand);
00918   setOperationAction(ISD::AND,                MVT::v2i32, Expand);
00919   setOperationAction(ISD::AND,                MVT::v1i64, Expand);
00920   setOperationAction(ISD::OR,                 MVT::v8i8,  Expand);
00921   setOperationAction(ISD::OR,                 MVT::v4i16, Expand);
00922   setOperationAction(ISD::OR,                 MVT::v2i32, Expand);
00923   setOperationAction(ISD::OR,                 MVT::v1i64, Expand);
00924   setOperationAction(ISD::XOR,                MVT::v8i8,  Expand);
00925   setOperationAction(ISD::XOR,                MVT::v4i16, Expand);
00926   setOperationAction(ISD::XOR,                MVT::v2i32, Expand);
00927   setOperationAction(ISD::XOR,                MVT::v1i64, Expand);
00928   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i8,  Expand);
00929   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i16, Expand);
00930   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v2i32, Expand);
00931   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v1i64, Expand);
00932   setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v1i64, Expand);
00933   setOperationAction(ISD::SELECT,             MVT::v8i8,  Expand);
00934   setOperationAction(ISD::SELECT,             MVT::v4i16, Expand);
00935   setOperationAction(ISD::SELECT,             MVT::v2i32, Expand);
00936   setOperationAction(ISD::SELECT,             MVT::v1i64, Expand);
00937   setOperationAction(ISD::BITCAST,            MVT::v8i8,  Expand);
00938   setOperationAction(ISD::BITCAST,            MVT::v4i16, Expand);
00939   setOperationAction(ISD::BITCAST,            MVT::v2i32, Expand);
00940   setOperationAction(ISD::BITCAST,            MVT::v1i64, Expand);
00941 
00942   if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
00943     addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
00944 
00945     setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
00946     setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
00947     setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
00948     setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
00949     setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
00950     setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
00951     setOperationAction(ISD::FABS,               MVT::v4f32, Custom);
00952     setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
00953     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
00954     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
00955     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
00956     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
00957   }
00958 
00959   if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
00960     addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
00961 
00962     // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
00963     // registers cannot be used even for integer operations.
00964     addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
00965     addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
00966     addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
00967     addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
00968 
00969     setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
00970     setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
00971     setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
00972     setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
00973     setOperationAction(ISD::MUL,                MVT::v4i32, Custom);
00974     setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
00975     setOperationAction(ISD::UMUL_LOHI,          MVT::v4i32, Custom);
00976     setOperationAction(ISD::SMUL_LOHI,          MVT::v4i32, Custom);
00977     setOperationAction(ISD::MULHU,              MVT::v8i16, Legal);
00978     setOperationAction(ISD::MULHS,              MVT::v8i16, Legal);
00979     setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
00980     setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
00981     setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
00982     setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
00983     setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
00984     setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
00985     setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
00986     setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
00987     setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
00988     setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
00989     setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
00990     setOperationAction(ISD::FABS,               MVT::v2f64, Custom);
00991 
00992     setOperationAction(ISD::SETCC,              MVT::v2i64, Custom);
00993     setOperationAction(ISD::SETCC,              MVT::v16i8, Custom);
00994     setOperationAction(ISD::SETCC,              MVT::v8i16, Custom);
00995     setOperationAction(ISD::SETCC,              MVT::v4i32, Custom);
00996 
00997     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
00998     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
00999     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
01000     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
01001     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
01002 
01003     // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
01004     for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
01005       MVT VT = (MVT::SimpleValueType)i;
01006       // Do not attempt to custom lower non-power-of-2 vectors
01007       if (!isPowerOf2_32(VT.getVectorNumElements()))
01008         continue;
01009       // Do not attempt to custom lower non-128-bit vectors
01010       if (!VT.is128BitVector())
01011         continue;
01012       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
01013       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
01014       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
01015     }
01016 
01017     // We support custom legalizing of sext and anyext loads for specific
01018     // memory vector types which we can load as a scalar (or sequence of
01019     // scalars) and extend in-register to a legal 128-bit vector type. For sext
01020     // loads these must work with a single scalar load.
01021     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Custom);
01022     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Custom);
01023     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i8, Custom);
01024     setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Custom);
01025     setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Custom);
01026     setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, Custom);
01027     setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
01028     setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Custom);
01029     setLoadExtAction(ISD::EXTLOAD, MVT::v8i8, Custom);
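
    // For example, a sign-extending load of v4i8 can be legalized as one
    // 32-bit scalar load of the four bytes followed by an in-register sign
    // extension to v4i32 (e.g. via PMOVSXBD when SSE4.1 is available), rather
    // than four separate element loads.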
01030 
01031     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
01032     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
01033     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
01034     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
01035     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
01036     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
01037 
01038     if (Subtarget->is64Bit()) {
01039       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
01040       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
01041     }
01042 
01043     // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
01044     for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
01045       MVT VT = (MVT::SimpleValueType)i;
01046 
01047       // Do not attempt to promote non-128-bit vectors
01048       if (!VT.is128BitVector())
01049         continue;
01050 
01051       setOperationAction(ISD::AND,    VT, Promote);
01052       AddPromotedToType (ISD::AND,    VT, MVT::v2i64);
01053       setOperationAction(ISD::OR,     VT, Promote);
01054       AddPromotedToType (ISD::OR,     VT, MVT::v2i64);
01055       setOperationAction(ISD::XOR,    VT, Promote);
01056       AddPromotedToType (ISD::XOR,    VT, MVT::v2i64);
01057       setOperationAction(ISD::LOAD,   VT, Promote);
01058       AddPromotedToType (ISD::LOAD,   VT, MVT::v2i64);
01059       setOperationAction(ISD::SELECT, VT, Promote);
01060       AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
01061     }
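
    // Effect of the promotion above: e.g. an AND of two v16i8 values is
    // bitcast to v2i64, performed there as a single PAND, and bitcast back;
    // this is safe because the bitwise ops are element-width agnostic.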
01062 
01063     // Custom lower v2i64 and v2f64 selects.
01064     setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
01065     setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
01066     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
01067     setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
01068 
01069     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
01070     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
01071 
01072     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i8,  Custom);
01073     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i16, Custom);
01074     // As there is no 64-bit GPR available, we need to build a special custom
01075     // sequence to convert from v2i32 to v2f32.
01076     if (!Subtarget->is64Bit())
01077       setOperationAction(ISD::UINT_TO_FP,       MVT::v2f32, Custom);
01078 
01079     setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
01080     setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
01081 
01082     setLoadExtAction(ISD::EXTLOAD,              MVT::v2f32, Legal);
01083 
01084     setOperationAction(ISD::BITCAST,            MVT::v2i32, Custom);
01085     setOperationAction(ISD::BITCAST,            MVT::v4i16, Custom);
01086     setOperationAction(ISD::BITCAST,            MVT::v8i8,  Custom);
01087   }
01088 
01089   if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
01090     setOperationAction(ISD::FFLOOR,             MVT::f32,   Legal);
01091     setOperationAction(ISD::FCEIL,              MVT::f32,   Legal);
01092     setOperationAction(ISD::FTRUNC,             MVT::f32,   Legal);
01093     setOperationAction(ISD::FRINT,              MVT::f32,   Legal);
01094     setOperationAction(ISD::FNEARBYINT,         MVT::f32,   Legal);
01095     setOperationAction(ISD::FFLOOR,             MVT::f64,   Legal);
01096     setOperationAction(ISD::FCEIL,              MVT::f64,   Legal);
01097     setOperationAction(ISD::FTRUNC,             MVT::f64,   Legal);
01098     setOperationAction(ISD::FRINT,              MVT::f64,   Legal);
01099     setOperationAction(ISD::FNEARBYINT,         MVT::f64,   Legal);
01100 
01101     setOperationAction(ISD::FFLOOR,             MVT::v4f32, Legal);
01102     setOperationAction(ISD::FCEIL,              MVT::v4f32, Legal);
01103     setOperationAction(ISD::FTRUNC,             MVT::v4f32, Legal);
01104     setOperationAction(ISD::FRINT,              MVT::v4f32, Legal);
01105     setOperationAction(ISD::FNEARBYINT,         MVT::v4f32, Legal);
01106     setOperationAction(ISD::FFLOOR,             MVT::v2f64, Legal);
01107     setOperationAction(ISD::FCEIL,              MVT::v2f64, Legal);
01108     setOperationAction(ISD::FTRUNC,             MVT::v2f64, Legal);
01109     setOperationAction(ISD::FRINT,              MVT::v2f64, Legal);
01110     setOperationAction(ISD::FNEARBYINT,         MVT::v2f64, Legal);
01111 
01112     // FIXME: Do we need to handle scalar-to-vector here?
01113     setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
01114 
01115     setOperationAction(ISD::VSELECT,            MVT::v2f64, Custom);
01116     setOperationAction(ISD::VSELECT,            MVT::v2i64, Custom);
01117     setOperationAction(ISD::VSELECT,            MVT::v4i32, Custom);
01118     setOperationAction(ISD::VSELECT,            MVT::v4f32, Custom);
01119     setOperationAction(ISD::VSELECT,            MVT::v8i16, Custom);
01120     // There is no BLENDI for byte vectors. We don't need to custom lower
01121     // some vselects for now.
01122     setOperationAction(ISD::VSELECT,            MVT::v16i8, Legal);
01123 
01124     // SSE41 brings specific instructions for doing vector sign extend even in
01125     // cases where we don't have SRA.
01126     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Custom);
01127     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Custom);
01128     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, Custom);
01129 
01130     // i8 and i16 vectors are custom because the source register and source
01131     // memory operand types are not the same width.  f32 vectors are
01132     // custom since the immediate controlling the insert encodes additional
01133     // information.
01134     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
01135     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
01136     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
01137     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
01138 
01139     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
01140     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
01141     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
01142     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
01143 
01144     // FIXME: these should be Legal, but that's only for the case where
01145     // the index is constant.  For now custom expand to deal with that.
01146     if (Subtarget->is64Bit()) {
01147       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
01148       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
01149     }
01150   }
01151 
01152   if (Subtarget->hasSSE2()) {
01153     setOperationAction(ISD::SRL,               MVT::v8i16, Custom);
01154     setOperationAction(ISD::SRL,               MVT::v16i8, Custom);
01155 
01156     setOperationAction(ISD::SHL,               MVT::v8i16, Custom);
01157     setOperationAction(ISD::SHL,               MVT::v16i8, Custom);
01158 
01159     setOperationAction(ISD::SRA,               MVT::v8i16, Custom);
01160     setOperationAction(ISD::SRA,               MVT::v16i8, Custom);
01161 
01162     // In the custom shift lowering, the cases that are legal with AVX2 will
01163     // be recognized.
01164     setOperationAction(ISD::SRL,               MVT::v2i64, Custom);
01165     setOperationAction(ISD::SRL,               MVT::v4i32, Custom);
01166 
01167     setOperationAction(ISD::SHL,               MVT::v2i64, Custom);
01168     setOperationAction(ISD::SHL,               MVT::v4i32, Custom);
01169 
01170     setOperationAction(ISD::SRA,               MVT::v4i32, Custom);
01171   }
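        // Illustrative note (not part of the original source): the custom shift
        // lowering above recognizes, among other cases, splat (uniform) shift
        // amounts, which map onto the PSLLW/PSLLD/PSLLQ family; truly
        // per-element variable shifts only become legal instructions with AVX2
        // (VPSLLVD and friends).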
01172 
01173   if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
01174     addRegisterClass(MVT::v32i8,  &X86::VR256RegClass);
01175     addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
01176     addRegisterClass(MVT::v8i32,  &X86::VR256RegClass);
01177     addRegisterClass(MVT::v8f32,  &X86::VR256RegClass);
01178     addRegisterClass(MVT::v4i64,  &X86::VR256RegClass);
01179     addRegisterClass(MVT::v4f64,  &X86::VR256RegClass);
01180 
01181     setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
01182     setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
01183     setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);
01184 
01185     setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
01186     setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
01187     setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
01188     setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
01189     setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
01190     setOperationAction(ISD::FFLOOR,             MVT::v8f32, Legal);
01191     setOperationAction(ISD::FCEIL,              MVT::v8f32, Legal);
01192     setOperationAction(ISD::FTRUNC,             MVT::v8f32, Legal);
01193     setOperationAction(ISD::FRINT,              MVT::v8f32, Legal);
01194     setOperationAction(ISD::FNEARBYINT,         MVT::v8f32, Legal);
01195     setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
01196     setOperationAction(ISD::FABS,               MVT::v8f32, Custom);
01197 
01198     setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
01199     setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
01200     setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
01201     setOperationAction(ISD::FDIV,               MVT::v4f64, Legal);
01202     setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
01203     setOperationAction(ISD::FFLOOR,             MVT::v4f64, Legal);
01204     setOperationAction(ISD::FCEIL,              MVT::v4f64, Legal);
01205     setOperationAction(ISD::FTRUNC,             MVT::v4f64, Legal);
01206     setOperationAction(ISD::FRINT,              MVT::v4f64, Legal);
01207     setOperationAction(ISD::FNEARBYINT,         MVT::v4f64, Legal);
01208     setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
01209     setOperationAction(ISD::FABS,               MVT::v4f64, Custom);
01210 
01211     // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
01212     // even though v8i16 is a legal type.
01213     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Promote);
01214     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i16, Promote);
01215     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);
01216 
01217     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i16, Promote);
01218     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
01219     setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);
01220 
01221     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i8,  Custom);
01222     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i16, Custom);
01223 
01224     setLoadExtAction(ISD::EXTLOAD,              MVT::v4f32, Legal);
01225 
01226     setOperationAction(ISD::SRL,               MVT::v16i16, Custom);
01227     setOperationAction(ISD::SRL,               MVT::v32i8, Custom);
01228 
01229     setOperationAction(ISD::SHL,               MVT::v16i16, Custom);
01230     setOperationAction(ISD::SHL,               MVT::v32i8, Custom);
01231 
01232     setOperationAction(ISD::SRA,               MVT::v16i16, Custom);
01233     setOperationAction(ISD::SRA,               MVT::v32i8, Custom);
01234 
01235     setOperationAction(ISD::SETCC,             MVT::v32i8, Custom);
01236     setOperationAction(ISD::SETCC,             MVT::v16i16, Custom);
01237     setOperationAction(ISD::SETCC,             MVT::v8i32, Custom);
01238     setOperationAction(ISD::SETCC,             MVT::v4i64, Custom);
01239 
01240     setOperationAction(ISD::SELECT,            MVT::v4f64, Custom);
01241     setOperationAction(ISD::SELECT,            MVT::v4i64, Custom);
01242     setOperationAction(ISD::SELECT,            MVT::v8f32, Custom);
01243 
01244     setOperationAction(ISD::VSELECT,           MVT::v4f64, Custom);
01245     setOperationAction(ISD::VSELECT,           MVT::v4i64, Custom);
01246     setOperationAction(ISD::VSELECT,           MVT::v8i32, Custom);
01247     setOperationAction(ISD::VSELECT,           MVT::v8f32, Custom);
01248 
01249     setOperationAction(ISD::SIGN_EXTEND,       MVT::v4i64, Custom);
01250     setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i32, Custom);
01251     setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i16, Custom);
01252     setOperationAction(ISD::ZERO_EXTEND,       MVT::v4i64, Custom);
01253     setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i32, Custom);
01254     setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i16, Custom);
01255     setOperationAction(ISD::ANY_EXTEND,        MVT::v4i64, Custom);
01256     setOperationAction(ISD::ANY_EXTEND,        MVT::v8i32, Custom);
01257     setOperationAction(ISD::ANY_EXTEND,        MVT::v16i16, Custom);
01258     setOperationAction(ISD::TRUNCATE,          MVT::v16i8, Custom);
01259     setOperationAction(ISD::TRUNCATE,          MVT::v8i16, Custom);
01260     setOperationAction(ISD::TRUNCATE,          MVT::v4i32, Custom);
01261 
01262     if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
01263       setOperationAction(ISD::FMA,             MVT::v8f32, Legal);
01264       setOperationAction(ISD::FMA,             MVT::v4f64, Legal);
01265       setOperationAction(ISD::FMA,             MVT::v4f32, Legal);
01266       setOperationAction(ISD::FMA,             MVT::v2f64, Legal);
01267       setOperationAction(ISD::FMA,             MVT::f32, Legal);
01268       setOperationAction(ISD::FMA,             MVT::f64, Legal);
01269     }
01270 
01271     if (Subtarget->hasInt256()) {
01272       setOperationAction(ISD::ADD,             MVT::v4i64, Legal);
01273       setOperationAction(ISD::ADD,             MVT::v8i32, Legal);
01274       setOperationAction(ISD::ADD,             MVT::v16i16, Legal);
01275       setOperationAction(ISD::ADD,             MVT::v32i8, Legal);
01276 
01277       setOperationAction(ISD::SUB,             MVT::v4i64, Legal);
01278       setOperationAction(ISD::SUB,             MVT::v8i32, Legal);
01279       setOperationAction(ISD::SUB,             MVT::v16i16, Legal);
01280       setOperationAction(ISD::SUB,             MVT::v32i8, Legal);
01281 
01282       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01283       setOperationAction(ISD::MUL,             MVT::v8i32, Legal);
01284       setOperationAction(ISD::MUL,             MVT::v16i16, Legal);
01285       // Don't lower v32i8 because there is no 128-bit byte mul
01286 
01287       setOperationAction(ISD::UMUL_LOHI,       MVT::v8i32, Custom);
01288       setOperationAction(ISD::SMUL_LOHI,       MVT::v8i32, Custom);
01289       setOperationAction(ISD::MULHU,           MVT::v16i16, Legal);
01290       setOperationAction(ISD::MULHS,           MVT::v16i16, Legal);
01291 
01292       setOperationAction(ISD::VSELECT,         MVT::v16i16, Custom);
01293       setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
01294     } else {
01295       setOperationAction(ISD::ADD,             MVT::v4i64, Custom);
01296       setOperationAction(ISD::ADD,             MVT::v8i32, Custom);
01297       setOperationAction(ISD::ADD,             MVT::v16i16, Custom);
01298       setOperationAction(ISD::ADD,             MVT::v32i8, Custom);
01299 
01300       setOperationAction(ISD::SUB,             MVT::v4i64, Custom);
01301       setOperationAction(ISD::SUB,             MVT::v8i32, Custom);
01302       setOperationAction(ISD::SUB,             MVT::v16i16, Custom);
01303       setOperationAction(ISD::SUB,             MVT::v32i8, Custom);
01304 
01305       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01306       setOperationAction(ISD::MUL,             MVT::v8i32, Custom);
01307       setOperationAction(ISD::MUL,             MVT::v16i16, Custom);
01308       // Don't lower v32i8 because there is no 128-bit byte mul
01309     }
01310 
01311     // In the custom shift lowering, the cases that are legal with AVX2 will
01312     // be recognized.
01313     setOperationAction(ISD::SRL,               MVT::v4i64, Custom);
01314     setOperationAction(ISD::SRL,               MVT::v8i32, Custom);
01315 
01316     setOperationAction(ISD::SHL,               MVT::v4i64, Custom);
01317     setOperationAction(ISD::SHL,               MVT::v8i32, Custom);
01318 
01319     setOperationAction(ISD::SRA,               MVT::v8i32, Custom);
01320 
01321     // Custom lower several nodes for 256-bit types.
01322     for (int i = MVT::FIRST_VECTOR_VALUETYPE;
01323              i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
01324       MVT VT = (MVT::SimpleValueType)i;
01325 
01326       // Extract subvector is special because the value type
01327       // (result) is 128-bit but the source is 256-bit wide.
01328       if (VT.is128BitVector())
01329         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01330 
01331       // Do not attempt to custom lower other non-256-bit vectors
01332       if (!VT.is256BitVector())
01333         continue;
01334 
01335       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
01336       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
01337       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
01338       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
01339       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
01340       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Custom);
01341       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
01342     }
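          // Illustrative note (not part of the original source): the 128-bit
          // EXTRACT_SUBVECTOR case above typically ends up as a VEXTRACTF128
          // (or VEXTRACTI128 with AVX2), which extracts one 128-bit lane of a
          // YMM register.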
01343 
01344     // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
01345     for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
01346       MVT VT = (MVT::SimpleValueType)i;
01347 
01348       // Do not attempt to promote non-256-bit vectors
01349       if (!VT.is256BitVector())
01350         continue;
01351 
01352       setOperationAction(ISD::AND,    VT, Promote);
01353       AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
01354       setOperationAction(ISD::OR,     VT, Promote);
01355       AddPromotedToType (ISD::OR,     VT, MVT::v4i64);
01356       setOperationAction(ISD::XOR,    VT, Promote);
01357       AddPromotedToType (ISD::XOR,    VT, MVT::v4i64);
01358       setOperationAction(ISD::LOAD,   VT, Promote);
01359       AddPromotedToType (ISD::LOAD,   VT, MVT::v4i64);
01360       setOperationAction(ISD::SELECT, VT, Promote);
01361       AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
01362     }
01363   }
01364 
01365   if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) {
01366     addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
01367     addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
01368     addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
01369     addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
01370 
01371     addRegisterClass(MVT::i1,     &X86::VK1RegClass);
01372     addRegisterClass(MVT::v8i1,   &X86::VK8RegClass);
01373     addRegisterClass(MVT::v16i1,  &X86::VK16RegClass);
01374 
01375     setOperationAction(ISD::BR_CC,              MVT::i1,    Expand);
01376     setOperationAction(ISD::SETCC,              MVT::i1,    Custom);
01377     setOperationAction(ISD::XOR,                MVT::i1,    Legal);
01378     setOperationAction(ISD::OR,                 MVT::i1,    Legal);
01379     setOperationAction(ISD::AND,                MVT::i1,    Legal);
01380     setLoadExtAction(ISD::EXTLOAD,              MVT::v8f32, Legal);
01381     setOperationAction(ISD::LOAD,               MVT::v16f32, Legal);
01382     setOperationAction(ISD::LOAD,               MVT::v8f64, Legal);
01383     setOperationAction(ISD::LOAD,               MVT::v8i64, Legal);
01384     setOperationAction(ISD::LOAD,               MVT::v16i32, Legal);
01385     setOperationAction(ISD::LOAD,               MVT::v16i1, Legal);
01386 
01387     setOperationAction(ISD::FADD,               MVT::v16f32, Legal);
01388     setOperationAction(ISD::FSUB,               MVT::v16f32, Legal);
01389     setOperationAction(ISD::FMUL,               MVT::v16f32, Legal);
01390     setOperationAction(ISD::FDIV,               MVT::v16f32, Legal);
01391     setOperationAction(ISD::FSQRT,              MVT::v16f32, Legal);
01392     setOperationAction(ISD::FNEG,               MVT::v16f32, Custom);
01393 
01394     setOperationAction(ISD::FADD,               MVT::v8f64, Legal);
01395     setOperationAction(ISD::FSUB,               MVT::v8f64, Legal);
01396     setOperationAction(ISD::FMUL,               MVT::v8f64, Legal);
01397     setOperationAction(ISD::FDIV,               MVT::v8f64, Legal);
01398     setOperationAction(ISD::FSQRT,              MVT::v8f64, Legal);
01399     setOperationAction(ISD::FNEG,               MVT::v8f64, Custom);
01400     setOperationAction(ISD::FMA,                MVT::v8f64, Legal);
01401     setOperationAction(ISD::FMA,                MVT::v16f32, Legal);
01402 
01403     setOperationAction(ISD::FP_TO_SINT,         MVT::i32, Legal);
01404     setOperationAction(ISD::FP_TO_UINT,         MVT::i32, Legal);
01405     setOperationAction(ISD::SINT_TO_FP,         MVT::i32, Legal);
01406     setOperationAction(ISD::UINT_TO_FP,         MVT::i32, Legal);
01407     if (Subtarget->is64Bit()) {
01408       setOperationAction(ISD::FP_TO_UINT,       MVT::i64, Legal);
01409       setOperationAction(ISD::FP_TO_SINT,       MVT::i64, Legal);
01410       setOperationAction(ISD::SINT_TO_FP,       MVT::i64, Legal);
01411       setOperationAction(ISD::UINT_TO_FP,       MVT::i64, Legal);
01412     }
01413     setOperationAction(ISD::FP_TO_SINT,         MVT::v16i32, Legal);
01414     setOperationAction(ISD::FP_TO_UINT,         MVT::v16i32, Legal);
01415     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i32, Legal);
01416     setOperationAction(ISD::FP_TO_UINT,         MVT::v4i32, Legal);
01417     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i32, Legal);
01418     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i32, Legal);
01419     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i32, Legal);
01420     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Legal);
01421     setOperationAction(ISD::FP_ROUND,           MVT::v8f32, Legal);
01422     setOperationAction(ISD::FP_EXTEND,          MVT::v8f32, Legal);
01423 
01424     setOperationAction(ISD::TRUNCATE,           MVT::i1, Custom);
01425     setOperationAction(ISD::TRUNCATE,           MVT::v16i8, Custom);
01426     setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
01427     setOperationAction(ISD::TRUNCATE,           MVT::v8i1, Custom);
01428     setOperationAction(ISD::TRUNCATE,           MVT::v16i1, Custom);
01429     setOperationAction(ISD::TRUNCATE,           MVT::v16i16, Custom);
01430     setOperationAction(ISD::ZERO_EXTEND,        MVT::v16i32, Custom);
01431     setOperationAction(ISD::ZERO_EXTEND,        MVT::v8i64, Custom);
01432     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i32, Custom);
01433     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i64, Custom);
01434     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i8, Custom);
01435     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i16, Custom);
01436     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i16, Custom);
01437 
01438     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8f64,  Custom);
01439     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i64,  Custom);
01440     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16f32,  Custom);
01441     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i32,  Custom);
01442     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i1,    Custom);
01443     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i1, Legal);
01444 
01445     setOperationAction(ISD::SETCC,              MVT::v16i1, Custom);
01446     setOperationAction(ISD::SETCC,              MVT::v8i1, Custom);
01447 
01448     setOperationAction(ISD::MUL,              MVT::v8i64, Custom);
01449 
01450     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1,  Custom);
01451     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
01452     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i1, Custom);
01453     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i1, Custom);
01454     setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i1, Custom);
01455     setOperationAction(ISD::BUILD_VECTOR,       MVT::v16i1, Custom);
01456     setOperationAction(ISD::SELECT,             MVT::v8f64, Custom);
01457     setOperationAction(ISD::SELECT,             MVT::v8i64, Custom);
01458     setOperationAction(ISD::SELECT,             MVT::v16f32, Custom);
01459 
01460     setOperationAction(ISD::ADD,                MVT::v8i64, Legal);
01461     setOperationAction(ISD::ADD,                MVT::v16i32, Legal);
01462 
01463     setOperationAction(ISD::SUB,                MVT::v8i64, Legal);
01464     setOperationAction(ISD::SUB,                MVT::v16i32, Legal);
01465 
01466     setOperationAction(ISD::MUL,                MVT::v16i32, Legal);
01467 
01468     setOperationAction(ISD::SRL,                MVT::v8i64, Custom);
01469     setOperationAction(ISD::SRL,                MVT::v16i32, Custom);
01470 
01471     setOperationAction(ISD::SHL,                MVT::v8i64, Custom);
01472     setOperationAction(ISD::SHL,                MVT::v16i32, Custom);
01473 
01474     setOperationAction(ISD::SRA,                MVT::v8i64, Custom);
01475     setOperationAction(ISD::SRA,                MVT::v16i32, Custom);
01476 
01477     setOperationAction(ISD::AND,                MVT::v8i64, Legal);
01478     setOperationAction(ISD::OR,                 MVT::v8i64, Legal);
01479     setOperationAction(ISD::XOR,                MVT::v8i64, Legal);
01480     setOperationAction(ISD::AND,                MVT::v16i32, Legal);
01481     setOperationAction(ISD::OR,                 MVT::v16i32, Legal);
01482     setOperationAction(ISD::XOR,                MVT::v16i32, Legal);
01483 
01484     if (Subtarget->hasCDI()) {
01485       setOperationAction(ISD::CTLZ,             MVT::v8i64, Legal);
01486       setOperationAction(ISD::CTLZ,             MVT::v16i32, Legal);
01487     }
01488 
01489     // Custom lower several nodes.
01490     for (int i = MVT::FIRST_VECTOR_VALUETYPE;
01491              i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
01492       MVT VT = (MVT::SimpleValueType)i;
01493 
01494       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
01495       // Extract subvector is special because the value type
01496       // (result) is 256/128-bit but the source is 512-bit wide.
01497       if (VT.is128BitVector() || VT.is256BitVector())
01498         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01499 
01500       if (VT.getVectorElementType() == MVT::i1)
01501         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
01502 
01503       // Do not attempt to custom lower other non-512-bit vectors
01504       if (!VT.is512BitVector())
01505         continue;
01506 
01507       if (EltSize >= 32) {
01508         setOperationAction(ISD::VECTOR_SHUFFLE,      VT, Custom);
01509         setOperationAction(ISD::INSERT_VECTOR_ELT,   VT, Custom);
01510         setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01511         setOperationAction(ISD::VSELECT,             VT, Legal);
01512         setOperationAction(ISD::EXTRACT_VECTOR_ELT,  VT, Custom);
01513         setOperationAction(ISD::SCALAR_TO_VECTOR,    VT, Custom);
01514         setOperationAction(ISD::INSERT_SUBVECTOR,    VT, Custom);
01515       }
01516     }
01517     for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
01518       MVT VT = (MVT::SimpleValueType)i;
01519 
01520       // Do not attempt to promote non-512-bit vectors
01521       if (!VT.is512BitVector())
01522         continue;
01523 
01524       setOperationAction(ISD::SELECT, VT, Promote);
01525       AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
01526     }
01527   } // has AVX-512
01528 
01529   if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
01530     addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
01531     addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
01532 
01533     addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
01534     addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
01535 
01536     setOperationAction(ISD::LOAD,               MVT::v32i16, Legal);
01537     setOperationAction(ISD::LOAD,               MVT::v64i8, Legal);
01538     setOperationAction(ISD::SETCC,              MVT::v32i1, Custom);
01539     setOperationAction(ISD::SETCC,              MVT::v64i1, Custom);
01540 
01541     for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
01542       const MVT VT = (MVT::SimpleValueType)i;
01543 
01544       const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
01545 
01546       // Do not attempt to custom lower non-512-bit vectors
01547       if (!VT.is512BitVector())
01548         continue;
01549 
01550       if (EltSize < 32) {
01551         setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01552         setOperationAction(ISD::VSELECT,             VT, Legal);
01553       }
01554     }
01555   }
01556 
01557   if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
01558     addRegisterClass(MVT::v4i1,   &X86::VK4RegClass);
01559     addRegisterClass(MVT::v2i1,   &X86::VK2RegClass);
01560 
01561     setOperationAction(ISD::SETCC,              MVT::v4i1, Custom);
01562     setOperationAction(ISD::SETCC,              MVT::v2i1, Custom);
01563     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v8i1, Legal);
01564   }
01565 
01566   // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
01567   // of this type with custom code.
01568   for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
01569            VT != MVT::LAST_VECTOR_VALUETYPE; VT++) {
01570     setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
01571                        Custom);
01572   }
01573 
01574   // We want to custom lower some of our intrinsics.
01575   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
01576   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
01577   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
01578   if (!Subtarget->is64Bit())
01579     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
01580 
01581   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
01582   // handle type legalization for these operations here.
01583   //
01584   // FIXME: We really should do custom legalization for addition and
01585   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
01586   // than generic legalization for 64-bit multiplication-with-overflow, though.
01587   for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
01588     // Add/Sub/Mul with overflow operations are custom lowered.
01589     MVT VT = IntVTs[i];
01590     setOperationAction(ISD::SADDO, VT, Custom);
01591     setOperationAction(ISD::UADDO, VT, Custom);
01592     setOperationAction(ISD::SSUBO, VT, Custom);
01593     setOperationAction(ISD::USUBO, VT, Custom);
01594     setOperationAction(ISD::SMULO, VT, Custom);
01595     setOperationAction(ISD::UMULO, VT, Custom);
01596   }
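        // Illustrative example (not part of the original source): the custom
        // lowering turns, e.g.,
        //   %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
        // into a flag-setting ADD whose overflow bit is read back with SETO
        // (the unsigned variants read the carry flag instead).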
01597 
01598   // There are no 8-bit 3-address imul/mul instructions
01599   setOperationAction(ISD::SMULO, MVT::i8, Expand);
01600   setOperationAction(ISD::UMULO, MVT::i8, Expand);
01601 
01602   if (!Subtarget->is64Bit()) {
01603     // These libcalls are not available in 32-bit.
01604     setLibcallName(RTLIB::SHL_I128, nullptr);
01605     setLibcallName(RTLIB::SRL_I128, nullptr);
01606     setLibcallName(RTLIB::SRA_I128, nullptr);
01607   }
01608 
01609   // Combine sin / cos into one node or libcall if possible.
01610   if (Subtarget->hasSinCos()) {
01611     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
01612     setLibcallName(RTLIB::SINCOS_F64, "sincos");
01613     if (Subtarget->isTargetDarwin()) {
01614       // For MacOSX, we don't want the normal expansion of a libcall to
01615       // sincos. We want to issue a libcall to __sincos_stret to avoid memory
01616       // traffic.
01617       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
01618       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
01619     }
01620   }
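        // Illustrative example (not part of the original source): with the
        // custom FSINCOS lowering, a pair of calls such as
        //   %s = call float @sinf(float %x)
        //   %c = call float @cosf(float %x)
        // on the same operand can be merged into a single __sincos_stret call
        // on Darwin, computing both results with one libcall.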
01621 
01622   if (Subtarget->isTargetWin64()) {
01623     setOperationAction(ISD::SDIV, MVT::i128, Custom);
01624     setOperationAction(ISD::UDIV, MVT::i128, Custom);
01625     setOperationAction(ISD::SREM, MVT::i128, Custom);
01626     setOperationAction(ISD::UREM, MVT::i128, Custom);
01627     setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
01628     setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
01629   }
01630 
01631   // We have target-specific dag combine patterns for the following nodes:
01632   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
01633   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
01634   setTargetDAGCombine(ISD::VSELECT);
01635   setTargetDAGCombine(ISD::SELECT);
01636   setTargetDAGCombine(ISD::SHL);
01637   setTargetDAGCombine(ISD::SRA);
01638   setTargetDAGCombine(ISD::SRL);
01639   setTargetDAGCombine(ISD::OR);
01640   setTargetDAGCombine(ISD::AND);
01641   setTargetDAGCombine(ISD::ADD);
01642   setTargetDAGCombine(ISD::FADD);
01643   setTargetDAGCombine(ISD::FSUB);
01644   setTargetDAGCombine(ISD::FMA);
01645   setTargetDAGCombine(ISD::SUB);
01646   setTargetDAGCombine(ISD::LOAD);
01647   setTargetDAGCombine(ISD::STORE);
01648   setTargetDAGCombine(ISD::ZERO_EXTEND);
01649   setTargetDAGCombine(ISD::ANY_EXTEND);
01650   setTargetDAGCombine(ISD::SIGN_EXTEND);
01651   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
01652   setTargetDAGCombine(ISD::TRUNCATE);
01653   setTargetDAGCombine(ISD::SINT_TO_FP);
01654   setTargetDAGCombine(ISD::SETCC);
01655   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
01656   setTargetDAGCombine(ISD::BUILD_VECTOR);
01657   if (Subtarget->is64Bit())
01658     setTargetDAGCombine(ISD::MUL);
01659   setTargetDAGCombine(ISD::XOR);
01660 
01661   computeRegisterProperties();
01662 
01663   // On Darwin, -Os means optimize for size without hurting performance,
01664   // so do not reduce the limit.
01665   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
01666   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
01667   MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
01668   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
01669   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
01670   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
01671   setPrefLoopAlignment(4); // 2^4 bytes.
01672 
01673   // Predictable cmovs don't hurt on Atom because it's in-order.
01674   PredictableSelectIsExpensive = !Subtarget->isAtom();
01675 
01676   setPrefFunctionAlignment(4); // 2^4 bytes.
01677 
01678   verifyIntrinsicTables();
01679 }
01680 
01681 // This has so far only been implemented for 64-bit MachO.
01682 bool X86TargetLowering::useLoadStackGuardNode() const {
01683   return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO &&
01684          Subtarget->is64Bit();
01685 }
01686 
01687 TargetLoweringBase::LegalizeTypeAction
01688 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
01689   if (ExperimentalVectorWideningLegalization &&
01690       VT.getVectorNumElements() != 1 &&
01691       VT.getVectorElementType().getSimpleVT() != MVT::i1)
01692     return TypeWidenVector;
01693 
01694   return TargetLoweringBase::getPreferredVectorAction(VT);
01695 }
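      // Illustrative note (not part of the original source): under the
      // experimental flag, a type such as v2i8 is widened to a longer vector of
      // i8 elements (padding with undef) instead of having each element
      // promoted to a wider integer type.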
01696 
01697 EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
01698   if (!VT.isVector())
01699     return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
01700 
01701   const unsigned NumElts = VT.getVectorNumElements();
01702   const EVT EltVT = VT.getVectorElementType();
01703   if (VT.is512BitVector()) {
01704     if (Subtarget->hasAVX512())
01705       if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
01706           EltVT == MVT::f32 || EltVT == MVT::f64)
01707         switch(NumElts) {
01708         case  8: return MVT::v8i1;
01709         case 16: return MVT::v16i1;
01710       }
01711     if (Subtarget->hasBWI())
01712       if (EltVT == MVT::i8 || EltVT == MVT::i16)
01713         switch(NumElts) {
01714         case 32: return MVT::v32i1;
01715         case 64: return MVT::v64i1;
01716       }
01717   }
01718 
01719   if (VT.is256BitVector() || VT.is128BitVector()) {
01720     if (Subtarget->hasVLX())
01721       if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
01722           EltVT == MVT::f32 || EltVT == MVT::f64)
01723         switch(NumElts) {
01724         case 2: return MVT::v2i1;
01725         case 4: return MVT::v4i1;
01726         case 8: return MVT::v8i1;
01727       }
01728     if (Subtarget->hasBWI() && Subtarget->hasVLX())
01729       if (EltVT == MVT::i8 || EltVT == MVT::i16)
01730         switch(NumElts) {
01731         case  8: return MVT::v8i1;
01732         case 16: return MVT::v16i1;
01733         case 32: return MVT::v32i1;
01734       }
01735   }
01736 
01737   return VT.changeVectorElementTypeToInteger();
01738 }
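      // Illustrative note (not part of the original source): with AVX-512 a
      // compare of two v16f32 values yields a v16i1 mask register, whereas on
      // plain SSE/AVX a v4i32 compare yields a v4i32 result whose lanes are
      // all-ones or all-zeros (the changeVectorElementTypeToInteger() fallback
      // above).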
01739 
01740 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
01741 /// the desired ByVal argument alignment.
01742 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
01743   if (MaxAlign == 16)
01744     return;
01745   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
01746     if (VTy->getBitWidth() == 128)
01747       MaxAlign = 16;
01748   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
01749     unsigned EltAlign = 0;
01750     getMaxByValAlign(ATy->getElementType(), EltAlign);
01751     if (EltAlign > MaxAlign)
01752       MaxAlign = EltAlign;
01753   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
01754     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
01755       unsigned EltAlign = 0;
01756       getMaxByValAlign(STy->getElementType(i), EltAlign);
01757       if (EltAlign > MaxAlign)
01758         MaxAlign = EltAlign;
01759       if (MaxAlign == 16)
01760         break;
01761     }
01762   }
01763 }
01764 
01765 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
01766 /// function arguments in the caller parameter area. For X86, aggregates
01767 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
01768 /// are at 4-byte boundaries.
01769 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
01770   if (Subtarget->is64Bit()) {
01771     // Max of 8 and alignment of type.
01772     unsigned TyAlign = TD->getABITypeAlignment(Ty);
01773     if (TyAlign > 8)
01774       return TyAlign;
01775     return 8;
01776   }
01777 
01778   unsigned Align = 4;
01779   if (Subtarget->hasSSE1())
01780     getMaxByValAlign(Ty, Align);
01781   return Align;
01782 }
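      // Illustrative example (not part of the original source): on 32-bit x86
      // with SSE enabled, a byval struct containing a <4 x float> member is
      // aligned to 16 bytes by the logic above, while a struct of plain scalars
      // stays at the default 4-byte boundary.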
01783 
01784 /// getOptimalMemOpType - Returns the target specific optimal type for load
01785 /// and store operations as a result of memset, memcpy, and memmove
01786 /// lowering. If DstAlign is zero, the destination alignment can satisfy any
01787 /// constraint. Similarly, if SrcAlign is zero there is no need to check it
01788 /// against the alignment requirement, probably because the source does not
01789 /// need to be loaded. If 'IsMemset' is
01790 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
01791 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
01792 /// source is constant so it does not need to be loaded.
01793 /// It returns EVT::Other if the type should be determined using generic
01794 /// target-independent logic.
01795 EVT
01796 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
01797                                        unsigned DstAlign, unsigned SrcAlign,
01798                                        bool IsMemset, bool ZeroMemset,
01799                                        bool MemcpyStrSrc,
01800                                        MachineFunction &MF) const {
01801   const Function *F = MF.getFunction();
01802   if ((!IsMemset || ZeroMemset) &&
01803       !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
01804                                        Attribute::NoImplicitFloat)) {
01805     if (Size >= 16 &&
01806         (Subtarget->isUnalignedMemAccessFast() ||
01807          ((DstAlign == 0 || DstAlign >= 16) &&
01808           (SrcAlign == 0 || SrcAlign >= 16)))) {
01809       if (Size >= 32) {
01810         if (Subtarget->hasInt256())
01811           return MVT::v8i32;
01812         if (Subtarget->hasFp256())
01813           return MVT::v8f32;
01814       }
01815       if (Subtarget->hasSSE2())
01816         return MVT::v4i32;
01817       if (Subtarget->hasSSE1())
01818         return MVT::v4f32;
01819     } else if (!MemcpyStrSrc && Size >= 8 &&
01820                !Subtarget->is64Bit() &&
01821                Subtarget->hasSSE2()) {
01822       // Do not use f64 to lower memcpy if source is string constant. It's
01823       // better to use i32 to avoid the loads.
01824       return MVT::f64;
01825     }
01826   }
01827   if (Subtarget->is64Bit() && Size >= 8)
01828     return MVT::i64;
01829   return MVT::i32;
01830 }
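      // Illustrative example (not part of the original source): a 32-byte
      // memcpy with 16-byte-aligned (or fast-unaligned) operands is expanded
      // with v8i32 stores on an AVX2 target, while a small 8-byte copy on
      // x86-64 simply uses i64.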
01831 
01832 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
01833   if (VT == MVT::f32)
01834     return X86ScalarSSEf32;
01835   else if (VT == MVT::f64)
01836     return X86ScalarSSEf64;
01837   return true;
01838 }
01839 
01840 bool
01841 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
01842                                                   unsigned,
01843                                                   unsigned,
01844                                                   bool *Fast) const {
01845   if (Fast)
01846     *Fast = Subtarget->isUnalignedMemAccessFast();
01847   return true;
01848 }
01849 
01850 /// getJumpTableEncoding - Return the entry encoding for a jump table in the
01851 /// current function.  The returned value is a member of the
01852 /// MachineJumpTableInfo::JTEntryKind enum.
01853 unsigned X86TargetLowering::getJumpTableEncoding() const {
01854   // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
01855   // symbol.
01856   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
01857       Subtarget->isPICStyleGOT())
01858     return MachineJumpTableInfo::EK_Custom32;
01859 
01860   // Otherwise, use the normal jump table encoding heuristics.
01861   return TargetLowering::getJumpTableEncoding();
01862 }
01863 
01864 const MCExpr *
01865 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
01866                                              const MachineBasicBlock *MBB,
01867                                              unsigned uid,MCContext &Ctx) const{
01868   assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
01869          Subtarget->isPICStyleGOT());
01870   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
01871   // entries.
01872   return MCSymbolRefExpr::Create(MBB->getSymbol(),
01873                                  MCSymbolRefExpr::VK_GOTOFF, Ctx);
01874 }
01875 
01876 /// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
01877 /// jumptable.
01878 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
01879                                                     SelectionDAG &DAG) const {
01880   if (!Subtarget->is64Bit())
01881     // This doesn't have SDLoc associated with it, but is not really the
01882     // same as a Register.
01883     return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
01884   return Table;
01885 }
01886 
01887 /// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
01888 /// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
01889 /// MCExpr.
01890 const MCExpr *X86TargetLowering::
01891 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
01892                              MCContext &Ctx) const {
01893   // X86-64 uses RIP relative addressing based on the jump table label.
01894   if (Subtarget->isPICStyleRIPRel())
01895     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
01896 
01897   // Otherwise, the reference is relative to the PIC base.
01898   return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
01899 }
01900 
01901 // FIXME: Why is this routine here? Move to RegInfo!
01902 std::pair<const TargetRegisterClass*, uint8_t>
01903 X86TargetLowering::findRepresentativeClass(MVT VT) const{
01904   const TargetRegisterClass *RRC = nullptr;
01905   uint8_t Cost = 1;
01906   switch (VT.SimpleTy) {
01907   default:
01908     return TargetLowering::findRepresentativeClass(VT);
01909   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
01910     RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
01911     break;
01912   case MVT::x86mmx:
01913     RRC = &X86::VR64RegClass;
01914     break;
01915   case MVT::f32: case MVT::f64:
01916   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
01917   case MVT::v4f32: case MVT::v2f64:
01918   case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
01919   case MVT::v4f64:
01920     RRC = &X86::VR128RegClass;
01921     break;
01922   }
01923   return std::make_pair(RRC, Cost);
01924 }
01925 
01926 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
01927                                                unsigned &Offset) const {
01928   if (!Subtarget->isTargetLinux())
01929     return false;
01930 
01931   if (Subtarget->is64Bit()) {
01932     // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
01933     Offset = 0x28;
01934     if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
01935       AddressSpace = 256;
01936     else
01937       AddressSpace = 257;
01938   } else {
01939     // %gs:0x14 on i386
01940     Offset = 0x14;
01941     AddressSpace = 256;
01942   }
01943   return true;
01944 }
01945 
01946 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
01947                                             unsigned DestAS) const {
01948   assert(SrcAS != DestAS && "Expected different address spaces!");
01949 
01950   return SrcAS < 256 && DestAS < 256;
01951 }
01952 
01953 //===----------------------------------------------------------------------===//
01954 //               Return Value Calling Convention Implementation
01955 //===----------------------------------------------------------------------===//
01956 
01957 #include "X86GenCallingConv.inc"
01958 
01959 bool
01960 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
01961                                   MachineFunction &MF, bool isVarArg,
01962                         const SmallVectorImpl<ISD::OutputArg> &Outs,
01963                         LLVMContext &Context) const {
01964   SmallVector<CCValAssign, 16> RVLocs;
01965   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
01966   return CCInfo.CheckReturn(Outs, RetCC_X86);
01967 }
01968 
01969 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
01970   static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
01971   return ScratchRegs;
01972 }
01973 
01974 SDValue
01975 X86TargetLowering::LowerReturn(SDValue Chain,
01976                                CallingConv::ID CallConv, bool isVarArg,
01977                                const SmallVectorImpl<ISD::OutputArg> &Outs,
01978                                const SmallVectorImpl<SDValue> &OutVals,
01979                                SDLoc dl, SelectionDAG &DAG) const {
01980   MachineFunction &MF = DAG.getMachineFunction();
01981   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
01982 
01983   SmallVector<CCValAssign, 16> RVLocs;
01984   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
01985   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
01986 
01987   SDValue Flag;
01988   SmallVector<SDValue, 6> RetOps;
01989   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
01990   // Operand #1 = Bytes To Pop
01991   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
01992                    MVT::i16));
01993 
01994   // Copy the result values into the output registers.
01995   for (unsigned i = 0; i != RVLocs.size(); ++i) {
01996     CCValAssign &VA = RVLocs[i];
01997     assert(VA.isRegLoc() && "Can only return in registers!");
01998     SDValue ValToCopy = OutVals[i];
01999     EVT ValVT = ValToCopy.getValueType();
02000 
02001     // Promote values to the appropriate types
02002     if (VA.getLocInfo() == CCValAssign::SExt)
02003       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
02004     else if (VA.getLocInfo() == CCValAssign::ZExt)
02005       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
02006     else if (VA.getLocInfo() == CCValAssign::AExt)
02007       ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
02008     else if (VA.getLocInfo() == CCValAssign::BCvt)
02009       ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
02010 
02011     assert(VA.getLocInfo() != CCValAssign::FPExt &&
02012            "Unexpected FP-extend for return value.");  
02013 
02014     // If this is x86-64, and we disabled SSE, we can't return FP values,
02015     // or SSE or MMX vectors.
02016     if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
02017          VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
02018           (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
02019       report_fatal_error("SSE register return with SSE disabled");
02020     }
02021     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
02022     // llvm-gcc has never done it right and no one has noticed, so this
02023     // should be OK for now.
02024     if (ValVT == MVT::f64 &&
02025         (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
02026       report_fatal_error("SSE2 register return with SSE2 disabled");
02027 
02028     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
02029     // the RET instruction and handled by the FP Stackifier.
02030     if (VA.getLocReg() == X86::FP0 ||
02031         VA.getLocReg() == X86::FP1) {
02032       // If this is a copy from an xmm register to ST(0), use an FPExtend to
02033       // change the value to the FP stack register class.
02034       if (isScalarFPTypeInSSEReg(VA.getValVT()))
02035         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
02036       RetOps.push_back(ValToCopy);
02037       // Don't emit a copytoreg.
02038       continue;
02039     }
02040 
02041     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
02042     // which is returned in RAX / RDX.
02043     if (Subtarget->is64Bit()) {
02044       if (ValVT == MVT::x86mmx) {
02045         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
02046           ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
02047           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
02048                                   ValToCopy);
02049           // If we don't have SSE2 available, convert to v4f32 so the generated
02050           // register is legal.
02051           if (!Subtarget->hasSSE2())
02052             ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
02053         }
02054       }
02055     }
02056 
02057     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
02058     Flag = Chain.getValue(1);
02059     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02060   }
02061 
02062   // The x86-64 ABIs require that for returning structs by value we copy
02063   // the sret argument into %rax/%eax (depending on ABI) for the return.
02064   // Win32 requires us to put the sret argument to %eax as well.
02065   // We saved the argument into a virtual register in the entry block,
02066   // so now we copy the value out and into %rax/%eax.
02067   if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
02068       (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
02069     MachineFunction &MF = DAG.getMachineFunction();
02070     X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
02071     unsigned Reg = FuncInfo->getSRetReturnReg();
02072     assert(Reg &&
02073            "SRetReturnReg should have been set in LowerFormalArguments().");
02074     SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
02075 
02076     unsigned RetValReg
02077         = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
02078           X86::RAX : X86::EAX;
02079     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
02080     Flag = Chain.getValue(1);
02081 
02082     // RAX/EAX now acts like a return value.
02083     RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
02084   }
02085 
02086   RetOps[0] = Chain;  // Update chain.
02087 
02088   // Add the flag if we have it.
02089   if (Flag.getNode())
02090     RetOps.push_back(Flag);
02091 
02092   return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
02093 }
02094 
02095 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
02096   if (N->getNumValues() != 1)
02097     return false;
02098   if (!N->hasNUsesOfValue(1, 0))
02099     return false;
02100 
02101   SDValue TCChain = Chain;
02102   SDNode *Copy = *N->use_begin();
02103   if (Copy->getOpcode() == ISD::CopyToReg) {
02104     // If the copy has a glue operand, we conservatively assume it isn't safe to
02105     // perform a tail call.
02106     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02107       return false;
02108     TCChain = Copy->getOperand(0);
02109   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
02110     return false;
02111 
02112   bool HasRet = false;
02113   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
02114        UI != UE; ++UI) {
02115     if (UI->getOpcode() != X86ISD::RET_FLAG)
02116       return false;
02117     // If we are returning more than one value, we can definitely
02118     // not make a tail call; see PR19530.
02119     if (UI->getNumOperands() > 4)
02120       return false;
02121     if (UI->getNumOperands() == 4 &&
02122         UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
02123       return false;
02124     HasRet = true;
02125   }
02126 
02127   if (!HasRet)
02128     return false;
02129 
02130   Chain = TCChain;
02131   return true;
02132 }
02133 
02134 EVT
02135 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
02136                                             ISD::NodeType ExtendKind) const {
02137   MVT ReturnMVT;
02138   // TODO: Is this also valid on 32-bit?
02139   if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
02140     ReturnMVT = MVT::i8;
02141   else
02142     ReturnMVT = MVT::i32;
02143 
02144   EVT MinVT = getRegisterType(Context, ReturnMVT);
02145   return VT.bitsLT(MinVT) ? MinVT : VT;
02146 }
02147 
02148 /// LowerCallResult - Lower the result values of a call into the
02149 /// appropriate copies out of physical registers.
02150 ///
02151 SDValue
02152 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
02153                                    CallingConv::ID CallConv, bool isVarArg,
02154                                    const SmallVectorImpl<ISD::InputArg> &Ins,
02155                                    SDLoc dl, SelectionDAG &DAG,
02156                                    SmallVectorImpl<SDValue> &InVals) const {
02157 
02158   // Assign locations to each value returned by this call.
02159   SmallVector<CCValAssign, 16> RVLocs;
02160   bool Is64Bit = Subtarget->is64Bit();
02161   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
02162                  *DAG.getContext());
02163   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
02164 
02165   // Copy all of the result registers out of their specified physreg.
02166   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
02167     CCValAssign &VA = RVLocs[i];
02168     EVT CopyVT = VA.getValVT();
02169 
02170     // If this is x86-64, and we disabled SSE, we can't return FP values
02171     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
02172         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
02173       report_fatal_error("SSE register return with SSE disabled");
02174     }
02175 
02176     // If we prefer to use the value in xmm registers, copy it out as f80 and
02177     // use a truncate to move it from fp stack reg to xmm reg.
02178     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
02179         isScalarFPTypeInSSEReg(VA.getValVT()))
02180       CopyVT = MVT::f80;
02181 
02182     Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
02183                                CopyVT, InFlag).getValue(1);
02184     SDValue Val = Chain.getValue(0);
02185 
02186     if (CopyVT != VA.getValVT())
02187       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
02188                         // This truncation won't change the value.
02189                         DAG.getIntPtrConstant(1));
02190 
02191     InFlag = Chain.getValue(2);
02192     InVals.push_back(Val);
02193   }
02194 
02195   return Chain;
02196 }
02197 
02198 //===----------------------------------------------------------------------===//
02199 //                C & StdCall & Fast Calling Convention implementation
02200 //===----------------------------------------------------------------------===//
02201 //  The StdCall calling convention is standard for many Windows API routines.
02202 //  It differs from the C calling convention only slightly: the callee cleans
02203 //  up the stack rather than the caller, and symbols are decorated
02204 //  differently. It doesn't support any vector arguments.
02205 //  For info on fast calling convention see Fast Calling Convention (tail call)
02206 //  implementation LowerX86_32FastCCCallTo.
02207 
02208 /// CallIsStructReturn - Determines whether a call uses struct return
02209 /// semantics.
02210 enum StructReturnType {
02211   NotStructReturn,
02212   RegStructReturn,
02213   StackStructReturn
02214 };
02215 static StructReturnType
02216 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
02217   if (Outs.empty())
02218     return NotStructReturn;
02219 
02220   const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
02221   if (!Flags.isSRet())
02222     return NotStructReturn;
02223   if (Flags.isInReg())
02224     return RegStructReturn;
02225   return StackStructReturn;
02226 }
02227 
02228 /// ArgsAreStructReturn - Determines whether a function uses struct
02229 /// return semantics.
02230 static StructReturnType
02231 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
02232   if (Ins.empty())
02233     return NotStructReturn;
02234 
02235   const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
02236   if (!Flags.isSRet())
02237     return NotStructReturn;
02238   if (Flags.isInReg())
02239     return RegStructReturn;
02240   return StackStructReturn;
02241 }
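      // Illustrative example (not part of the original source): an outgoing
      // call whose first argument carries the 'sret' attribute, e.g.
      //   call void @f(%struct.S* sret %tmp)
      // is classified as StackStructReturn above, or RegStructReturn if that
      // argument is also marked 'inreg'.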
02242 
02243 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
02244 /// by "Src" to address "Dst" with size and alignment information specified by
02245 /// the specific parameter attribute. The copy will be passed as a byval
02246 /// function parameter.
02247 static SDValue
02248 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
02249                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
02250                           SDLoc dl) {
02251   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
02252 
02253   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
02254                        /*isVolatile*/false, /*AlwaysInline=*/true,
02255                        MachinePointerInfo(), MachinePointerInfo());
02256 }
02257 
02258 /// IsTailCallConvention - Return true if the calling convention is one that
02259 /// supports tail call optimization.
02260 static bool IsTailCallConvention(CallingConv::ID CC) {
02261   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
02262           CC == CallingConv::HiPE);
02263 }
02264 
02265 /// \brief Return true if the calling convention is a C calling convention.
02266 static bool IsCCallConvention(CallingConv::ID CC) {
02267   return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
02268           CC == CallingConv::X86_64_SysV);
02269 }
02270 
02271 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
02272   if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
02273     return false;
02274 
02275   CallSite CS(CI);
02276   CallingConv::ID CalleeCC = CS.getCallingConv();
02277   if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
02278     return false;
02279 
02280   return true;
02281 }
02282 
02283 /// FuncIsMadeTailCallSafe - Return true if the function is being made into
02284 /// a tailcall target by changing its ABI.
02285 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
02286                                    bool GuaranteedTailCallOpt) {
02287   return GuaranteedTailCallOpt && IsTailCallConvention(CC);
02288 }
02289 
02290 SDValue
02291 X86TargetLowering::LowerMemArgument(SDValue Chain,
02292                                     CallingConv::ID CallConv,
02293                                     const SmallVectorImpl<ISD::InputArg> &Ins,
02294                                     SDLoc dl, SelectionDAG &DAG,
02295                                     const CCValAssign &VA,
02296                                     MachineFrameInfo *MFI,
02297                                     unsigned i) const {
02298   // Create the nodes corresponding to a load from this parameter slot.
02299   ISD::ArgFlagsTy Flags = Ins[i].Flags;
02300   bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
02301       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
02302   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
02303   EVT ValVT;
02304 
02305   // If the value is passed by pointer, we have the address passed instead of
02306   // the value itself.
02307   if (VA.getLocInfo() == CCValAssign::Indirect)
02308     ValVT = VA.getLocVT();
02309   else
02310     ValVT = VA.getValVT();
02311 
02312   // FIXME: For now, all byval parameter objects are marked mutable. This can be
02313   // changed with more analysis.
02314   // In case of tail call optimization, mark all arguments mutable, since they
02315   // could be overwritten when the tail call's arguments are lowered.
02316   if (Flags.isByVal()) {
02317     unsigned Bytes = Flags.getByValSize();
02318     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
02319     int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
02320     return DAG.getFrameIndex(FI, getPointerTy());
02321   } else {
02322     int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
02323                                     VA.getLocMemOffset(), isImmutable);
02324     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
02325     return DAG.getLoad(ValVT, dl, Chain, FIN,
02326                        MachinePointerInfo::getFixedStack(FI),
02327                        false, false, false, 0);
02328   }
02329 }
02330 
02331 // FIXME: Get this from tablegen.
02332 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
02333                                                 const X86Subtarget *Subtarget) {
02334   assert(Subtarget->is64Bit());
02335 
02336   if (Subtarget->isCallingConvWin64(CallConv)) {
02337     static const MCPhysReg GPR64ArgRegsWin64[] = {
02338       X86::RCX, X86::RDX, X86::R8,  X86::R9
02339     };
02340     return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
02341   }
02342 
02343   static const MCPhysReg GPR64ArgRegs64Bit[] = {
02344     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
02345   };
02346   return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
02347 }
02348 
02349 // FIXME: Get this from tablegen.
02350 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
02351                                                 CallingConv::ID CallConv,
02352                                                 const X86Subtarget *Subtarget) {
02353   assert(Subtarget->is64Bit());
02354   if (Subtarget->isCallingConvWin64(CallConv)) {
02355     // The XMM registers which might contain var arg parameters are shadowed
02356     // in their paired GPRs.  So we only need to save the GPRs to their home
02357     // slots.
02358     // TODO: __vectorcall will change this.
02359     return None;
02360   }
02361 
02362   const Function *Fn = MF.getFunction();
02363   bool NoImplicitFloatOps = Fn->getAttributes().
02364       hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
02365   assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
02366          "SSE register cannot be used when SSE is disabled!");
02367   if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
02368       !Subtarget->hasSSE1())
02369     // Kernel mode asks for SSE to be disabled, so there are no XMM argument
02370     // registers.
02371     return None;
02372 
02373   static const MCPhysReg XMMArgRegs64Bit[] = {
02374     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02375     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02376   };
02377   return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
02378 }
02379 
02380 SDValue
02381 X86TargetLowering::LowerFormalArguments(SDValue Chain,
02382                                         CallingConv::ID CallConv,
02383                                         bool isVarArg,
02384                                       const SmallVectorImpl<ISD::InputArg> &Ins,
02385                                         SDLoc dl,
02386                                         SelectionDAG &DAG,
02387                                         SmallVectorImpl<SDValue> &InVals)
02388                                           const {
02389   MachineFunction &MF = DAG.getMachineFunction();
02390   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
02391 
02392   const Function* Fn = MF.getFunction();
02393   if (Fn->hasExternalLinkage() &&
02394       Subtarget->isTargetCygMing() &&
02395       Fn->getName() == "main")
02396     FuncInfo->setForceFramePointer(true);
02397 
02398   MachineFrameInfo *MFI = MF.getFrameInfo();
02399   bool Is64Bit = Subtarget->is64Bit();
02400   bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
02401 
02402   assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
02403          "Var args not supported with calling convention fastcc, ghc or hipe");
02404 
02405   // Assign locations to all of the incoming arguments.
02406   SmallVector<CCValAssign, 16> ArgLocs;
02407   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02408 
02409   // Allocate shadow area for Win64
02410   if (IsWin64)
02411     CCInfo.AllocateStack(32, 8);
02412 
02413   CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
02414 
02415   unsigned LastVal = ~0U;
02416   SDValue ArgValue;
02417   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02418     CCValAssign &VA = ArgLocs[i];
02419     // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
02420     // places.
02421     assert(VA.getValNo() != LastVal &&
02422            "Don't support value assigned to multiple locs yet");
02423     (void)LastVal;
02424     LastVal = VA.getValNo();
02425 
02426     if (VA.isRegLoc()) {
02427       EVT RegVT = VA.getLocVT();
02428       const TargetRegisterClass *RC;
02429       if (RegVT == MVT::i32)
02430         RC = &X86::GR32RegClass;
02431       else if (Is64Bit && RegVT == MVT::i64)
02432         RC = &X86::GR64RegClass;
02433       else if (RegVT == MVT::f32)
02434         RC = &X86::FR32RegClass;
02435       else if (RegVT == MVT::f64)
02436         RC = &X86::FR64RegClass;
02437       else if (RegVT.is512BitVector())
02438         RC = &X86::VR512RegClass;
02439       else if (RegVT.is256BitVector())
02440         RC = &X86::VR256RegClass;
02441       else if (RegVT.is128BitVector())
02442         RC = &X86::VR128RegClass;
02443       else if (RegVT == MVT::x86mmx)
02444         RC = &X86::VR64RegClass;
02445       else if (RegVT == MVT::i1)
02446         RC = &X86::VK1RegClass;
02447       else if (RegVT == MVT::v8i1)
02448         RC = &X86::VK8RegClass;
02449       else if (RegVT == MVT::v16i1)
02450         RC = &X86::VK16RegClass;
02451       else if (RegVT == MVT::v32i1)
02452         RC = &X86::VK32RegClass;
02453       else if (RegVT == MVT::v64i1)
02454         RC = &X86::VK64RegClass;
02455       else
02456         llvm_unreachable("Unknown argument type!");
02457 
02458       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02459       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
02460 
02461       // If this is an 8 or 16-bit value, it is really passed promoted to 32
02462       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
02463       // right size.
02464       if (VA.getLocInfo() == CCValAssign::SExt)
02465         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
02466                                DAG.getValueType(VA.getValVT()));
02467       else if (VA.getLocInfo() == CCValAssign::ZExt)
02468         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
02469                                DAG.getValueType(VA.getValVT()));
02470       else if (VA.getLocInfo() == CCValAssign::BCvt)
02471         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
02472 
02473       if (VA.isExtInLoc()) {
02474         // Handle MMX values passed in XMM regs.
02475         if (RegVT.isVector())
02476           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
02477         else
02478           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
02479       }
02480     } else {
02481       assert(VA.isMemLoc());
02482       ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
02483     }
02484 
02485     // If the value is passed via a pointer, do a load.
02486     if (VA.getLocInfo() == CCValAssign::Indirect)
02487       ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
02488                              MachinePointerInfo(), false, false, false, 0);
02489 
02490     InVals.push_back(ArgValue);
02491   }
02492 
02493   if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) {
02494     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02495       // The x86-64 ABIs require that for returning structs by value we copy
02496       // the sret argument into %rax/%eax (depending on ABI) for the return.
02497       // Win32 requires us to put the sret argument in %eax as well.
02498       // Save the argument into a virtual register so that we can access it
02499       // from the return points.
02500       if (Ins[i].Flags.isSRet()) {
02501         unsigned Reg = FuncInfo->getSRetReturnReg();
02502         if (!Reg) {
02503           MVT PtrTy = getPointerTy();
02504           Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
02505           FuncInfo->setSRetReturnReg(Reg);
02506         }
02507         SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
02508         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
02509         break;
02510       }
02511     }
02512   }
02513 
02514   unsigned StackSize = CCInfo.getNextStackOffset();
02515   // Align stack specially for tail calls.
02516   if (FuncIsMadeTailCallSafe(CallConv,
02517                              MF.getTarget().Options.GuaranteedTailCallOpt))
02518     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
02519 
02520   // If the function takes a variable number of arguments, make a frame index
02521   // for the start of the first vararg value... for expansion of llvm.va_start.
02522   // We can skip this if there are no va_start calls.
02523   if (MFI->hasVAStart() &&
02524       (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
02525                    CallConv != CallingConv::X86_ThisCall))) {
02526     FuncInfo->setVarArgsFrameIndex(
02527         MFI->CreateFixedObject(1, StackSize, true));
02528   }
02529 
02530   // 64-bit calling conventions support varargs and register parameters, so we
02531   // have to do extra work to spill them in the prologue or forward them to
02532   // musttail calls.
02533   if (Is64Bit && isVarArg &&
02534       (MFI->hasVAStart() || MFI->hasMustTailInVarArgFunc())) {
02535     // Find the first unallocated argument registers.
02536     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
02537     ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
02538     unsigned NumIntRegs =
02539         CCInfo.getFirstUnallocated(ArgGPRs.data(), ArgGPRs.size());
02540     unsigned NumXMMRegs =
02541         CCInfo.getFirstUnallocated(ArgXMMs.data(), ArgXMMs.size());
02542     assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
02543            "SSE register cannot be used when SSE is disabled!");
02544 
02545     // Gather all the live in physical registers.
02546     SmallVector<SDValue, 6> LiveGPRs;
02547     SmallVector<SDValue, 8> LiveXMMRegs;
02548     SDValue ALVal;
02549     for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
02550       unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
02551       LiveGPRs.push_back(
02552           DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
02553     }
02554     if (!ArgXMMs.empty()) {
02555       unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
02556       ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
02557       for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
02558         unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
02559         LiveXMMRegs.push_back(
02560             DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
02561       }
02562     }
02563 
02564     // Store them to the va_list returned by va_start.
02565     if (MFI->hasVAStart()) {
02566       if (IsWin64) {
02567         const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
02568         // Get to the caller-allocated home save location.  Add 8 to account
02569         // for the return address.
02570         int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
02571         FuncInfo->setRegSaveFrameIndex(
02572           MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
02573         // Fixup to set vararg frame on shadow area (4 x i64).
02574         if (NumIntRegs < 4)
02575           FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
02576       } else {
02577         // For X86-64, if there are vararg parameters that are passed via
02578         // registers, then we must store them to their spots on the stack so
02580         // they may be loaded by dereferencing the result of va_arg.
02580         FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
02581         FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
02582         FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
02583             ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
02584       }
02585 
02586       // Store the integer parameter registers.
02587       SmallVector<SDValue, 8> MemOps;
02588       SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
02589                                         getPointerTy());
02590       unsigned Offset = FuncInfo->getVarArgsGPOffset();
02591       for (SDValue Val : LiveGPRs) {
02592         SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
02593                                   DAG.getIntPtrConstant(Offset));
02594         SDValue Store =
02595           DAG.getStore(Val.getValue(1), dl, Val, FIN,
02596                        MachinePointerInfo::getFixedStack(
02597                          FuncInfo->getRegSaveFrameIndex(), Offset),
02598                        false, false, 0);
02599         MemOps.push_back(Store);
02600         Offset += 8;
02601       }
02602 
02603       if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
02604         // Now store the XMM (fp + vector) parameter registers.
02605         SmallVector<SDValue, 12> SaveXMMOps;
02606         SaveXMMOps.push_back(Chain);
02607         SaveXMMOps.push_back(ALVal);
02608         SaveXMMOps.push_back(DAG.getIntPtrConstant(
02609                                FuncInfo->getRegSaveFrameIndex()));
02610         SaveXMMOps.push_back(DAG.getIntPtrConstant(
02611                                FuncInfo->getVarArgsFPOffset()));
02612         SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
02613                           LiveXMMRegs.end());
02614         MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
02615                                      MVT::Other, SaveXMMOps));
02616       }
02617 
02618       if (!MemOps.empty())
02619         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02620     } else {
02621       // Add all GPRs, AL, and XMMs to the list of forwards.  We will add them
02622       // to the liveout set on a musttail call.
02623       assert(MFI->hasMustTailInVarArgFunc());
02624       auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
02625       typedef X86MachineFunctionInfo::Forward Forward;
02626 
02627       for (unsigned I = 0, E = LiveGPRs.size(); I != E; ++I) {
02628         unsigned VReg =
02629             MF.getRegInfo().createVirtualRegister(&X86::GR64RegClass);
02630         Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveGPRs[I]);
02631         Forwards.push_back(Forward(VReg, ArgGPRs[NumIntRegs + I], MVT::i64));
02632       }
02633 
02634       if (!ArgXMMs.empty()) {
02635         unsigned ALVReg =
02636             MF.getRegInfo().createVirtualRegister(&X86::GR8RegClass);
02637         Chain = DAG.getCopyToReg(Chain, dl, ALVReg, ALVal);
02638         Forwards.push_back(Forward(ALVReg, X86::AL, MVT::i8));
02639 
02640         for (unsigned I = 0, E = LiveXMMRegs.size(); I != E; ++I) {
02641           unsigned VReg =
02642               MF.getRegInfo().createVirtualRegister(&X86::VR128RegClass);
02643           Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveXMMRegs[I]);
02644           Forwards.push_back(
02645               Forward(VReg, ArgXMMs[NumXMMRegs + I], MVT::v4f32));
02646         }
02647       }
02648     }
02649   }
02650 
02651   // Some CCs need callee pop.
02652   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
02653                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
02654     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
02655   } else {
02656     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
02657     // If this is an sret function, the return should pop the hidden pointer.
02658     if (!Is64Bit && !IsTailCallConvention(CallConv) &&
02659         !Subtarget->getTargetTriple().isOSMSVCRT() &&
02660         argsAreStructReturn(Ins) == StackStructReturn)
02661       FuncInfo->setBytesToPopOnReturn(4);
02662   }
02663 
02664   if (!Is64Bit) {
02665     // RegSaveFrameIndex is X86-64 only.
02666     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
02667     if (CallConv == CallingConv::X86_FastCall ||
02668         CallConv == CallingConv::X86_ThisCall)
02669       // fastcall and thiscall functions can't have varargs.
02670       FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
02671   }
02672 
02673   FuncInfo->setArgumentStackSize(StackSize);
02674 
02675   return Chain;
02676 }
02677 
02678 SDValue
02679 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
02680                                     SDValue StackPtr, SDValue Arg,
02681                                     SDLoc dl, SelectionDAG &DAG,
02682                                     const CCValAssign &VA,
02683                                     ISD::ArgFlagsTy Flags) const {
02684   unsigned LocMemOffset = VA.getLocMemOffset();
02685   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
02686   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
02687   if (Flags.isByVal())
02688     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
02689 
02690   return DAG.getStore(Chain, dl, Arg, PtrOff,
02691                       MachinePointerInfo::getStack(LocMemOffset),
02692                       false, false, 0);
02693 }
02694 
02695 /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
02696 /// optimization is performed and it is required.
02697 SDValue
02698 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
02699                                            SDValue &OutRetAddr, SDValue Chain,
02700                                            bool IsTailCall, bool Is64Bit,
02701                                            int FPDiff, SDLoc dl) const {
02702   // Adjust the Return address stack slot.
02703   EVT VT = getPointerTy();
02704   OutRetAddr = getReturnAddressFrameIndex(DAG);
02705 
02706   // Load the "old" Return address.
02707   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
02708                            false, false, false, 0);
02709   return SDValue(OutRetAddr.getNode(), 1);
02710 }
02711 
02712 /// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
02713 /// optimization is performed and it is required (FPDiff!=0).
02714 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
02715                                         SDValue Chain, SDValue RetAddrFrIdx,
02716                                         EVT PtrVT, unsigned SlotSize,
02717                                         int FPDiff, SDLoc dl) {
02718   // Store the return address to the appropriate stack slot.
02719   if (!FPDiff) return Chain;
02720   // Calculate the new stack slot for the return address.
02721   int NewReturnAddrFI =
02722     MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
02723                                          false);
02724   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
02725   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
02726                        MachinePointerInfo::getFixedStack(NewReturnAddrFI),
02727                        false, false, 0);
02728   return Chain;
02729 }
02730 
02731 SDValue
02732 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
02733                              SmallVectorImpl<SDValue> &InVals) const {
02734   SelectionDAG &DAG                     = CLI.DAG;
02735   SDLoc &dl                             = CLI.DL;
02736   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
02737   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
02738   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
02739   SDValue Chain                         = CLI.Chain;
02740   SDValue Callee                        = CLI.Callee;
02741   CallingConv::ID CallConv              = CLI.CallConv;
02742   bool &isTailCall                      = CLI.IsTailCall;
02743   bool isVarArg                         = CLI.IsVarArg;
02744 
02745   MachineFunction &MF = DAG.getMachineFunction();
02746   bool Is64Bit        = Subtarget->is64Bit();
02747   bool IsWin64        = Subtarget->isCallingConvWin64(CallConv);
02748   StructReturnType SR = callIsStructReturn(Outs);
02749   bool IsSibcall      = false;
02750   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
02751 
02752   if (MF.getTarget().Options.DisableTailCalls)
02753     isTailCall = false;
02754 
02755   bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
02756   if (IsMustTail) {
02757     // Force this to be a tail call.  The verifier rules are enough to ensure
02758     // that we can lower this successfully without moving the return address
02759     // around.
02760     isTailCall = true;
02761   } else if (isTailCall) {
02762     // Check if it's really possible to do a tail call.
02763     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
02764                     isVarArg, SR != NotStructReturn,
02765                     MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
02766                     Outs, OutVals, Ins, DAG);
02767 
02768     // Sibcalls are automatically detected tailcalls which do not require
02769     // ABI changes.
02770     if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
02771       IsSibcall = true;
02772 
02773     if (isTailCall)
02774       ++NumTailCalls;
02775   }
02776 
02777   assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
02778          "Var args not supported with calling convention fastcc, ghc or hipe");
02779 
02780   // Analyze operands of the call, assigning locations to each operand.
02781   SmallVector<CCValAssign, 16> ArgLocs;
02782   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02783 
02784   // Allocate shadow area for Win64
02785   if (IsWin64)
02786     CCInfo.AllocateStack(32, 8);
02787 
02788   CCInfo.AnalyzeCallOperands(Outs, CC_X86);
02789 
02790   // Get a count of how many bytes are to be pushed on the stack.
02791   unsigned NumBytes = CCInfo.getNextStackOffset();
02792   if (IsSibcall)
02793     // This is a sibcall. The memory operands are already in place in the
02794     // caller's incoming argument area, i.e. in its own caller's stack.
02795     NumBytes = 0;
02796   else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
02797            IsTailCallConvention(CallConv))
02798     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
02799 
02800   int FPDiff = 0;
02801   if (isTailCall && !IsSibcall && !IsMustTail) {
02802     // Lower arguments at fp - stackoffset + fpdiff.
02803     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
02804 
02805     FPDiff = NumBytesCallerPushed - NumBytes;
02806 
02807     // Set the delta of movement of the returnaddr stackslot, but only update
02808     // it when the new delta is more negative than the previously recorded one.
02809     if (FPDiff < X86Info->getTCReturnAddrDelta())
02810       X86Info->setTCReturnAddrDelta(FPDiff);
02811   }
02812 
02813   unsigned NumBytesToPush = NumBytes;
02814   unsigned NumBytesToPop = NumBytes;
02815 
02816   // If we have an inalloca argument, all stack space has already been allocated
02817   // for us and sits right at the top of the stack.  We don't support multiple
02818   // arguments passed in memory when using inalloca.
02819   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
02820     NumBytesToPush = 0;
02821     if (!ArgLocs.back().isMemLoc())
02822       report_fatal_error("cannot use inalloca attribute on a register "
02823                          "parameter");
02824     if (ArgLocs.back().getLocMemOffset() != 0)
02825       report_fatal_error("any parameter with the inalloca attribute must be "
02826                          "the only memory argument");
02827   }
02828 
02829   if (!IsSibcall)
02830     Chain = DAG.getCALLSEQ_START(
02831         Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
02832 
02833   SDValue RetAddrFrIdx;
02834   // Load return address for tail calls.
02835   if (isTailCall && FPDiff)
02836     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
02837                                     Is64Bit, FPDiff, dl);
02838 
02839   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
02840   SmallVector<SDValue, 8> MemOpChains;
02841   SDValue StackPtr;
02842 
02843   // Walk the register/memloc assignments, inserting copies/loads.  In the case
02844   // of tail call optimization, arguments are handled later.
02845   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
02846       DAG.getSubtarget().getRegisterInfo());
02847   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02848     // Skip inalloca arguments, they have already been written.
02849     ISD::ArgFlagsTy Flags = Outs[i].Flags;
02850     if (Flags.isInAlloca())
02851       continue;
02852 
02853     CCValAssign &VA = ArgLocs[i];
02854     EVT RegVT = VA.getLocVT();
02855     SDValue Arg = OutVals[i];
02856     bool isByVal = Flags.isByVal();
02857 
02858     // Promote the value if needed.
02859     switch (VA.getLocInfo()) {
02860     default: llvm_unreachable("Unknown loc info!");
02861     case CCValAssign::Full: break;
02862     case CCValAssign::SExt:
02863       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
02864       break;
02865     case CCValAssign::ZExt:
02866       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
02867       break;
02868     case CCValAssign::AExt:
02869       if (RegVT.is128BitVector()) {
02870         // Special case: passing MMX values in XMM registers.
02871         Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
02872         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
02873         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
02874       } else
02875         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
02876       break;
02877     case CCValAssign::BCvt:
02878       Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
02879       break;
02880     case CCValAssign::Indirect: {
02881       // Store the argument.
02882       SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
02883       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
02884       Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
02885                            MachinePointerInfo::getFixedStack(FI),
02886                            false, false, 0);
02887       Arg = SpillSlot;
02888       break;
02889     }
02890     }
02891 
02892     if (VA.isRegLoc()) {
02893       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
02894       if (isVarArg && IsWin64) {
02895         // The Win64 ABI requires an argument passed in an XMM register to also
02896         // be copied to the corresponding shadow GPR if the callee is variadic.
02897         unsigned ShadowReg = 0;
02898         switch (VA.getLocReg()) {
02899         case X86::XMM0: ShadowReg = X86::RCX; break;
02900         case X86::XMM1: ShadowReg = X86::RDX; break;
02901         case X86::XMM2: ShadowReg = X86::R8; break;
02902         case X86::XMM3: ShadowReg = X86::R9; break;
02903         }
02904         if (ShadowReg)
02905           RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
02906       }
02907     } else if (!IsSibcall && (!isTailCall || isByVal)) {
02908       assert(VA.isMemLoc());
02909       if (!StackPtr.getNode())
02910         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
02911                                       getPointerTy());
02912       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
02913                                              dl, DAG, VA, Flags));
02914     }
02915   }
02916 
02917   if (!MemOpChains.empty())
02918     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
02919 
02920   if (Subtarget->isPICStyleGOT()) {
02921     // ELF / PIC requires the GOT pointer to be in the EBX register before
02922     // function calls via the PLT.
02923     if (!isTailCall) {
02924       RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
02925                DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
02926     } else {
02927       // If we are tail calling and generating PIC/GOT style code load the
02928       // address of the callee into ECX. The value in ecx is used as target of
02929       // the tail jump. This is done to circumvent the ebx/callee-saved problem
02930       // for tail calls on PIC/GOT architectures. Normally we would just put the
02931       // address of GOT into ebx and then call target@PLT. But for tail calls
02932       // ebx would be restored (since ebx is callee saved) before jumping to the
02933       // target@PLT.
02934 
02935       // Note: The actual moving to ECX is done further down.
02936       GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
02937       if (G && !G->getGlobal()->hasHiddenVisibility() &&
02938           !G->getGlobal()->hasProtectedVisibility())
02939         Callee = LowerGlobalAddress(Callee, DAG);
02940       else if (isa<ExternalSymbolSDNode>(Callee))
02941         Callee = LowerExternalSymbol(Callee, DAG);
02942     }
02943   }
02944 
02945   if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
02946     // From AMD64 ABI document:
02947     // For calls that may call functions that use varargs or stdargs
02948     // (prototype-less calls or calls to functions containing ellipsis (...) in
02949     // the declaration) %al is used as hidden argument to specify the number
02950     // of SSE registers used. The contents of %al do not need to match exactly
02951     // the number of registers, but must be an upper bound on the number of SSE
02952     // registers used and be in the range 0 - 8 inclusive.
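    //
    // Editorial example (assumed): for a call such as printf("%f\n", x) that
    // passes one double in XMM0, AL could be set to 1, though any upper bound
    // up to 8 is equally valid; the callee only uses it to decide whether the
    // XMM argument registers need to be saved.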
02953 
02954     // Count the number of XMM registers allocated.
02955     static const MCPhysReg XMMArgRegs[] = {
02956       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02957       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02958     };
02959     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
02960     assert((Subtarget->hasSSE1() || !NumXMMRegs)
02961            && "SSE registers cannot be used when SSE is disabled");
02962 
02963     RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
02964                                         DAG.getConstant(NumXMMRegs, MVT::i8)));
02965   }
02966 
02967   if (Is64Bit && isVarArg && IsMustTail) {
02968     const auto &Forwards = X86Info->getForwardedMustTailRegParms();
02969     for (const auto &F : Forwards) {
02970       SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
02971       RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
02972     }
02973   }
02974 
02975   // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
02976   // don't need this because the eligibility check rejects calls that require
02977   // shuffling arguments passed in memory.
02978   if (!IsSibcall && isTailCall) {
02979     // Force all the incoming stack arguments to be loaded from the stack
02980     // before any new outgoing arguments are stored to the stack, because the
02981     // outgoing stack slots may alias the incoming argument stack slots, and
02982     // the alias isn't otherwise explicit. This is slightly more conservative
02983     // than necessary, because it means that each store effectively depends
02984     // on every argument instead of just those arguments it would clobber.
02985     SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
02986 
02987     SmallVector<SDValue, 8> MemOpChains2;
02988     SDValue FIN;
02989     int FI = 0;
02990     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02991       CCValAssign &VA = ArgLocs[i];
02992       if (VA.isRegLoc())
02993         continue;
02994       assert(VA.isMemLoc());
02995       SDValue Arg = OutVals[i];
02996       ISD::ArgFlagsTy Flags = Outs[i].Flags;
02997       // Skip inalloca arguments.  They don't require any work.
02998       if (Flags.isInAlloca())
02999         continue;
03000       // Create frame index.
03001       int32_t Offset = VA.getLocMemOffset()+FPDiff;
03002       uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
03003       FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
03004       FIN = DAG.getFrameIndex(FI, getPointerTy());
03005 
03006       if (Flags.isByVal()) {
03007         // Copy relative to framepointer.
03008         SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
03009         if (!StackPtr.getNode())
03010           StackPtr = DAG.getCopyFromReg(Chain, dl,
03011                                         RegInfo->getStackRegister(),
03012                                         getPointerTy());
03013         Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
03014 
03015         MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
03016                                                          ArgChain,
03017                                                          Flags, DAG, dl));
03018       } else {
03019         // Store relative to framepointer.
03020         MemOpChains2.push_back(
03021           DAG.getStore(ArgChain, dl, Arg, FIN,
03022                        MachinePointerInfo::getFixedStack(FI),
03023                        false, false, 0));
03024       }
03025     }
03026 
03027     if (!MemOpChains2.empty())
03028       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
03029 
03030     // Store the return address to the appropriate stack slot.
03031     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
03032                                      getPointerTy(), RegInfo->getSlotSize(),
03033                                      FPDiff, dl);
03034   }
03035 
03036   // Build a sequence of copy-to-reg nodes chained together with token chain
03037   // and flag operands which copy the outgoing args into registers.
03038   SDValue InFlag;
03039   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
03040     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
03041                              RegsToPass[i].second, InFlag);
03042     InFlag = Chain.getValue(1);
03043   }
03044 
03045   if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
03046     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
03047     // In the 64-bit large code model, we have to make all calls
03048     // through a register, since the call instruction's 32-bit
03049     // pc-relative offset may not be large enough to hold the whole
03050     // address.
03051   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
03052     // If the callee is a GlobalAddress node (quite common, every direct call
03053     // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
03054     // it.
03055 
03056     // We should use extra load for direct calls to dllimported functions in
03057     // non-JIT mode.
03058     const GlobalValue *GV = G->getGlobal();
03059     if (!GV->hasDLLImportStorageClass()) {
03060       unsigned char OpFlags = 0;
03061       bool ExtraLoad = false;
03062       unsigned WrapperKind = ISD::DELETED_NODE;
03063 
03064       // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
03065       // external symbols must go through the PLT in PIC mode.  If the symbol
03066       // has hidden or protected visibility, or if it is static or local, then
03067       // we don't need to use the PLT - we can directly call it.
03068       if (Subtarget->isTargetELF() &&
03069           DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
03070           GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
03071         OpFlags = X86II::MO_PLT;
03072       } else if (Subtarget->isPICStyleStubAny() &&
03073                  (GV->isDeclaration() || GV->isWeakForLinker()) &&
03074                  (!Subtarget->getTargetTriple().isMacOSX() ||
03075                   Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
03076         // PC-relative references to external symbols should go through $stub,
03077         // unless we're building with the leopard linker or later, which
03078         // automatically synthesizes these stubs.
03079         OpFlags = X86II::MO_DARWIN_STUB;
03080       } else if (Subtarget->isPICStyleRIPRel() &&
03081                  isa<Function>(GV) &&
03082                  cast<Function>(GV)->getAttributes().
03083                    hasAttribute(AttributeSet::FunctionIndex,
03084                                 Attribute::NonLazyBind)) {
03085         // If the function is marked as non-lazy, generate an indirect call
03086         // which loads from the GOT directly. This avoids runtime overhead
03087         // at the cost of eager binding (and one extra byte of encoding).
03088         OpFlags = X86II::MO_GOTPCREL;
03089         WrapperKind = X86ISD::WrapperRIP;
03090         ExtraLoad = true;
03091       }
03092 
03093       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
03094                                           G->getOffset(), OpFlags);
03095 
03096       // Add a wrapper if needed.
03097       if (WrapperKind != ISD::DELETED_NODE)
03098         Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
03099       // Add extra indirection if needed.
03100       if (ExtraLoad)
03101         Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
03102                              MachinePointerInfo::getGOT(),
03103                              false, false, false, 0);
03104     }
03105   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
03106     unsigned char OpFlags = 0;
03107 
03108     // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
03109     // external symbols should go through the PLT.
03110     if (Subtarget->isTargetELF() &&
03111         DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
03112       OpFlags = X86II::MO_PLT;
03113     } else if (Subtarget->isPICStyleStubAny() &&
03114                (!Subtarget->getTargetTriple().isMacOSX() ||
03115                 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
03116       // PC-relative references to external symbols should go through $stub,
03117       // unless we're building with the leopard linker or later, which
03118       // automatically synthesizes these stubs.
03119       OpFlags = X86II::MO_DARWIN_STUB;
03120     }
03121 
03122     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
03123                                          OpFlags);
03124   } else if (Subtarget->isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) {
03125     // Zero-extend the 32-bit Callee address to 64 bits per the x32 ABI.
03126     Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
03127   }
03128 
03129   // Returns a chain & a flag for retval copy to use.
03130   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
03131   SmallVector<SDValue, 8> Ops;
03132 
03133   if (!IsSibcall && isTailCall) {
03134     Chain = DAG.getCALLSEQ_END(Chain,
03135                                DAG.getIntPtrConstant(NumBytesToPop, true),
03136                                DAG.getIntPtrConstant(0, true), InFlag, dl);
03137     InFlag = Chain.getValue(1);
03138   }
03139 
03140   Ops.push_back(Chain);
03141   Ops.push_back(Callee);
03142 
03143   if (isTailCall)
03144     Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
03145 
03146   // Add argument registers to the end of the list so that they are known live
03147   // into the call.
03148   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
03149     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
03150                                   RegsToPass[i].second.getValueType()));
03151 
03152   // Add a register mask operand representing the call-preserved registers.
03153   const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
03154   const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
03155   assert(Mask && "Missing call preserved mask for calling convention");
03156   Ops.push_back(DAG.getRegisterMask(Mask));
03157 
03158   if (InFlag.getNode())
03159     Ops.push_back(InFlag);
03160 
03161   if (isTailCall) {
03162     // We used to do:
03163     //// If this is the first return lowered for this function, add the regs
03164     //// to the liveout set for the function.
03165     // This isn't right, although it's probably harmless on x86; liveouts
03166     // should be computed from returns, not tail calls.  Consider a void
03167     // function making a tail call to a function returning int.
03168     return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
03169   }
03170 
03171   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
03172   InFlag = Chain.getValue(1);
03173 
03174   // Create the CALLSEQ_END node.
03175   unsigned NumBytesForCalleeToPop;
03176   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
03177                        DAG.getTarget().Options.GuaranteedTailCallOpt))
03178     NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
03179   else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
03180            !Subtarget->getTargetTriple().isOSMSVCRT() &&
03181            SR == StackStructReturn)
03182     // If this is a call to a struct-return function, the callee
03183     // pops the hidden struct pointer, so we have to push it back.
03184     // This is common for Darwin/X86, Linux & Mingw32 targets.
03185     // For MSVC Win32 targets, the caller pops the hidden struct pointer.
03186     NumBytesForCalleeToPop = 4;
03187   else
03188     NumBytesForCalleeToPop = 0;  // Callee pops nothing.
03189 
03190   // Returns a flag for retval copy to use.
03191   if (!IsSibcall) {
03192     Chain = DAG.getCALLSEQ_END(Chain,
03193                                DAG.getIntPtrConstant(NumBytesToPop, true),
03194                                DAG.getIntPtrConstant(NumBytesForCalleeToPop,
03195                                                      true),
03196                                InFlag, dl);
03197     InFlag = Chain.getValue(1);
03198   }
03199 
03200   // Handle result values, copying them out of physregs into vregs that we
03201   // return.
03202   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
03203                          Ins, dl, DAG, InVals);
03204 }
03205 
03206 //===----------------------------------------------------------------------===//
03207 //                Fast Calling Convention (tail call) implementation
03208 //===----------------------------------------------------------------------===//
03209 
03210 //  Like stdcall, the callee cleans up the arguments; unlike it, ECX is
03211 //  reserved for storing the tail-called function address. Only 2 registers are
03212 //  free for argument passing (inreg). Tail call optimization is performed
03213 //  provided:
03214 //                * tailcallopt is enabled
03215 //                * caller/callee are fastcc
03216 //  On X86_64 architecture with GOT-style position independent code only local
03217 //  (within module) calls are supported at the moment.
03218 //  To keep the stack aligned according to the platform ABI, the function
03219 //  GetAlignedArgumentStackSize ensures that the argument delta is always a
03220 //  multiple of the stack alignment. (Dynamic linkers need this; Darwin's dyld
03221 //  is one example.) If a tail-called callee has more arguments than its
03222 //  caller, the caller needs to make sure that there is room to move the
03223 //  RETADDR to. This is achieved by reserving an area the size of the argument
03224 //  delta right after the original RETADDR, but before the saved frame pointer
03225 //  or the spilled registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4).
03226 //  stack layout:
03227 //    arg1
03228 //    arg2
03229 //    RETADDR
03230 //    [ new RETADDR
03231 //      move area ]
03232 //    (possible EBP)
03233 //    ESI
03234 //    EDI
03235 //    local1 ..
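//
//  Editorial worked example (assumed 16-byte stack alignment, 4-byte slots and
//  -tailcallopt): caller(arg1, arg2) pushes 8 bytes of arguments, padded to 12
//  by GetAlignedArgumentStackSize; callee(arg1, arg2, arg3, arg4) needs 16
//  bytes, padded to 28.  FPDiff = 12 - 28 = -16, so a 16-byte move area is
//  reserved and the return address is re-stored 16 bytes lower before the jump.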
03236 
03237 /// GetAlignedArgumentStackSize - Round the stack size up, e.g. to the form
03238 /// 16n + 12 for a 16 byte stack alignment requirement.
03239 unsigned
03240 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
03241                                                SelectionDAG& DAG) const {
03242   MachineFunction &MF = DAG.getMachineFunction();
03243   const TargetMachine &TM = MF.getTarget();
03244   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
03245       TM.getSubtargetImpl()->getRegisterInfo());
03246   const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
03247   unsigned StackAlignment = TFI.getStackAlignment();
03248   uint64_t AlignMask = StackAlignment - 1;
03249   int64_t Offset = StackSize;
03250   unsigned SlotSize = RegInfo->getSlotSize();
03251   if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
03252     // The remainder already fits (e.g. <= 12), so just add the difference.
03253     Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
03254   } else {
03255     // Mask out the low bits; add one stack alignment plus the target (e.g. 12).
03256     Offset = ((~AlignMask) & Offset) + StackAlignment +
03257              (StackAlignment - SlotSize);
03258   }
03259   return Offset;
03260 }
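
// Editorial worked example (assumed StackAlignment = 16 and SlotSize = 4, so
// the result has the form 16n + 12): a StackSize of 8 satisfies
// (8 & 15) <= 12 and is bumped to 8 + (12 - 8) = 12, while a StackSize of 14
// does not (14 > 12) and becomes ((~15) & 14) + 16 + 12 = 28.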
03261 
03262 /// MatchingStackOffset - Return true if the given stack call argument is
03263 /// already available in the same relative position in the caller's
03264 /// incoming argument stack.
03265 static
03266 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
03267                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
03268                          const X86InstrInfo *TII) {
03269   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
03270   int FI = INT_MAX;
03271   if (Arg.getOpcode() == ISD::CopyFromReg) {
03272     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
03273     if (!TargetRegisterInfo::isVirtualRegister(VR))
03274       return false;
03275     MachineInstr *Def = MRI->getVRegDef(VR);
03276     if (!Def)
03277       return false;
03278     if (!Flags.isByVal()) {
03279       if (!TII->isLoadFromStackSlot(Def, FI))
03280         return false;
03281     } else {
03282       unsigned Opcode = Def->getOpcode();
03283       if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
03284           Def->getOperand(1).isFI()) {
03285         FI = Def->getOperand(1).getIndex();
03286         Bytes = Flags.getByValSize();
03287       } else
03288         return false;
03289     }
03290   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
03291     if (Flags.isByVal())
03292       // ByVal argument is passed in as a pointer but it's now being
03293       // dereferenced. e.g.
03294       // define @foo(%struct.X* %A) {
03295       //   tail call @bar(%struct.X* byval %A)
03296       // }
03297       return false;
03298     SDValue Ptr = Ld->getBasePtr();
03299     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
03300     if (!FINode)
03301       return false;
03302     FI = FINode->getIndex();
03303   } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
03304     FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
03305     FI = FINode->getIndex();
03306     Bytes = Flags.getByValSize();
03307   } else
03308     return false;
03309 
03310   assert(FI != INT_MAX);
03311   if (!MFI->isFixedObjectIndex(FI))
03312     return false;
03313   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
03314 }
03315 
03316 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
03317 /// for tail call optimization. Targets which want to do tail call
03318 /// optimization should implement this function.
03319 bool
03320 X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
03321                                                      CallingConv::ID CalleeCC,
03322                                                      bool isVarArg,
03323                                                      bool isCalleeStructRet,
03324                                                      bool isCallerStructRet,
03325                                                      Type *RetTy,
03326                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
03327                                     const SmallVectorImpl<SDValue> &OutVals,
03328                                     const SmallVectorImpl<ISD::InputArg> &Ins,
03329                                                      SelectionDAG &DAG) const {
03330   if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
03331     return false;
03332 
03333   // If -tailcallopt is specified, make fastcc functions tail-callable.
03334   const MachineFunction &MF = DAG.getMachineFunction();
03335   const Function *CallerF = MF.getFunction();
03336 
03337   // If the function return type is x86_fp80 and the callee return type is not,
03338   // then the FP_EXTEND of the call result is not a nop. It's not safe to
03339   // perform a tailcall optimization here.
03340   if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
03341     return false;
03342 
03343   CallingConv::ID CallerCC = CallerF->getCallingConv();
03344   bool CCMatch = CallerCC == CalleeCC;
03345   bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
03346   bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
03347 
03348   if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
03349     if (IsTailCallConvention(CalleeCC) && CCMatch)
03350       return true;
03351     return false;
03352   }
03353 
03354   // Look for obvious safe cases to perform tail call optimization that do not
03355   // require ABI changes. This is what gcc calls sibcall.
03356 
03357   // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
03358   // emit a special epilogue.
03359   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
03360       DAG.getSubtarget().getRegisterInfo());
03361   if (RegInfo->needsStackRealignment(MF))
03362     return false;
03363 
03364   // Also avoid sibcall optimization if either caller or callee uses struct
03365   // return semantics.
03366   if (isCalleeStructRet || isCallerStructRet)
03367     return false;
03368 
03369   // An stdcall/thiscall caller is expected to clean up its arguments; the
03370   // callee isn't going to do that.
03371   // FIXME: this is more restrictive than needed. We could produce a tailcall
03372   // when the stack adjustment matches. For example, with a thiscall that takes
03373   // only one argument.
03374   if (!CCMatch && (CallerCC == CallingConv::X86_StdCall ||
03375                    CallerCC == CallingConv::X86_ThisCall))
03376     return false;
03377 
03378   // Do not sibcall optimize vararg calls unless all arguments are passed via
03379   // registers.
03380   if (isVarArg && !Outs.empty()) {
03381 
03382     // Optimizing for varargs on Win64 is unlikely to be safe without
03383     // additional testing.
03384     if (IsCalleeWin64 || IsCallerWin64)
03385       return false;
03386 
03387     SmallVector<CCValAssign, 16> ArgLocs;
03388     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03389                    *DAG.getContext());
03390 
03391     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03392     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
03393       if (!ArgLocs[i].isRegLoc())
03394         return false;
03395   }
03396 
03397   // If the call result is in ST0 / ST1, it needs to be popped off the x87
03398   // stack.  Therefore, if it's not used by the call it is not safe to optimize
03399   // this into a sibcall.
03400   bool Unused = false;
03401   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
03402     if (!Ins[i].Used) {
03403       Unused = true;
03404       break;
03405     }
03406   }
03407   if (Unused) {
03408     SmallVector<CCValAssign, 16> RVLocs;
03409     CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
03410                    *DAG.getContext());
03411     CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
03412     for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
03413       CCValAssign &VA = RVLocs[i];
03414       if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
03415         return false;
03416     }
03417   }
03418 
03419   // If the calling conventions do not match, then we'd better make sure the
03420   // results are returned in the same way as what the caller expects.
03421   if (!CCMatch) {
03422     SmallVector<CCValAssign, 16> RVLocs1;
03423     CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
03424                     *DAG.getContext());
03425     CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
03426 
03427     SmallVector<CCValAssign, 16> RVLocs2;
03428     CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
03429                     *DAG.getContext());
03430     CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
03431 
03432     if (RVLocs1.size() != RVLocs2.size())
03433       return false;
03434     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
03435       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
03436         return false;
03437       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
03438         return false;
03439       if (RVLocs1[i].isRegLoc()) {
03440         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
03441           return false;
03442       } else {
03443         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
03444           return false;
03445       }
03446     }
03447   }
03448 
03449   // If the callee takes no arguments then go on to check the results of the
03450   // call.
03451   if (!Outs.empty()) {
03452     // Check if stack adjustment is needed. For now, do not do this if any
03453     // argument is passed on the stack.
03454     SmallVector<CCValAssign, 16> ArgLocs;
03455     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03456                    *DAG.getContext());
03457 
03458     // Allocate shadow area for Win64
03459     if (IsCalleeWin64)
03460       CCInfo.AllocateStack(32, 8);
03461 
03462     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03463     if (CCInfo.getNextStackOffset()) {
03464       MachineFunction &MF = DAG.getMachineFunction();
03465       if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
03466         return false;
03467 
03468       // Check if the arguments are already laid out in the right way as
03469       // the caller's fixed stack objects.
03470       MachineFrameInfo *MFI = MF.getFrameInfo();
03471       const MachineRegisterInfo *MRI = &MF.getRegInfo();
03472       const X86InstrInfo *TII =
03473           static_cast<const X86InstrInfo *>(DAG.getSubtarget().getInstrInfo());
03474       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03475         CCValAssign &VA = ArgLocs[i];
03476         SDValue Arg = OutVals[i];
03477         ISD::ArgFlagsTy Flags = Outs[i].Flags;
03478         if (VA.getLocInfo() == CCValAssign::Indirect)
03479           return false;
03480         if (!VA.isRegLoc()) {
03481           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
03482                                    MFI, MRI, TII))
03483             return false;
03484         }
03485       }
03486     }
03487 
03488     // If the tailcall address may be in a register, then make sure it's
03489     // possible to register allocate for it. In 32-bit, the call address can
03490     // only target EAX, EDX, or ECX since the tail call must be scheduled after
03491     // callee-saved registers are restored. These happen to be the same
03492     // registers used to pass 'inreg' arguments so watch out for those.
03493     if (!Subtarget->is64Bit() &&
03494         ((!isa<GlobalAddressSDNode>(Callee) &&
03495           !isa<ExternalSymbolSDNode>(Callee)) ||
03496          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03497       unsigned NumInRegs = 0;
03498       // In PIC we need an extra register to formulate the address computation
03499       // for the callee.
03500       unsigned MaxInRegs =
03501           (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
03502 
03503       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03504         CCValAssign &VA = ArgLocs[i];
03505         if (!VA.isRegLoc())
03506           continue;
03507         unsigned Reg = VA.getLocReg();
03508         switch (Reg) {
03509         default: break;
03510         case X86::EAX: case X86::EDX: case X86::ECX:
03511           if (++NumInRegs == MaxInRegs)
03512             return false;
03513           break;
03514         }
03515       }
03516     }
03517   }
03518 
03519   return true;
03520 }
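
// Illustrative examples of the checks above: a fastcc caller tail-calling a
// fastcc callee whose arguments all fit in registers and whose results come
// back in the same locations passes every test, while a vararg call that puts
// any argument on the stack, or an unused call result living in FP0/FP1, makes
// the call ineligible.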
03521 
03522 FastISel *
03523 X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
03524                                   const TargetLibraryInfo *libInfo) const {
03525   return X86::createFastISel(funcInfo, libInfo);
03526 }
03527 
03528 //===----------------------------------------------------------------------===//
03529 //                           Other Lowering Hooks
03530 //===----------------------------------------------------------------------===//
03531 
03532 static bool MayFoldLoad(SDValue Op) {
03533   return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
03534 }
03535 
03536 static bool MayFoldIntoStore(SDValue Op) {
03537   return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
03538 }
03539 
03540 static bool isTargetShuffle(unsigned Opcode) {
03541   switch(Opcode) {
03542   default: return false;
03543   case X86ISD::BLENDI:
03544   case X86ISD::PSHUFB:
03545   case X86ISD::PSHUFD:
03546   case X86ISD::PSHUFHW:
03547   case X86ISD::PSHUFLW:
03548   case X86ISD::SHUFP:
03549   case X86ISD::PALIGNR:
03550   case X86ISD::MOVLHPS:
03551   case X86ISD::MOVLHPD:
03552   case X86ISD::MOVHLPS:
03553   case X86ISD::MOVLPS:
03554   case X86ISD::MOVLPD:
03555   case X86ISD::MOVSHDUP:
03556   case X86ISD::MOVSLDUP:
03557   case X86ISD::MOVDDUP:
03558   case X86ISD::MOVSS:
03559   case X86ISD::MOVSD:
03560   case X86ISD::UNPCKL:
03561   case X86ISD::UNPCKH:
03562   case X86ISD::VPERMILPI:
03563   case X86ISD::VPERM2X128:
03564   case X86ISD::VPERMI:
03565     return true;
03566   }
03567 }
03568 
03569 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03570                                     SDValue V1, SelectionDAG &DAG) {
03571   switch(Opc) {
03572   default: llvm_unreachable("Unknown x86 shuffle node");
03573   case X86ISD::MOVSHDUP:
03574   case X86ISD::MOVSLDUP:
03575   case X86ISD::MOVDDUP:
03576     return DAG.getNode(Opc, dl, VT, V1);
03577   }
03578 }
03579 
03580 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03581                                     SDValue V1, unsigned TargetMask,
03582                                     SelectionDAG &DAG) {
03583   switch(Opc) {
03584   default: llvm_unreachable("Unknown x86 shuffle node");
03585   case X86ISD::PSHUFD:
03586   case X86ISD::PSHUFHW:
03587   case X86ISD::PSHUFLW:
03588   case X86ISD::VPERMILPI:
03589   case X86ISD::VPERMI:
03590     return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
03591   }
03592 }
03593 
03594 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03595                                     SDValue V1, SDValue V2, unsigned TargetMask,
03596                                     SelectionDAG &DAG) {
03597   switch(Opc) {
03598   default: llvm_unreachable("Unknown x86 shuffle node");
03599   case X86ISD::PALIGNR:
03600   case X86ISD::VALIGN:
03601   case X86ISD::SHUFP:
03602   case X86ISD::VPERM2X128:
03603     return DAG.getNode(Opc, dl, VT, V1, V2,
03604                        DAG.getConstant(TargetMask, MVT::i8));
03605   }
03606 }
03607 
03608 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03609                                     SDValue V1, SDValue V2, SelectionDAG &DAG) {
03610   switch(Opc) {
03611   default: llvm_unreachable("Unknown x86 shuffle node");
03612   case X86ISD::MOVLHPS:
03613   case X86ISD::MOVLHPD:
03614   case X86ISD::MOVHLPS:
03615   case X86ISD::MOVLPS:
03616   case X86ISD::MOVLPD:
03617   case X86ISD::MOVSS:
03618   case X86ISD::MOVSD:
03619   case X86ISD::UNPCKL:
03620   case X86ISD::UNPCKH:
03621     return DAG.getNode(Opc, dl, VT, V1, V2);
03622   }
03623 }
03624 
03625 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
03626   MachineFunction &MF = DAG.getMachineFunction();
03627   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
03628       DAG.getSubtarget().getRegisterInfo());
03629   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
03630   int ReturnAddrIndex = FuncInfo->getRAIndex();
03631 
03632   if (ReturnAddrIndex == 0) {
03633     // Set up a frame object for the return address.
03634     unsigned SlotSize = RegInfo->getSlotSize();
03635     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
03636                                                            -(int64_t)SlotSize,
03637                                                            false);
03638     FuncInfo->setRAIndex(ReturnAddrIndex);
03639   }
03640 
03641   return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
03642 }
03643 
03644 bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
03645                                        bool hasSymbolicDisplacement) {
03646   // Offset should fit into 32 bit immediate field.
03647   if (!isInt<32>(Offset))
03648     return false;
03649 
03650   // If we don't have a symbolic displacement - we don't have any extra
03651   // restrictions.
03652   if (!hasSymbolicDisplacement)
03653     return true;
03654 
03655   // FIXME: Some tweaks might be needed for medium code model.
03656   if (M != CodeModel::Small && M != CodeModel::Kernel)
03657     return false;
03658 
03659   // For the small code model, we assume the last object ends at least 16MB
03660   // before the 31-bit boundary. We may also accept pretty large negative
03661   // constants, since all objects live in the positive half of the address space.
03662   if (M == CodeModel::Small && Offset < 16*1024*1024)
03663     return true;
03664 
03665   // For the kernel code model, we know that all objects reside in the negative
03666   // half of the 32-bit address space. We must not accept negative offsets, since
03667   // they may fall outside that range, but we may accept pretty large positive ones.
03668   if (M == CodeModel::Kernel && Offset > 0)
03669     return true;
03670 
03671   return false;
03672 }
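
// Worked example: with a symbolic displacement under the small code model, an
// offset of 4*1024*1024 (4MB) is accepted because it stays below the 16MB
// margin, while under the kernel code model an offset of -8 is rejected since
// only positive offsets are considered safe there.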
03673 
03674 /// isCalleePop - Determines whether the callee is required to pop its
03675 /// own arguments. Callee pop is necessary to support tail calls.
03676 bool X86::isCalleePop(CallingConv::ID CallingConv,
03677                       bool is64Bit, bool IsVarArg, bool TailCallOpt) {
03678   switch (CallingConv) {
03679   default:
03680     return false;
03681   case CallingConv::X86_StdCall:
03682   case CallingConv::X86_FastCall:
03683   case CallingConv::X86_ThisCall:
03684     return !is64Bit;
03685   case CallingConv::Fast:
03686   case CallingConv::GHC:
03687   case CallingConv::HiPE:
03688     if (IsVarArg)
03689       return false;
03690     return TailCallOpt;
03691   }
03692 }
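
// For example, a 32-bit X86_StdCall function pops its own arguments (true),
// the same convention in 64-bit mode does not, and CallingConv::Fast only
// reports callee-pop when the call is not variadic and guaranteed tail call
// optimization (TailCallOpt) is in effect.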
03693 
03694 /// \brief Return true if the condition is an unsigned comparison operation.
03695 static bool isX86CCUnsigned(unsigned X86CC) {
03696   switch (X86CC) {
03697   default: llvm_unreachable("Invalid integer condition!");
03698   case X86::COND_E:     return true;
03699   case X86::COND_G:     return false;
03700   case X86::COND_GE:    return false;
03701   case X86::COND_L:     return false;
03702   case X86::COND_LE:    return false;
03703   case X86::COND_NE:    return true;
03704   case X86::COND_B:     return true;
03705   case X86::COND_A:     return true;
03706   case X86::COND_BE:    return true;
03707   case X86::COND_AE:    return true;
03708   }
03709   llvm_unreachable("covered switch fell through?!");
03710 }
03711 
03712 /// TranslateX86CC - do a one-to-one translation of an ISD::CondCode to the X86
03713 /// specific condition code, returning the condition code and the LHS/RHS of the
03714 /// comparison to make.
03715 static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
03716                                SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
03717   if (!isFP) {
03718     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
03719       if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
03720         // X > -1   -> X == 0, jump !sign.
03721         RHS = DAG.getConstant(0, RHS.getValueType());
03722         return X86::COND_NS;
03723       }
03724       if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
03725         // X < 0   -> X == 0, jump on sign.
03726         return X86::COND_S;
03727       }
03728       if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
03729         // X < 1   -> X <= 0
03730         RHS = DAG.getConstant(0, RHS.getValueType());
03731         return X86::COND_LE;
03732       }
03733     }
03734 
03735     switch (SetCCOpcode) {
03736     default: llvm_unreachable("Invalid integer condition!");
03737     case ISD::SETEQ:  return X86::COND_E;
03738     case ISD::SETGT:  return X86::COND_G;
03739     case ISD::SETGE:  return X86::COND_GE;
03740     case ISD::SETLT:  return X86::COND_L;
03741     case ISD::SETLE:  return X86::COND_LE;
03742     case ISD::SETNE:  return X86::COND_NE;
03743     case ISD::SETULT: return X86::COND_B;
03744     case ISD::SETUGT: return X86::COND_A;
03745     case ISD::SETULE: return X86::COND_BE;
03746     case ISD::SETUGE: return X86::COND_AE;
03747     }
03748   }
03749 
03750   // First determine if it is required or is profitable to flip the operands.
03751 
03752   // If LHS is a foldable load, but RHS is not, flip the condition.
03753   if (ISD::isNON_EXTLoad(LHS.getNode()) &&
03754       !ISD::isNON_EXTLoad(RHS.getNode())) {
03755     SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
03756     std::swap(LHS, RHS);
03757   }
03758 
03759   switch (SetCCOpcode) {
03760   default: break;
03761   case ISD::SETOLT:
03762   case ISD::SETOLE:
03763   case ISD::SETUGT:
03764   case ISD::SETUGE:
03765     std::swap(LHS, RHS);
03766     break;
03767   }
03768 
03769   // On a floating point condition, the flags are set as follows:
03770   // ZF  PF  CF   op
03771   //  0 | 0 | 0 | X > Y
03772   //  0 | 0 | 1 | X < Y
03773   //  1 | 0 | 0 | X == Y
03774   //  1 | 1 | 1 | unordered
03775   switch (SetCCOpcode) {
03776   default: llvm_unreachable("Condcode should be pre-legalized away");
03777   case ISD::SETUEQ:
03778   case ISD::SETEQ:   return X86::COND_E;
03779   case ISD::SETOLT:              // flipped
03780   case ISD::SETOGT:
03781   case ISD::SETGT:   return X86::COND_A;
03782   case ISD::SETOLE:              // flipped
03783   case ISD::SETOGE:
03784   case ISD::SETGE:   return X86::COND_AE;
03785   case ISD::SETUGT:              // flipped
03786   case ISD::SETULT:
03787   case ISD::SETLT:   return X86::COND_B;
03788   case ISD::SETUGE:              // flipped
03789   case ISD::SETULE:
03790   case ISD::SETLE:   return X86::COND_BE;
03791   case ISD::SETONE:
03792   case ISD::SETNE:   return X86::COND_NE;
03793   case ISD::SETUO:   return X86::COND_P;
03794   case ISD::SETO:    return X86::COND_NP;
03795   case ISD::SETOEQ:
03796   case ISD::SETUNE:  return X86::COND_INVALID;
03797   }
03798 }
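
// Worked example: for an integer SETLT against the constant 1, the RHS is
// rewritten to 0 and X86::COND_LE is returned (x < 1 becomes x <= 0). For a
// floating-point SETOLT the operands are swapped and X86::COND_A is used,
// which is false whenever the comparison is unordered (ZF = CF = 1), so the
// ordered "less than" semantics are preserved.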
03799 
03800 /// hasFPCMov - Return true if there is a floating point cmov for the specific
03801 /// X86 condition code. The current x86 ISA includes the following FP cmov
03802 /// instructions: fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
03803 static bool hasFPCMov(unsigned X86CC) {
03804   switch (X86CC) {
03805   default:
03806     return false;
03807   case X86::COND_B:
03808   case X86::COND_BE:
03809   case X86::COND_E:
03810   case X86::COND_P:
03811   case X86::COND_A:
03812   case X86::COND_AE:
03813   case X86::COND_NE:
03814   case X86::COND_NP:
03815     return true;
03816   }
03817 }
03818 
03819 /// isFPImmLegal - Returns true if the target can instruction select the
03820 /// specified FP immediate natively. If false, the legalizer will
03821 /// materialize the FP immediate as a load from a constant pool.
03822 bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
03823   for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
03824     if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
03825       return true;
03826   }
03827   return false;
03828 }
03829 
03830 /// \brief Returns true if it is beneficial to convert a load of a constant
03831 /// to just the constant itself.
03832 bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
03833                                                           Type *Ty) const {
03834   assert(Ty->isIntegerTy());
03835 
03836   unsigned BitSize = Ty->getPrimitiveSizeInBits();
03837   if (BitSize == 0 || BitSize > 64)
03838     return false;
03839   return true;
03840 }
03841 
03842 /// isUndefOrInRange - Return true if Val is undef or if its value falls within
03843 /// the half-open range [Low, Hi).
03844 static bool isUndefOrInRange(int Val, int Low, int Hi) {
03845   return (Val < 0) || (Val >= Low && Val < Hi);
03846 }
03847 
03848 /// isUndefOrEqual - Val is either less than zero (undef) or equal to the
03849 /// specified value.
03850 static bool isUndefOrEqual(int Val, int CmpVal) {
03851   return (Val < 0 || Val == CmpVal);
03852 }
03853 
03854 /// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
03855 /// at position Pos and ending at Pos+Size, is either undef or matches the
03856 /// sequential values Low, Low+1, ..., Low+Size-1.
03857 static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
03858                                        unsigned Pos, unsigned Size, int Low) {
03859   for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
03860     if (!isUndefOrEqual(Mask[i], Low))
03861       return false;
03862   return true;
03863 }
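
// For example, with Mask = <-1, 5, 6, -1>, Pos = 0, Size = 4 and Low = 4 the
// defined elements match the sequence 4, 5, 6, 7 (undefs are ignored), so this
// returns true.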
03864 
03865 /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
03866 /// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
03867 /// the second operand.
03868 static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT) {
03869   if (VT == MVT::v4f32 || VT == MVT::v4i32 )
03870     return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
03871   if (VT == MVT::v2f64 || VT == MVT::v2i64)
03872     return (Mask[0] < 2 && Mask[1] < 2);
03873   return false;
03874 }
03875 
03876 /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
03877 /// is suitable for input to PSHUFHW.
03878 static bool isPSHUFHWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
03879   if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
03880     return false;
03881 
03882   // Lower quadword copied in order or undef.
03883   if (!isSequentialOrUndefInRange(Mask, 0, 4, 0))
03884     return false;
03885 
03886   // Upper quadword shuffled.
03887   for (unsigned i = 4; i != 8; ++i)
03888     if (!isUndefOrInRange(Mask[i], 4, 8))
03889       return false;
03890 
03891   if (VT == MVT::v16i16) {
03892     // Lower quadword copied in order or undef.
03893     if (!isSequentialOrUndefInRange(Mask, 8, 4, 8))
03894       return false;
03895 
03896     // Upper quadword shuffled.
03897     for (unsigned i = 12; i != 16; ++i)
03898       if (!isUndefOrInRange(Mask[i], 12, 16))
03899         return false;
03900   }
03901 
03902   return true;
03903 }
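
// For example, the v8i16 mask <0, 1, 2, 3, 7, 6, 5, 4> keeps the low quadword
// in place and permutes only elements 4-7, so it is a valid PSHUFHW mask;
// <0, 1, 2, 3, 0, 1, 2, 3> is not, because the high quadword would need
// elements taken from the low one.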
03904 
03905 /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
03906 /// is suitable for input to PSHUFLW.
03907 static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
03908   if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
03909     return false;
03910 
03911   // Upper quadword copied in order.
03912   if (!isSequentialOrUndefInRange(Mask, 4, 4, 4))
03913     return false;
03914 
03915   // Lower quadword shuffled.
03916   for (unsigned i = 0; i != 4; ++i)
03917     if (!isUndefOrInRange(Mask[i], 0, 4))
03918       return false;
03919 
03920   if (VT == MVT::v16i16) {
03921     // Upper quadword copied in order.
03922     if (!isSequentialOrUndefInRange(Mask, 12, 4, 12))
03923       return false;
03924 
03925     // Lower quadword shuffled.
03926     for (unsigned i = 8; i != 12; ++i)
03927       if (!isUndefOrInRange(Mask[i], 8, 12))
03928         return false;
03929   }
03930 
03931   return true;
03932 }
03933 
03934 /// \brief Return true if the mask specifies a shuffle of elements that is
03935 /// suitable for input to intralane (palignr) or interlane (valign) vector
03936 /// right-shift.
03937 static bool isAlignrMask(ArrayRef<int> Mask, MVT VT, bool InterLane) {
03938   unsigned NumElts = VT.getVectorNumElements();
03939   unsigned NumLanes = InterLane ? 1: VT.getSizeInBits()/128;
03940   unsigned NumLaneElts = NumElts/NumLanes;
03941 
03942   // Do not handle 64-bit element shuffles with palignr.
03943   if (NumLaneElts == 2)
03944     return false;
03945 
03946   for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
03947     unsigned i;
03948     for (i = 0; i != NumLaneElts; ++i) {
03949       if (Mask[i+l] >= 0)
03950         break;
03951     }
03952 
03953     // Lane is all undef, go to next lane
03954     if (i == NumLaneElts)
03955       continue;
03956 
03957     int Start = Mask[i+l];
03958 
03959     // Make sure it's in this lane in one of the sources
03960     if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
03961         !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
03962       return false;
03963 
03964     // If not lane 0, then we must match lane 0
03965     if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
03966       return false;
03967 
03968     // Correct second source to be contiguous with first source
03969     if (Start >= (int)NumElts)
03970       Start -= NumElts - NumLaneElts;
03971 
03972     // Make sure we're shifting in the right direction.
03973     if (Start <= (int)(i+l))
03974       return false;
03975 
03976     Start -= i;
03977 
03978     // Check the rest of the elements to see if they are consecutive.
03979     for (++i; i != NumLaneElts; ++i) {
03980       int Idx = Mask[i+l];
03981 
03982       // Make sure it's in this lane
03983       if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
03984           !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
03985         return false;
03986 
03987       // If not lane 0, then we must match lane 0
03988       if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
03989         return false;
03990 
03991       if (Idx >= (int)NumElts)
03992         Idx -= NumElts - NumLaneElts;
03993 
03994       if (!isUndefOrEqual(Idx, Start+i))
03995         return false;
03996 
03997     }
03998   }
03999 
04000   return true;
04001 }
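
// For example, the v8i16 mask <1, 2, 3, 4, 5, 6, 7, 8> (index 8 being element
// 0 of the second source) selects eight consecutive elements from the
// concatenation of the two sources and is accepted; it corresponds to a
// PALIGNR shifting by one 16-bit element, i.e. a byte immediate of 2.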
04002 
04003 /// \brief Return true if the node specifies a shuffle of elements that is
04004 /// suitable for input to PALIGNR.
04005 static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
04006                           const X86Subtarget *Subtarget) {
04007   if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
04008       (VT.is256BitVector() && !Subtarget->hasInt256()) ||
04009       VT.is512BitVector())
04010     // FIXME: Add AVX512BW.
04011     return false;
04012 
04013   return isAlignrMask(Mask, VT, false);
04014 }
04015 
04016 /// \brief Return true if the node specifies a shuffle of elements that is
04017 /// suitable for input to VALIGN.
04018 static bool isVALIGNMask(ArrayRef<int> Mask, MVT VT,
04019                           const X86Subtarget *Subtarget) {
04020   // FIXME: Add AVX512VL.
04021   if (!VT.is512BitVector() || !Subtarget->hasAVX512())
04022     return false;
04023   return isAlignrMask(Mask, VT, true);
04024 }
04025 
04026 /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
04027 /// the two vector operands have swapped position.
04028 static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
04029                                      unsigned NumElems) {
04030   for (unsigned i = 0; i != NumElems; ++i) {
04031     int idx = Mask[i];
04032     if (idx < 0)
04033       continue;
04034     else if (idx < (int)NumElems)
04035       Mask[i] = idx + NumElems;
04036     else
04037       Mask[i] = idx - NumElems;
04038   }
04039 }
04040 
04041 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
04042 /// specifies a shuffle of elements that is suitable for input to 128/256-bit
04043 /// SHUFPS and SHUFPD. If Commuted is true, it checks whether the sources are in
04044 /// the reverse of the order the x86 shuffles want.
04045 static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool Commuted = false) {
04046 
04047   unsigned NumElems = VT.getVectorNumElements();
04048   unsigned NumLanes = VT.getSizeInBits()/128;
04049   unsigned NumLaneElems = NumElems/NumLanes;
04050 
04051   if (NumLaneElems != 2 && NumLaneElems != 4)
04052     return false;
04053 
04054   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
04055   bool symetricMaskRequired =
04056     (VT.getSizeInBits() >= 256) && (EltSize == 32);
04057 
04058   // VSHUFPSY divides the resulting vector into 4 chunks.
04059   // The sources are also split into 4 chunks, and each destination
04060   // chunk must come from a different source chunk.
04061   //
04062   //  SRC1 =>   X7    X6    X5    X4    X3    X2    X1    X0
04063   //  SRC2 =>   Y7    Y6    Y5    Y4    Y3    Y2    Y1    Y0
04064   //
04065   //  DST  =>  Y7..Y4,   Y7..Y4,   X7..X4,   X7..X4,
04066   //           Y3..Y0,   Y3..Y0,   X3..X0,   X3..X0
04067   //
04068   // VSHUFPDY divides the resulting vector into 4 chunks.
04069   // The sources are also split into 4 chunks, and each destination
04070   // chunk must come from a different source chunk.
04071   //
04072   //  SRC1 =>      X3       X2       X1       X0
04073   //  SRC2 =>      Y3       Y2       Y1       Y0
04074   //
04075   //  DST  =>  Y3..Y2,  X3..X2,  Y1..Y0,  X1..X0
04076   //
04077   SmallVector<int, 4> MaskVal(NumLaneElems, -1);
04078   unsigned HalfLaneElems = NumLaneElems/2;
04079   for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
04080     for (unsigned i = 0; i != NumLaneElems; ++i) {
04081       int Idx = Mask[i+l];
04082       unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0);
04083       if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems))
04084         return false;
04085       // For VSHUFPSY, the mask of the second half must be the same as the
04086       // first but with the appropriate offsets. This works in the same way as
04087       // VPERMILPS works with masks.
04088       if (!symetricMaskRequired || Idx < 0)
04089         continue;
04090       if (MaskVal[i] < 0) {
04091         MaskVal[i] = Idx - l;
04092         continue;
04093       }
04094       if ((signed)(Idx - l) != MaskVal[i])
04095         return false;
04096     }
04097   }
04098 
04099   return true;
04100 }
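
// For example, with Commuted == false the v4f32 mask <3, 1, 6, 4> is accepted:
// the low two result elements come from V1 (indices 0-3) and the high two from
// V2 (indices 4-7), which is the shape SHUFPS expects. <4, 1, 2, 3> is
// rejected because element 0 would have to come from the wrong source.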
04101 
04102 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
04103 /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
04104 static bool isMOVHLPSMask(ArrayRef<int> Mask, MVT VT) {
04105   if (!VT.is128BitVector())
04106     return false;
04107 
04108   unsigned NumElems = VT.getVectorNumElements();
04109 
04110   if (NumElems != 4)
04111     return false;
04112 
04113   // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
04114   return isUndefOrEqual(Mask[0], 6) &&
04115          isUndefOrEqual(Mask[1], 7) &&
04116          isUndefOrEqual(Mask[2], 2) &&
04117          isUndefOrEqual(Mask[3], 3);
04118 }
04119 
04120 /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
04121 /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
04122 /// <2, 3, 2, 3>
04123 static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, MVT VT) {
04124   if (!VT.is128BitVector())
04125     return false;
04126 
04127   unsigned NumElems = VT.getVectorNumElements();
04128 
04129   if (NumElems != 4)
04130     return false;
04131 
04132   return isUndefOrEqual(Mask[0], 2) &&
04133          isUndefOrEqual(Mask[1], 3) &&
04134          isUndefOrEqual(Mask[2], 2) &&
04135          isUndefOrEqual(Mask[3], 3);
04136 }
04137 
04138 /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
04139 /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
04140 static bool isMOVLPMask(ArrayRef<int> Mask, MVT VT) {
04141   if (!VT.is128BitVector())
04142     return false;
04143 
04144   unsigned NumElems = VT.getVectorNumElements();
04145 
04146   if (NumElems != 2 && NumElems != 4)
04147     return false;
04148 
04149   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
04150     if (!isUndefOrEqual(Mask[i], i + NumElems))
04151       return false;
04152 
04153   for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
04154     if (!isUndefOrEqual(Mask[i], i))
04155       return false;
04156 
04157   return true;
04158 }
04159 
04160 /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
04161 /// specifies a shuffle of elements that is suitable for input to MOVLHPS.
04162 static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) {
04163   if (!VT.is128BitVector())
04164     return false;
04165 
04166   unsigned NumElems = VT.getVectorNumElements();
04167 
04168   if (NumElems != 2 && NumElems != 4)
04169     return false;
04170 
04171   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
04172     if (!isUndefOrEqual(Mask[i], i))
04173       return false;
04174 
04175   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
04176     if (!isUndefOrEqual(Mask[i + e], i + NumElems))
04177       return false;
04178 
04179   return true;
04180 }
04181 
04182 /// isINSERTPSMask - Return true if the specified VECTOR_SHUFFLE operand
04183 /// specifies a shuffle of elements that is suitable for input to INSERTPS.
04184 /// i.e. if all but one element come from the same vector.
04185 static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) {
04186   // TODO: Deal with AVX's VINSERTPS
04187   if (!VT.is128BitVector() || (VT != MVT::v4f32 && VT != MVT::v4i32))
04188     return false;
04189 
04190   unsigned CorrectPosV1 = 0;
04191   unsigned CorrectPosV2 = 0;
04192   for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
04193     if (Mask[i] == -1) {
04194       ++CorrectPosV1;
04195       ++CorrectPosV2;
04196       continue;
04197     }
04198 
04199     if (Mask[i] == i)
04200       ++CorrectPosV1;
04201     else if (Mask[i] == i + 4)
04202       ++CorrectPosV2;
04203   }
04204 
04205   if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
04206     // We have 3 elements (undefs count as elements from any vector) from one
04207     // vector, and one from another.
04208     return true;
04209 
04210   return false;
04211 }
04212 
04213 //
04214 // Some special combinations that can be optimized.
04215 //
04216 static
04217 SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
04218                                SelectionDAG &DAG) {
04219   MVT VT = SVOp->getSimpleValueType(0);
04220   SDLoc dl(SVOp);
04221 
04222   if (VT != MVT::v8i32 && VT != MVT::v8f32)
04223     return SDValue();
04224 
04225   ArrayRef<int> Mask = SVOp->getMask();
04226 
04227   // These are the special masks that may be optimized.
04228   static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
04229   static const int MaskToOptimizeOdd[]  = {1, 9, 3, 11, 5, 13, 7, 15};
04230   bool MatchEvenMask = true;
04231   bool MatchOddMask  = true;
04232   for (int i=0; i<8; ++i) {
04233     if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
04234       MatchEvenMask = false;
04235     if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
04236       MatchOddMask = false;
04237   }
04238 
04239   if (!MatchEvenMask && !MatchOddMask)
04240     return SDValue();
04241 
04242   SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
04243 
04244   SDValue Op0 = SVOp->getOperand(0);
04245   SDValue Op1 = SVOp->getOperand(1);
04246 
04247   if (MatchEvenMask) {
04248     // Shift the second operand right by 32 bits.
04249     static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
04250     Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask);
04251   } else {
04252     // Shift the first operand left by 32 bits.
04253     static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
04254     Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask);
04255   }
04256   static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
04257   return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask);
04258 }
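
// For the even mask, for instance, the second source is first shuffled so that
// its even elements land in the odd result positions (<-1, 0, -1, 2, ...>),
// and the fixed mask <0, 9, 2, 11, 4, 13, 6, 15> then simply alternates
// between the two vectors, a pattern the blend lowering handles cheaply.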
04259 
04260 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
04261 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
04262 static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
04263                          bool HasInt256, bool V2IsSplat = false) {
04264 
04265   assert(VT.getSizeInBits() >= 128 &&
04266          "Unsupported vector type for unpckl");
04267 
04268   unsigned NumElts = VT.getVectorNumElements();
04269   if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
04270       (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04271     return false;
04272 
04273   assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
04274          "Unsupported vector type for unpckh");
04275 
04276   // AVX defines UNPCK* to operate independently on 128-bit lanes.
04277   unsigned NumLanes = VT.getSizeInBits()/128;
04278   unsigned NumLaneElts = NumElts/NumLanes;
04279 
04280   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
04281     for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
04282       int BitI  = Mask[l+i];
04283       int BitI1 = Mask[l+i+1];
04284       if (!isUndefOrEqual(BitI, j))
04285         return false;
04286       if (V2IsSplat) {
04287         if (!isUndefOrEqual(BitI1, NumElts))
04288           return false;
04289       } else {
04290         if (!isUndefOrEqual(BitI1, j + NumElts))
04291           return false;
04292       }
04293     }
04294   }
04295 
04296   return true;
04297 }
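
// For example, the v4i32 mask <0, 4, 1, 5> interleaves the low halves of the
// two sources and is accepted (the PUNPCKLDQ pattern). For a 256-bit v8i32 the
// lanes are checked independently, so the valid form is
// <0, 8, 1, 9, 4, 12, 5, 13> rather than a single vector-wide interleave.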
04298 
04299 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
04300 /// specifies a shuffle of elements that is suitable for input to UNPCKH.
04301 static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
04302                          bool HasInt256, bool V2IsSplat = false) {
04303   assert(VT.getSizeInBits() >= 128 &&
04304          "Unsupported vector type for unpckh");
04305 
04306   unsigned NumElts = VT.getVectorNumElements();
04307   if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
04308       (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04309     return false;
04310 
04311   assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
04312          "Unsupported vector type for unpckh");
04313 
04314   // AVX defines UNPCK* to operate independently on 128-bit lanes.
04315   unsigned NumLanes = VT.getSizeInBits()/128;
04316   unsigned NumLaneElts = NumElts/NumLanes;
04317 
04318   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
04319     for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
04320       int BitI  = Mask[l+i];
04321       int BitI1 = Mask[l+i+1];
04322       if (!isUndefOrEqual(BitI, j))
04323         return false;
04324       if (V2IsSplat) {
04325         if (isUndefOrEqual(BitI1, NumElts))
04326           return false;
04327       } else {
04328         if (!isUndefOrEqual(BitI1, j+NumElts))
04329           return false;
04330       }
04331     }
04332   }
04333   return true;
04334 }
04335 
04336 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
04337 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
04338 /// <0, 0, 1, 1>
04339 static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
04340   unsigned NumElts = VT.getVectorNumElements();
04341   bool Is256BitVec = VT.is256BitVector();
04342 
04343   if (VT.is512BitVector())
04344     return false;
04345   assert((VT.is128BitVector() || VT.is256BitVector()) &&
04346          "Unsupported vector type for unpckh");
04347 
04348   if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
04349       (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04350     return false;
04351 
04352   // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern.
04353   // FIXME: Need a better way to get rid of this, there's no latency difference
04354   // between UNPCKLPD and MOVDDUP; the latter should always be checked first and
04355   // the former later. We should also remove the "_undef" special mask.
04356   if (NumElts == 4 && Is256BitVec)
04357     return false;
04358 
04359   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
04360   // independently on 128-bit lanes.
04361   unsigned NumLanes = VT.getSizeInBits()/128;
04362   unsigned NumLaneElts = NumElts/NumLanes;
04363 
04364   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
04365     for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
04366       int BitI  = Mask[l+i];
04367       int BitI1 = Mask[l+i+1];
04368 
04369       if (!isUndefOrEqual(BitI, j))
04370         return false;
04371       if (!isUndefOrEqual(BitI1, j))
04372         return false;
04373     }
04374   }
04375 
04376   return true;
04377 }
04378 
04379 /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
04380 /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
04381 /// <2, 2, 3, 3>
04382 static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
04383   unsigned NumElts = VT.getVectorNumElements();
04384 
04385   if (VT.is512BitVector())
04386     return false;
04387 
04388   assert((VT.is128BitVector() || VT.is256BitVector()) &&
04389          "Unsupported vector type for unpckh");
04390 
04391   if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
04392       (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04393     return false;
04394 
04395   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
04396   // independently on 128-bit lanes.
04397   unsigned NumLanes = VT.getSizeInBits()/128;
04398   unsigned NumLaneElts = NumElts/NumLanes;
04399 
04400   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
04401     for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
04402       int BitI  = Mask[l+i];
04403       int BitI1 = Mask[l+i+1];
04404       if (!isUndefOrEqual(BitI, j))
04405         return false;
04406       if (!isUndefOrEqual(BitI1, j))
04407         return false;
04408     }
04409   }
04410   return true;
04411 }
04412 
04413 // Match for INSERTI64x4/INSERTF64x4 instructions, i.e. (src0[0], src1[0]) or
04414 // (src1[0], src0[1]); these manipulate 256-bit halves of a 512-bit vector.
04415 static bool isINSERT64x4Mask(ArrayRef<int> Mask, MVT VT, unsigned int *Imm) {
04416   if (!VT.is512BitVector())
04417     return false;
04418 
04419   unsigned NumElts = VT.getVectorNumElements();
04420   unsigned HalfSize = NumElts/2;
04421   if (isSequentialOrUndefInRange(Mask, 0, HalfSize, 0)) {
04422     if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, NumElts)) {
04423       *Imm = 1;
04424       return true;
04425     }
04426   }
04427   if (isSequentialOrUndefInRange(Mask, 0, HalfSize, NumElts)) {
04428     if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, HalfSize)) {
04429       *Imm = 0;
04430       return true;
04431     }
04432   }
04433   return false;
04434 }
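
// For example, for v8i64 the mask <0, 1, 2, 3, 8, 9, 10, 11> keeps the low
// half of the first source and places the low half of the second source in the
// upper half of the result (Imm = 1), while <8, 9, 10, 11, 4, 5, 6, 7> places
// it in the lower half (Imm = 0).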
04435 
04436 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
04437 /// specifies a shuffle of elements that is suitable for input to MOVSS,
04438 /// MOVSD, and MOVD, i.e. setting the lowest element.
04439 static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
04440   if (VT.getVectorElementType().getSizeInBits() < 32)
04441     return false;
04442   if (!VT.is128BitVector())
04443     return false;
04444 
04445   unsigned NumElts = VT.getVectorNumElements();
04446 
04447   if (!isUndefOrEqual(Mask[0], NumElts))
04448     return false;
04449 
04450   for (unsigned i = 1; i != NumElts; ++i)
04451     if (!isUndefOrEqual(Mask[i], i))
04452       return false;
04453 
04454   return true;
04455 }
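
// For example, the v4i32 mask <4, 1, 2, 3> takes the lowest element from V2
// and the remaining elements from V1 in order, matching the MOVSS pattern of
// replacing only the lowest element, so this returns true.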
04456 
04457 /// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
04458 /// as permutations between 128-bit chunks or halves. As an example, in the
04459 /// shuffle below:
04460 ///   vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
04461 /// the first half comes from the second half of V1 and the second half from
04462 /// the second half of V2.
04463 static bool isVPERM2X128Mask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
04464   if (!HasFp256 || !VT.is256BitVector())
04465     return false;
04466 
04467   // The shuffle result is divided into half A and half B. In total the two
04468   // sources have 4 halves, namely: C, D, E, F. The final values of A and
04469   // B must come from C, D, E or F.
04470   unsigned HalfSize = VT.getVectorNumElements()/2;
04471   bool MatchA = false, MatchB = false;
04472 
04473   // Check if A comes from one of C, D, E, F.
04474   for (unsigned Half = 0; Half != 4; ++Half) {
04475     if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
04476       MatchA = true;
04477       break;
04478     }
04479   }
04480 
04481   // Check if B comes from one of C, D, E, F.
04482   for (unsigned Half = 0; Half != 4; ++Half) {
04483     if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
04484       MatchB = true;
04485       break;
04486     }
04487   }
04488 
04489   return MatchA && MatchB;
04490 }
04491 
04492 /// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
04493 /// the specified VECTOR_SHUFFLE mask with VPERM2F128/VPERM2I128 instructions.
04494 static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
04495   MVT VT = SVOp->getSimpleValueType(0);
04496 
04497   unsigned HalfSize = VT.getVectorNumElements()/2;
04498 
04499   unsigned FstHalf = 0, SndHalf = 0;
04500   for (unsigned i = 0; i < HalfSize; ++i) {
04501     if (SVOp->getMaskElt(i) > 0) {
04502       FstHalf = SVOp->getMaskElt(i)/HalfSize;
04503       break;
04504     }
04505   }
04506   for (unsigned i = HalfSize; i < HalfSize*2; ++i) {
04507     if (SVOp->getMaskElt(i) > 0) {
04508       SndHalf = SVOp->getMaskElt(i)/HalfSize;
04509       break;
04510     }
04511   }
04512 
04513   return (FstHalf | (SndHalf << 4));
04514 }
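
// Worked example: for the v8i32 mask <4, 5, 6, 7, 12, 13, 14, 15>, HalfSize is
// 4, the first result half comes from half 1 (the upper half of V1) and the
// second from half 3 (the upper half of V2), giving an immediate of
// 1 | (3 << 4) == 0x31.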
04515 
04516 // Symmetric in-lane mask. Each lane has 4 elements (for imm8).
04517 static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
04518   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
04519   if (EltSize < 32)
04520     return false;
04521 
04522   unsigned NumElts = VT.getVectorNumElements();
04523   Imm8 = 0;
04524   if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
04525     for (unsigned i = 0; i != NumElts; ++i) {
04526       if (Mask[i] < 0)
04527         continue;
04528       Imm8 |= Mask[i] << (i*2);
04529     }
04530     return true;
04531   }
04532 
04533   unsigned LaneSize = 4;
04534   SmallVector<int, 4> MaskVal(LaneSize, -1);
04535 
04536   for (unsigned l = 0; l != NumElts; l += LaneSize) {
04537     for (unsigned i = 0; i != LaneSize; ++i) {
04538       if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
04539         return false;
04540       if (Mask[i+l] < 0)
04541         continue;
04542       if (MaskVal[i] < 0) {
04543         MaskVal[i] = Mask[i+l] - l;
04544         Imm8 |= MaskVal[i] << (i*2);
04545         continue;
04546       }
04547       if (Mask[i+l] != (signed)(MaskVal[i]+l))
04548         return false;
04549     }
04550   }
04551   return true;
04552 }
04553 
04554 /// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
04555 /// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
04556 /// Note that VPERMIL mask matching differs depending on whether the underlying
04557 /// element type is 32 or 64 bits. For VPERMILPS the high half of the mask must
04558 /// select the same positions as the low half, but from the upper half of the source.
04559 /// For VPERMILPD the two lanes can be shuffled independently of each other, with
04560 /// the same restriction that lanes can't be crossed. Also handles PSHUFDY.
04561 static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT) {
04562   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
04563   if (VT.getSizeInBits() < 256 || EltSize < 32)
04564     return false;
04565   bool symetricMaskRequired = (EltSize == 32);
04566   unsigned NumElts = VT.getVectorNumElements();
04567 
04568   unsigned NumLanes = VT.getSizeInBits()/128;
04569   unsigned LaneSize = NumElts/NumLanes;
04570   // 2 or 4 elements in one lane
04571 
04572   SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
04573   for (unsigned l = 0; l != NumElts; l += LaneSize) {
04574     for (unsigned i = 0; i != LaneSize; ++i) {
04575       if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
04576         return false;
04577       if (symetricMaskRequired) {
04578         if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
04579           ExpectedMaskVal[i] = Mask[i+l] - l;
04580           continue;
04581         }
04582         if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l))
04583           return false;
04584       }
04585     }
04586   }
04587   return true;
04588 }
04589 
04590 /// isCommutedMOVLMask - Returns true if the shuffle mask is exactly the reverse
04591 /// of what x86 movss wants. X86 movss requires the lowest element to be the
04592 /// lowest element of vector 2 and the other elements to come from vector 1 in order.
04593 static bool isCommutedMOVLMask(ArrayRef<int> Mask, MVT VT,
04594                                bool V2IsSplat = false, bool V2IsUndef = false) {
04595   if (!VT.is128BitVector())
04596     return false;
04597 
04598   unsigned NumOps = VT.getVectorNumElements();
04599   if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
04600     return false;
04601 
04602   if (!isUndefOrEqual(Mask[0], 0))
04603     return false;
04604 
04605   for (unsigned i = 1; i != NumOps; ++i)
04606     if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
04607           (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
04608           (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
04609       return false;
04610 
04611   return true;
04612 }
04613 
04614 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
04615 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
04616 /// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
04617 static bool isMOVSHDUPMask(ArrayRef<int> Mask, MVT VT,
04618                            const X86Subtarget *Subtarget) {
04619   if (!Subtarget->hasSSE3())
04620     return false;
04621 
04622   unsigned NumElems = VT.getVectorNumElements();
04623 
04624   if ((VT.is128BitVector() && NumElems != 4) ||
04625       (VT.is256BitVector() && NumElems != 8) ||
04626       (VT.is512BitVector() && NumElems != 16))
04627     return false;
04628 
04629   // "i+1" is the value the indexed mask element must have
04630   for (unsigned i = 0; i != NumElems; i += 2)
04631     if (!isUndefOrEqual(Mask[i], i+1) ||
04632         !isUndefOrEqual(Mask[i+1], i+1))
04633       return false;
04634 
04635   return true;
04636 }
04637 
04638 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
04639 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
04640 /// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
04641 static bool isMOVSLDUPMask(ArrayRef<int> Mask, MVT VT,
04642                            const X86Subtarget *Subtarget) {
04643   if (!Subtarget->hasSSE3())
04644     return false;
04645 
04646   unsigned NumElems = VT.getVectorNumElements();
04647 
04648   if ((VT.is128BitVector() && NumElems != 4) ||
04649       (VT.is256BitVector() && NumElems != 8) ||
04650       (VT.is512BitVector() && NumElems != 16))
04651     return false;
04652 
04653   // "i" is the value the indexed mask element must have
04654   for (unsigned i = 0; i != NumElems; i += 2)
04655     if (!isUndefOrEqual(Mask[i], i) ||
04656         !isUndefOrEqual(Mask[i+1], i))
04657       return false;
04658 
04659   return true;
04660 }
04661 
04662 /// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
04663 /// specifies a shuffle of elements that is suitable for input to 256-bit
04664 /// version of MOVDDUP.
04665 static bool isMOVDDUPYMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
04666   if (!HasFp256 || !VT.is256BitVector())
04667     return false;
04668 
04669   unsigned NumElts = VT.getVectorNumElements();
04670   if (NumElts != 4)
04671     return false;
04672 
04673   for (unsigned i = 0; i != NumElts/2; ++i)
04674     if (!isUndefOrEqual(Mask[i], 0))
04675       return false;
04676   for (unsigned i = NumElts/2; i != NumElts; ++i)
04677     if (!isUndefOrEqual(Mask[i], NumElts/2))
04678       return false;
04679   return true;
04680 }
04681 
04682 /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
04683 /// specifies a shuffle of elements that is suitable for input to 128-bit
04684 /// version of MOVDDUP.
04685 static bool isMOVDDUPMask(ArrayRef<int> Mask, MVT VT) {
04686   if (!VT.is128BitVector())
04687     return false;
04688 
04689   unsigned e = VT.getVectorNumElements() / 2;
04690   for (unsigned i = 0; i != e; ++i)
04691     if (!isUndefOrEqual(Mask[i], i))
04692       return false;
04693   for (unsigned i = 0; i != e; ++i)
04694     if (!isUndefOrEqual(Mask[e+i], i))
04695       return false;
04696   return true;
04697 }
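
// For example, the v2f64 mask <0, 0> duplicates the low element of the first
// source into both result elements, which is exactly what MOVDDUP does, so it
// is accepted.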
04698 
04699 /// isVEXTRACTIndex - Return true if the specified
04700 /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
04701 /// suitable for instructions that extract 128- or 256-bit vectors.
04702 static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
04703   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
04704   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
04705     return false;
04706 
04707   // The index should be aligned on a vecWidth-bit boundary.
04708   uint64_t Index =
04709     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
04710 
04711   MVT VT = N->getSimpleValueType(0);
04712   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
04713   bool Result = (Index * ElSize) % vecWidth == 0;
04714 
04715   return Result;
04716 }
04717 
04718 /// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR
04719 /// operand specifies a subvector insert that is suitable for input to
04720 /// insertion of 128- or 256-bit subvectors.
04721 static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
04722   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
04723   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
04724     return false;
04725   // The index should be aligned on a vecWidth-bit boundary.
04726   uint64_t Index =
04727     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
04728 
04729   MVT VT = N->getSimpleValueType(0);
04730   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
04731   bool Result = (Index * ElSize) % vecWidth == 0;
04732 
04733   return Result;
04734 }
04735 
04736 bool X86::isVINSERT128Index(SDNode *N) {
04737   return isVINSERTIndex(N, 128);
04738 }
04739 
04740 bool X86::isVINSERT256Index(SDNode *N) {
04741   return isVINSERTIndex(N, 256);
04742 }
04743 
04744 bool X86::isVEXTRACT128Index(SDNode *N) {
04745   return isVEXTRACTIndex(N, 128);
04746 }
04747 
04748 bool X86::isVEXTRACT256Index(SDNode *N) {
04749   return isVEXTRACTIndex(N, 256);
04750 }
04751 
04752 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
04753 /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
04754 /// Handles 128-bit and 256-bit.
04755 static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
04756   MVT VT = N->getSimpleValueType(0);
04757 
04758   assert((VT.getSizeInBits() >= 128) &&
04759          "Unsupported vector type for PSHUF/SHUFP");
04760 
04761   // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
04762   // independently on 128-bit lanes.
04763   unsigned NumElts = VT.getVectorNumElements();
04764   unsigned NumLanes = VT.getSizeInBits()/128;
04765   unsigned NumLaneElts = NumElts/NumLanes;
04766 
04767   assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
04768          "Only supports 2, 4 or 8 elements per lane");
04769 
04770   unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
04771   unsigned Mask = 0;
04772   for (unsigned i = 0; i != NumElts; ++i) {
04773     int Elt = N->getMaskElt(i);
04774     if (Elt < 0) continue;
04775     Elt &= NumLaneElts - 1;
04776     unsigned ShAmt = (i << Shift) % 8;
04777     Mask |= Elt << ShAmt;
04778   }
04779 
04780   return Mask;
04781 }
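
// Worked example: for a v4f32 shuffle with mask <3, 1, 2, 0>, each element
// index occupies two bits of the immediate, giving
// 3 | (1 << 2) | (2 << 4) | (0 << 6) == 0x27 as the PSHUFD/SHUFPS immediate.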
04782 
04783 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
04784 /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
04785 static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
04786   MVT VT = N->getSimpleValueType(0);
04787 
04788   assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
04789          "Unsupported vector type for PSHUFHW");
04790 
04791   unsigned NumElts = VT.getVectorNumElements();
04792 
04793   unsigned Mask = 0;
04794   for (unsigned l = 0; l != NumElts; l += 8) {
04795     // 8 nodes per lane, but we only care about the last 4.
04796     for (unsigned i = 0; i < 4; ++i) {
04797       int Elt = N->getMaskElt(l+i+4);
04798       if (Elt < 0) continue;
04799       Elt &= 0x3; // only 2-bits.
04800       Mask |= Elt << (i * 2);
04801     }
04802   }
04803 
04804   return Mask;
04805 }
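
// Worked example: for the v8i16 mask <0, 1, 2, 3, 7, 6, 5, 4> only elements
// 4-7 matter; reduced modulo 4 they are 3, 2, 1, 0, so the immediate is
// 3 | (2 << 2) | (1 << 4) | (0 << 6) == 0x1B.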
04806 
04807 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
04808 /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
04809 static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
04810   MVT VT = N->getSimpleValueType(0);
04811 
04812   assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
04813          "Unsupported vector type for PSHUFHW");
04814 
04815   unsigned NumElts = VT.getVectorNumElements();
04816 
04817   unsigned Mask = 0;
04818   for (unsigned l = 0; l != NumElts; l += 8) {
04819     // 8 nodes per lane, but we only care about the first 4.
04820     for (unsigned i = 0; i < 4; ++i) {
04821       int Elt = N->getMaskElt(l+i);
04822       if (Elt < 0) continue;
04823       Elt &= 0x3; // only 2-bits
04824       Mask |= Elt << (i * 2);
04825     }
04826   }
04827 
04828   return Mask;
04829 }
04830 
04831 /// \brief Return the appropriate immediate to shuffle the specified
04832 /// VECTOR_SHUFFLE mask with the PALIGNR (if InterLane is false) or with
04833 /// VALIGN (if InterLane is true) instructions.
04834 static unsigned getShuffleAlignrImmediate(ShuffleVectorSDNode *SVOp,
04835                                            bool InterLane) {
04836   MVT VT = SVOp->getSimpleValueType(0);
04837   unsigned EltSize = InterLane ? 1 :
04838     VT.getVectorElementType().getSizeInBits() >> 3;
04839 
04840   unsigned NumElts = VT.getVectorNumElements();
04841   unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
04842   unsigned NumLaneElts = NumElts/NumLanes;
04843 
04844   int Val = 0;
04845   unsigned i;
04846   for (i = 0; i != NumElts; ++i) {
04847     Val = SVOp->getMaskElt(i);
04848     if (Val >= 0)
04849       break;
04850   }
04851   if (Val >= (int)NumElts)
04852     Val -= NumElts - NumLaneElts;
04853 
04854   assert(Val - i > 0 && "PALIGNR imm should be positive");
04855   return (Val - i) * EltSize;
04856 }
04857 
04858 /// \brief Return the appropriate immediate to shuffle the specified
04859 /// VECTOR_SHUFFLE mask with the PALIGNR instruction.
04860 static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
04861   return getShuffleAlignrImmediate(SVOp, false);
04862 }
04863 
04864 /// \brief Return the appropriate immediate to shuffle the specified
04865 /// VECTOR_SHUFFLE mask with the VALIGN instruction.
04866 static unsigned getShuffleVALIGNImmediate(ShuffleVectorSDNode *SVOp) {
04867   return getShuffleAlignrImmediate(SVOp, true);
04868 }
04869 
04870 
04871 static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
04872   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
04873   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
04874     llvm_unreachable("Illegal extract subvector for VEXTRACT");
04875 
04876   uint64_t Index =
04877     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
04878 
04879   MVT VecVT = N->getOperand(0).getSimpleValueType();
04880   MVT ElVT = VecVT.getVectorElementType();
04881 
04882   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
04883   return Index / NumElemsPerChunk;
04884 }
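
// Worked example: extracting a v4f32 subvector from a v8f32 source starting at
// element index 4 with vecWidth == 128 gives NumElemsPerChunk == 4 and thus a
// VEXTRACTF128 immediate of 4 / 4 == 1 (the upper 128-bit half).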
04885 
04886 static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
04887   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
04888   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
04889     llvm_unreachable("Illegal insert subvector for VINSERT");
04890 
04891   uint64_t Index =
04892     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
04893 
04894   MVT VecVT = N->getSimpleValueType(0);
04895   MVT ElVT = VecVT.getVectorElementType();
04896 
04897   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
04898   return Index / NumElemsPerChunk;
04899 }
04900 
04901 /// getExtractVEXTRACT128Immediate - Return the appropriate immediate
04902 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
04903 /// and VEXTRACTI128 instructions.
04904 unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
04905   return getExtractVEXTRACTImmediate(N, 128);
04906 }
04907 
04908 /// getExtractVEXTRACT256Immediate - Return the appropriate immediate
04909 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4
04910 /// and VEXTRACTI64x4 instructions.
04911 unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
04912   return getExtractVEXTRACTImmediate(N, 256);
04913 }
04914 
04915 /// getInsertVINSERT128Immediate - Return the appropriate immediate
04916 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
04917 /// and VINSERTI128 instructions.
04918 unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
04919   return getInsertVINSERTImmediate(N, 128);
04920 }
04921 
04922 /// getInsertVINSERT256Immediate - Return the appropriate immediate
04923 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF64x4
04924 /// and VINSERTI64x4 instructions.
04925 unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
04926   return getInsertVINSERTImmediate(N, 256);
04927 }
04928 
04929 /// isZero - Returns true if Elt is a constant integer zero
04930 static bool isZero(SDValue V) {
04931   ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
04932   return C && C->isNullValue();
04933 }
04934 
04935 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
04936 /// constant +0.0.
04937 bool X86::isZeroNode(SDValue Elt) {
04938   if (isZero(Elt))
04939     return true;
04940   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
04941     return CFP->getValueAPF().isPosZero();
04942   return false;
04943 }
04944 
04945 /// ShouldXformToMOVHLPS - Return true if the node should be transformed to
04946 /// match movhlps. The lower half elements should come from the upper half of
04947 /// V1 (and in order), and the upper half elements should come from the upper
04948 /// half of V2 (and in order).
04949 static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, MVT VT) {
04950   if (!VT.is128BitVector())
04951     return false;
04952   if (VT.getVectorNumElements() != 4)
04953     return false;
04954   for (unsigned i = 0, e = 2; i != e; ++i)
04955     if (!isUndefOrEqual(Mask[i], i+2))
04956       return false;
04957   for (unsigned i = 2; i != 4; ++i)
04958     if (!isUndefOrEqual(Mask[i], i+4))
04959       return false;
04960   return true;
04961 }
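      // For example, the v4f32 shuffle mask <2,3,6,7> satisfies this predicate:
      // the low half of the result is the upper half of V1 (elements 2,3) and
      // the high half is the upper half of V2 (elements 6,7).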
04962 
04963 /// isScalarLoadToVector - Returns true if the node is a scalar load that
04964 /// is promoted to a vector. It also returns the LoadSDNode by reference if
04965 /// required.
04966 static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = nullptr) {
04967   if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
04968     return false;
04969   N = N->getOperand(0).getNode();
04970   if (!ISD::isNON_EXTLoad(N))
04971     return false;
04972   if (LD)
04973     *LD = cast<LoadSDNode>(N);
04974   return true;
04975 }
04976 
04977 // Test whether the given value is a vector value which will be legalized
04978 // into a load.
04979 static bool WillBeConstantPoolLoad(SDNode *N) {
04980   if (N->getOpcode() != ISD::BUILD_VECTOR)
04981     return false;
04982 
04983   // Check for any non-constant elements.
04984   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
04985     switch (N->getOperand(i).getNode()->getOpcode()) {
04986     case ISD::UNDEF:
04987     case ISD::ConstantFP:
04988     case ISD::Constant:
04989       break;
04990     default:
04991       return false;
04992     }
04993 
04994   // Vectors of all-zeros and all-ones are materialized with special
04995   // instructions rather than being loaded.
04996   return !ISD::isBuildVectorAllZeros(N) &&
04997          !ISD::isBuildVectorAllOnes(N);
04998 }
04999 
05000 /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
05001 /// match movlp{s|d}. The lower half elements should come from the lower half of
05002 /// V1 (and in order), and the upper half elements should come from the upper
05003 /// half of V2 (and in order). And since V1 will become the source of the
05004 /// MOVLP, it must be either a vector load or a scalar load to vector.
05005 static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
05006                                ArrayRef<int> Mask, MVT VT) {
05007   if (!VT.is128BitVector())
05008     return false;
05009 
05010   if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
05011     return false;
05012   // If V2 is a vector load, don't do this transformation. We will try to use
05013   // a load-folding shufps op instead.
05014   if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2))
05015     return false;
05016 
05017   unsigned NumElems = VT.getVectorNumElements();
05018 
05019   if (NumElems != 2 && NumElems != 4)
05020     return false;
05021   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
05022     if (!isUndefOrEqual(Mask[i], i))
05023       return false;
05024   for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
05025     if (!isUndefOrEqual(Mask[i], i+NumElems))
05026       return false;
05027   return true;
05028 }
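      // For example, assuming V1 is a load and V2 is not, the v4f32 shuffle
      // mask <0,1,6,7> satisfies this predicate: the low half keeps V1's low
      // half in order and the high half takes V2's upper half (elements 6,7)
      // in order.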
05029 
05030 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
05031 /// to a zero vector.
05032 /// FIXME: move to dag combiner / method on ShuffleVectorSDNode
05033 static bool isZeroShuffle(ShuffleVectorSDNode *N) {
05034   SDValue V1 = N->getOperand(0);
05035   SDValue V2 = N->getOperand(1);
05036   unsigned NumElems = N->getValueType(0).getVectorNumElements();
05037   for (unsigned i = 0; i != NumElems; ++i) {
05038     int Idx = N->getMaskElt(i);
05039     if (Idx >= (int)NumElems) {
05040       unsigned Opc = V2.getOpcode();
05041       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
05042         continue;
05043       if (Opc != ISD::BUILD_VECTOR ||
05044           !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
05045         return false;
05046     } else if (Idx >= 0) {
05047       unsigned Opc = V1.getOpcode();
05048       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
05049         continue;
05050       if (Opc != ISD::BUILD_VECTOR ||
05051           !X86::isZeroNode(V1.getOperand(Idx)))
05052         return false;
05053     }
05054   }
05055   return true;
05056 }
05057 
05058 /// getZeroVector - Returns a vector of specified type with all zero elements.
05059 ///
05060 static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
05061                              SelectionDAG &DAG, SDLoc dl) {
05062   assert(VT.isVector() && "Expected a vector type");
05063 
05064   // Always build SSE zero vectors as <4 x i32> bitcasted
05065   // to their dest type. This ensures they get CSE'd.
05066   SDValue Vec;
05067   if (VT.is128BitVector()) {  // SSE
05068     if (Subtarget->hasSSE2()) {  // SSE2
05069       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
05070       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
05071     } else { // SSE1
05072       SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
05073       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
05074     }
05075   } else if (VT.is256BitVector()) { // AVX
05076     if (Subtarget->hasInt256()) { // AVX2
05077       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
05078       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
05079       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
05080     } else {
05081       // 256-bit logic and arithmetic instructions in AVX are all
05082       // floating-point, no support for integer ops. Emit fp zeroed vectors.
05083       SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
05084       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
05085       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
05086     }
05087   } else if (VT.is512BitVector()) { // AVX-512
05088       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
05089       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
05090                         Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
05091       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
05092   } else if (VT.getScalarType() == MVT::i1) {
05093     assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
05094     SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
05095     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
05096     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
05097   } else
05098     llvm_unreachable("Unexpected vector type");
05099 
05100   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
05101 }
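      // For example, getZeroVector(MVT::v2f64, ...) on an SSE2 target builds a
      // v4i32 BUILD_VECTOR of zeros and bitcasts it to v2f64, so 128-bit zero
      // vectors of different element types share one underlying node via CSE.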
05102 
05103 /// getOnesVector - Returns a vector of specified type with all bits set.
05104 /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
05105 /// no AVX2 support, use two <4 x i32> inserted in an <8 x i32> appropriately.
05106 /// Then bitcast to their original type, ensuring they get CSE'd.
05107 static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
05108                              SDLoc dl) {
05109   assert(VT.isVector() && "Expected a vector type");
05110 
05111   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
05112   SDValue Vec;
05113   if (VT.is256BitVector()) {
05114     if (HasInt256) { // AVX2
05115       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
05116       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
05117     } else { // AVX
05118       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
05119       Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
05120     }
05121   } else if (VT.is128BitVector()) {
05122     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
05123   } else
05124     llvm_unreachable("Unexpected vector type");
05125 
05126   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
05127 }
05128 
05129 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
05130 /// that point to V2 point to its first element.
05131 static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) {
05132   for (unsigned i = 0; i != NumElems; ++i) {
05133     if (Mask[i] > (int)NumElems) {
05134       Mask[i] = NumElems;
05135     }
05136   }
05137 }
05138 
05139 /// getMOVL - Returns a vector_shuffle node for a movs{s|d}, movd
05140 /// operation of the specified width.
05141 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
05142                        SDValue V2) {
05143   unsigned NumElems = VT.getVectorNumElements();
05144   SmallVector<int, 8> Mask;
05145   Mask.push_back(NumElems);
05146   for (unsigned i = 1; i != NumElems; ++i)
05147     Mask.push_back(i);
05148   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
05149 }
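      // For example, for v4f32 this builds the mask <4,1,2,3>: element 0 comes
      // from V2 and the remaining elements come from V1, which matches the
      // semantics of MOVSS.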
05150 
05151 /// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
05152 static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
05153                           SDValue V2) {
05154   unsigned NumElems = VT.getVectorNumElements();
05155   SmallVector<int, 8> Mask;
05156   for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
05157     Mask.push_back(i);
05158     Mask.push_back(i + NumElems);
05159   }
05160   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
05161 }
05162 
05163 /// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
05164 static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
05165                           SDValue V2) {
05166   unsigned NumElems = VT.getVectorNumElements();
05167   SmallVector<int, 8> Mask;
05168   for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
05169     Mask.push_back(i + Half);
05170     Mask.push_back(i + NumElems + Half);
05171   }
05172   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
05173 }
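      // For example, for v4i32 getUnpackl builds the mask <0,4,1,5> (interleave
      // the low halves of V1 and V2) and getUnpackh builds <2,6,3,7> (interleave
      // the high halves), matching PUNPCKLDQ and PUNPCKHDQ respectively.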
05174 
05175 // PromoteSplati8i16 - i8 and i16 vector types can't be used directly by
05176 // a generic shuffle instruction because the target has no such instructions.
05177 // Generate shuffles which repeat i16 and i8 several times until they can be
05178 // represented by v4f32 and then be manipulated by target supported shuffles.
05179 static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
05180   MVT VT = V.getSimpleValueType();
05181   int NumElems = VT.getVectorNumElements();
05182   SDLoc dl(V);
05183 
05184   while (NumElems > 4) {
05185     if (EltNo < NumElems/2) {
05186       V = getUnpackl(DAG, dl, VT, V, V);
05187     } else {
05188       V = getUnpackh(DAG, dl, VT, V, V);
05189       EltNo -= NumElems/2;
05190     }
05191     NumElems >>= 1;
05192   }
05193   return V;
05194 }
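      // For example, splatting element 5 of a v16i8: the first iteration sees
      // EltNo == 5 < 8 and unpacks the low half with itself, duplicating each
      // of bytes 0..7; the second iteration sees EltNo == 5 >= 4, unpacks the
      // high half and rewrites EltNo to 1. The splat byte now fills 32-bit
      // lane 1, which getLegalSplat below can broadcast with a v4f32 shuffle.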
05195 
05196 /// getLegalSplat - Generate a legal splat with supported x86 shuffles
05197 static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
05198   MVT VT = V.getSimpleValueType();
05199   SDLoc dl(V);
05200 
05201   if (VT.is128BitVector()) {
05202     V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
05203     int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
05204     V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
05205                              &SplatMask[0]);
05206   } else if (VT.is256BitVector()) {
05207     // To use VPERMILPS to splat scalars, the second half of indices must
05208     // refer to the higher part, which is a duplication of the lower one,
05209     // because VPERMILPS can only handle in-lane permutations.
05210     int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
05211                          EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
05212 
05213     V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
05214     V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
05215                              &SplatMask[0]);
05216   } else
05217     llvm_unreachable("Vector size not supported");
05218 
05219   return DAG.getNode(ISD::BITCAST, dl, VT, V);
05220 }
05221 
05222 /// PromoteSplat - Splat is promoted to target supported vector shuffles.
05223 static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
05224   MVT SrcVT = SV->getSimpleValueType(0);
05225   SDValue V1 = SV->getOperand(0);
05226   SDLoc dl(SV);
05227 
05228   int EltNo = SV->getSplatIndex();
05229   int NumElems = SrcVT.getVectorNumElements();
05230   bool Is256BitVec = SrcVT.is256BitVector();
05231 
05232   assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
05233          "Unknown how to promote splat for type");
05234 
05235   // Extract the 128-bit part containing the splat element and update
05236   // the splat element index when it refers to the higher register.
05237   if (Is256BitVec) {
05238     V1 = Extract128BitVector(V1, EltNo, DAG, dl);
05239     if (EltNo >= NumElems/2)
05240       EltNo -= NumElems/2;
05241   }
05242 
05243   // All i16 and i8 vector types can't be used directly by a generic shuffle
05244   // instruction because the target has no such instruction. Generate shuffles
05245   // which repeat i16 and i8 several times until they fit in i32, and then can
05246   // be manipulated by target supported shuffles.
05247   MVT EltVT = SrcVT.getVectorElementType();
05248   if (EltVT == MVT::i8 || EltVT == MVT::i16)
05249     V1 = PromoteSplati8i16(V1, DAG, EltNo);
05250 
05251   // Recreate the 256-bit vector and place the same 128-bit vector
05252   // into the low and high part. This is necessary because we want
05253   // to use VPERM* to shuffle the vectors
05254   if (Is256BitVec) {
05255     V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
05256   }
05257 
05258   return getLegalSplat(DAG, V1, EltNo);
05259 }
05260 
05261 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
05262 /// vector and a zero or undef vector.  This produces a shuffle where the low
05263 /// element of V2 is swizzled into the zero/undef vector, landing at element
05264 /// Idx.  This produces a shuffle mask like 4,1,2,3 (idx=0) or  0,1,2,4 (idx=3).
05265 static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
05266                                            bool IsZero,
05267                                            const X86Subtarget *Subtarget,
05268                                            SelectionDAG &DAG) {
05269   MVT VT = V2.getSimpleValueType();
05270   SDValue V1 = IsZero
05271     ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
05272   unsigned NumElems = VT.getVectorNumElements();
05273   SmallVector<int, 16> MaskVec;
05274   for (unsigned i = 0; i != NumElems; ++i)
05275     // If this is the insertion idx, put the low elt of V2 here.
05276     MaskVec.push_back(i == Idx ? NumElems : i);
05277   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
05278 }
05279 
05280 /// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
05281 /// target specific opcode. Returns true if the Mask could be calculated. Sets
05282 /// IsUnary to true if it only uses one source. Note that this will set IsUnary for
05283 /// shuffles which use a single input multiple times, and in those cases it will
05284 /// adjust the mask to only have indices within that single input.
05285 static bool getTargetShuffleMask(SDNode *N, MVT VT,
05286                                  SmallVectorImpl<int> &Mask, bool &IsUnary) {
05287   unsigned NumElems = VT.getVectorNumElements();
05288   SDValue ImmN;
05289 
05290   IsUnary = false;
05291   bool IsFakeUnary = false;
05292   switch(N->getOpcode()) {
05293   case X86ISD::BLENDI:
05294     ImmN = N->getOperand(N->getNumOperands()-1);
05295     DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05296     break;
05297   case X86ISD::SHUFP:
05298     ImmN = N->getOperand(N->getNumOperands()-1);
05299     DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05300     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05301     break;
05302   case X86ISD::UNPCKH:
05303     DecodeUNPCKHMask(VT, Mask);
05304     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05305     break;
05306   case X86ISD::UNPCKL:
05307     DecodeUNPCKLMask(VT, Mask);
05308     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05309     break;
05310   case X86ISD::MOVHLPS:
05311     DecodeMOVHLPSMask(NumElems, Mask);
05312     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05313     break;
05314   case X86ISD::MOVLHPS:
05315     DecodeMOVLHPSMask(NumElems, Mask);
05316     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05317     break;
05318   case X86ISD::PALIGNR:
05319     ImmN = N->getOperand(N->getNumOperands()-1);
05320     DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05321     break;
05322   case X86ISD::PSHUFD:
05323   case X86ISD::VPERMILPI:
05324     ImmN = N->getOperand(N->getNumOperands()-1);
05325     DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05326     IsUnary = true;
05327     break;
05328   case X86ISD::PSHUFHW:
05329     ImmN = N->getOperand(N->getNumOperands()-1);
05330     DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05331     IsUnary = true;
05332     break;
05333   case X86ISD::PSHUFLW:
05334     ImmN = N->getOperand(N->getNumOperands()-1);
05335     DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05336     IsUnary = true;
05337     break;
05338   case X86ISD::PSHUFB: {
05339     IsUnary = true;
05340     SDValue MaskNode = N->getOperand(1);
05341     while (MaskNode->getOpcode() == ISD::BITCAST)
05342       MaskNode = MaskNode->getOperand(0);
05343 
05344     if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
05345       // If we have a build-vector, then things are easy.
05346       EVT VT = MaskNode.getValueType();
05347       assert(VT.isVector() &&
05348              "Can't produce a non-vector with a build_vector!");
05349       if (!VT.isInteger())
05350         return false;
05351 
05352       int NumBytesPerElement = VT.getVectorElementType().getSizeInBits() / 8;
05353 
05354       SmallVector<uint64_t, 32> RawMask;
05355       for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
05356         SDValue Op = MaskNode->getOperand(i);
05357         if (Op->getOpcode() == ISD::UNDEF) {
05358           RawMask.push_back((uint64_t)SM_SentinelUndef);
05359           continue;
05360         }
05361         auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
05362         if (!CN)
05363           return false;
05364         APInt MaskElement = CN->getAPIntValue();
05365 
05366         // We now have to decode the element which could be any integer size and
05367         // extract each byte of it.
05368         for (int j = 0; j < NumBytesPerElement; ++j) {
05369           // Note that this is x86 and so always little endian: the low byte is
05370           // the first byte of the mask.
05371           RawMask.push_back(MaskElement.getLoBits(8).getZExtValue());
05372           MaskElement = MaskElement.lshr(8);
05373         }
05374       }
05375       DecodePSHUFBMask(RawMask, Mask);
05376       break;
05377     }
05378 
05379     auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
05380     if (!MaskLoad)
05381       return false;
05382 
05383     SDValue Ptr = MaskLoad->getBasePtr();
05384     if (Ptr->getOpcode() == X86ISD::Wrapper)
05385       Ptr = Ptr->getOperand(0);
05386 
05387     auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
05388     if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
05389       return false;
05390 
05391     if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
05392       // FIXME: Support AVX-512 here.
05393       Type *Ty = C->getType();
05394       if (!Ty->isVectorTy() || (Ty->getVectorNumElements() != 16 &&
05395                                 Ty->getVectorNumElements() != 32))
05396         return false;
05397 
05398       DecodePSHUFBMask(C, Mask);
05399       break;
05400     }
05401 
05402     return false;
05403   }
05404   case X86ISD::VPERMI:
05405     ImmN = N->getOperand(N->getNumOperands()-1);
05406     DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05407     IsUnary = true;
05408     break;
05409   case X86ISD::MOVSS:
05410   case X86ISD::MOVSD: {
05411     // The index 0 always comes from the first element of the second source;
05412     // this is why MOVSS and MOVSD are used in the first place. The other
05413     // elements come from the other positions of the first source vector.
05414     Mask.push_back(NumElems);
05415     for (unsigned i = 1; i != NumElems; ++i) {
05416       Mask.push_back(i);
05417     }
05418     break;
05419   }
05420   case X86ISD::VPERM2X128:
05421     ImmN = N->getOperand(N->getNumOperands()-1);
05422     DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05423     if (Mask.empty()) return false;
05424     break;
05425   case X86ISD::MOVSLDUP:
05426     DecodeMOVSLDUPMask(VT, Mask);
05427     break;
05428   case X86ISD::MOVSHDUP:
05429     DecodeMOVSHDUPMask(VT, Mask);
05430     break;
05431   case X86ISD::MOVDDUP:
05432   case X86ISD::MOVLHPD:
05433   case X86ISD::MOVLPD:
05434   case X86ISD::MOVLPS:
05435     // Not yet implemented
05436     return false;
05437   default: llvm_unreachable("unknown target shuffle node");
05438   }
05439 
05440   // If we have a fake unary shuffle, the shuffle mask is spread across two
05441   // inputs that are actually the same node. Re-map the mask to always point
05442   // into the first input.
05443   if (IsFakeUnary)
05444     for (int &M : Mask)
05445       if (M >= (int)Mask.size())
05446         M -= Mask.size();
05447 
05448   return true;
05449 }
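      // For example, a v16i8 X86ISD::UNPCKL whose two operands are the same node
      // decodes to <0,16,1,17,...>; the fake-unary remapping above folds indices
      // >= 16 back into the first input, producing <0,0,1,1,...> with IsUnary
      // set.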
05450 
05451 /// getShuffleScalarElt - Returns the scalar element that will make up the ith
05452 /// element of the result of the vector shuffle.
05453 static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
05454                                    unsigned Depth) {
05455   if (Depth == 6)
05456     return SDValue();  // Limit search depth.
05457 
05458   SDValue V = SDValue(N, 0);
05459   EVT VT = V.getValueType();
05460   unsigned Opcode = V.getOpcode();
05461 
05462   // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
05463   if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
05464     int Elt = SV->getMaskElt(Index);
05465 
05466     if (Elt < 0)
05467       return DAG.getUNDEF(VT.getVectorElementType());
05468 
05469     unsigned NumElems = VT.getVectorNumElements();
05470     SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
05471                                          : SV->getOperand(1);
05472     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
05473   }
05474 
05475   // Recurse into target specific vector shuffles to find scalars.
05476   if (isTargetShuffle(Opcode)) {
05477     MVT ShufVT = V.getSimpleValueType();
05478     unsigned NumElems = ShufVT.getVectorNumElements();
05479     SmallVector<int, 16> ShuffleMask;
05480     bool IsUnary;
05481 
05482     if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
05483       return SDValue();
05484 
05485     int Elt = ShuffleMask[Index];
05486     if (Elt < 0)
05487       return DAG.getUNDEF(ShufVT.getVectorElementType());
05488 
05489     SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
05490                                          : N->getOperand(1);
05491     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
05492                                Depth+1);
05493   }
05494 
05495   // Actual nodes that may contain scalar elements
05496   if (Opcode == ISD::BITCAST) {
05497     V = V.getOperand(0);
05498     EVT SrcVT = V.getValueType();
05499     unsigned NumElems = VT.getVectorNumElements();
05500 
05501     if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
05502       return SDValue();
05503   }
05504 
05505   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
05506     return (Index == 0) ? V.getOperand(0)
05507                         : DAG.getUNDEF(VT.getVectorElementType());
05508 
05509   if (V.getOpcode() == ISD::BUILD_VECTOR)
05510     return V.getOperand(Index);
05511 
05512   return SDValue();
05513 }
05514 
05515 /// getNumOfConsecutiveZeros - Return the number of elements of a vector
05516 /// shuffle operation which consecutively come from a zero vector. The
05517 /// search can start in two different directions, from left or right.
05518 /// We count undefs as zeros until PreferredNum is reached.
05519 static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp,
05520                                          unsigned NumElems, bool ZerosFromLeft,
05521                                          SelectionDAG &DAG,
05522                                          unsigned PreferredNum = -1U) {
05523   unsigned NumZeros = 0;
05524   for (unsigned i = 0; i != NumElems; ++i) {
05525     unsigned Index = ZerosFromLeft ? i : NumElems - i - 1;
05526     SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
05527     if (!Elt.getNode())
05528       break;
05529 
05530     if (X86::isZeroNode(Elt))
05531       ++NumZeros;
05532     else if (Elt.getOpcode() == ISD::UNDEF) // Undef as zero up to PreferredNum.
05533       NumZeros = std::min(NumZeros + 1, PreferredNum);
05534     else
05535       break;
05536   }
05537 
05538   return NumZeros;
05539 }
05540 
05541 /// isShuffleMaskConsecutive - Check if the shuffle mask indices [MaskI, MaskE)
05542 /// correspond consecutively to elements from one of the vector operands,
05543 /// starting from its index OpIdx. Also tell OpNum which source vector operand.
05544 static
05545 bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
05546                               unsigned MaskI, unsigned MaskE, unsigned OpIdx,
05547                               unsigned NumElems, unsigned &OpNum) {
05548   bool SeenV1 = false;
05549   bool SeenV2 = false;
05550 
05551   for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
05552     int Idx = SVOp->getMaskElt(i);
05553     // Ignore undef indices
05554     if (Idx < 0)
05555       continue;
05556 
05557     if (Idx < (int)NumElems)
05558       SeenV1 = true;
05559     else
05560       SeenV2 = true;
05561 
05562     // Only accept consecutive elements from the same vector
05563     if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
05564       return false;
05565   }
05566 
05567   OpNum = SeenV1 ? 0 : 1;
05568   return true;
05569 }
05570 
05571 /// isVectorShiftRight - Returns true if the shuffle can be implemented as a
05572 /// logical right shift of a vector.
05573 static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
05574                                bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
05575   unsigned NumElems =
05576     SVOp->getSimpleValueType(0).getVectorNumElements();
05577   unsigned NumZeros = getNumOfConsecutiveZeros(
05578       SVOp, NumElems, false /* check zeros from right */, DAG,
05579       SVOp->getMaskElt(0));
05580   unsigned OpSrc;
05581 
05582   if (!NumZeros)
05583     return false;
05584 
05585   // Considering the elements in the mask that are not consecutive zeros,
05586   // check if they consecutively come from only one of the source vectors.
05587   //
05588   //               V1 = {X, A, B, C}     0
05589   //                         \  \  \    /
05590   //   vector_shuffle V1, V2 <1, 2, 3, X>
05591   //
05592   if (!isShuffleMaskConsecutive(SVOp,
05593             0,                   // Mask Start Index
05594             NumElems-NumZeros,   // Mask End Index(exclusive)
05595             NumZeros,            // Where to start looking in the src vector
05596             NumElems,            // Number of elements in vector
05597             OpSrc))              // Which source operand ?
05598     return false;
05599 
05600   isLeft = false;
05601   ShAmt = NumZeros;
05602   ShVal = SVOp->getOperand(OpSrc);
05603   return true;
05604 }
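      // For example, shuffling a v4i32 V1 with an all-zeros V2 using the mask
      // <1,2,3,4> has one trailing zero and takes elements 1..3 of V1
      // consecutively, so this reports a logical right shift of V1 by one
      // element (ShAmt == 1).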
05605 
05606 /// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
05607 /// logical left shift of a vector.
05608 static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
05609                               bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
05610   unsigned NumElems =
05611     SVOp->getSimpleValueType(0).getVectorNumElements();
05612   unsigned NumZeros = getNumOfConsecutiveZeros(
05613       SVOp, NumElems, true /* check zeros from left */, DAG,
05614       NumElems - SVOp->getMaskElt(NumElems - 1) - 1);
05615   unsigned OpSrc;
05616 
05617   if (!NumZeros)
05618     return false;
05619 
05620   // Considering the elements in the mask that are not consecutive zeros,
05621   // check if they consecutively come from only one of the source vectors.
05622   //
05623   //                           0    { A, B, X, X } = V2
05624   //                          / \    /  /
05625   //   vector_shuffle V1, V2 <X, X, 4, 5>
05626   //
05627   if (!isShuffleMaskConsecutive(SVOp,
05628             NumZeros,     // Mask Start Index
05629             NumElems,     // Mask End Index(exclusive)
05630             0,            // Where to start looking in the src vector
05631             NumElems,     // Number of elements in vector
05632             OpSrc))       // Which source operand ?
05633     return false;
05634 
05635   isLeft = true;
05636   ShAmt = NumZeros;
05637   ShVal = SVOp->getOperand(OpSrc);
05638   return true;
05639 }
05640 
05641 /// isVectorShift - Returns true if the shuffle can be implemented as a
05642 /// logical left or right shift of a vector.
05643 static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
05644                           bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
05645   // Although the logic below supports any bitwidth, there are no
05646   // shift instructions which handle more than 128-bit vectors.
05647   if (!SVOp->getSimpleValueType(0).is128BitVector())
05648     return false;
05649 
05650   if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
05651       isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
05652     return true;
05653 
05654   return false;
05655 }
05656 
05657 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
05658 ///
05659 static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
05660                                        unsigned NumNonZero, unsigned NumZero,
05661                                        SelectionDAG &DAG,
05662                                        const X86Subtarget* Subtarget,
05663                                        const TargetLowering &TLI) {
05664   if (NumNonZero > 8)
05665     return SDValue();
05666 
05667   SDLoc dl(Op);
05668   SDValue V;
05669   bool First = true;
05670   for (unsigned i = 0; i < 16; ++i) {
05671     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
05672     if (ThisIsNonZero && First) {
05673       if (NumZero)
05674         V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
05675       else
05676         V = DAG.getUNDEF(MVT::v8i16);
05677       First = false;
05678     }
05679 
05680     if ((i & 1) != 0) {
05681       SDValue ThisElt, LastElt;
05682       bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
05683       if (LastIsNonZero) {
05684         LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
05685                               MVT::i16, Op.getOperand(i-1));
05686       }
05687       if (ThisIsNonZero) {
05688         ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
05689         ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
05690                               ThisElt, DAG.getConstant(8, MVT::i8));
05691         if (LastIsNonZero)
05692           ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
05693       } else
05694         ThisElt = LastElt;
05695 
05696       if (ThisElt.getNode())
05697         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
05698                         DAG.getIntPtrConstant(i/2));
05699     }
05700   }
05701 
05702   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
05703 }
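      // For example, adjacent byte operands b0 and b1 are combined above as
      // (zext(b1) << 8) | zext(b0) and inserted as the i16 element at index 0,
      // so the 16 byte operands become at most 8 i16 insertions.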
05704 
05705 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
05706 ///
05707 static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
05708                                      unsigned NumNonZero, unsigned NumZero,
05709                                      SelectionDAG &DAG,
05710                                      const X86Subtarget* Subtarget,
05711                                      const TargetLowering &TLI) {
05712   if (NumNonZero > 4)
05713     return SDValue();
05714 
05715   SDLoc dl(Op);
05716   SDValue V;
05717   bool First = true;
05718   for (unsigned i = 0; i < 8; ++i) {
05719     bool isNonZero = (NonZeros & (1 << i)) != 0;
05720     if (isNonZero) {
05721       if (First) {
05722         if (NumZero)
05723           V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
05724         else
05725           V = DAG.getUNDEF(MVT::v8i16);
05726         First = false;
05727       }
05728       V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
05729                       MVT::v8i16, V, Op.getOperand(i),
05730                       DAG.getIntPtrConstant(i));
05731     }
05732   }
05733 
05734   return V;
05735 }
05736 
05737 /// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
05738 static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
05739                                      unsigned NonZeros, unsigned NumNonZero,
05740                                      unsigned NumZero, SelectionDAG &DAG,
05741                                      const X86Subtarget *Subtarget,
05742                                      const TargetLowering &TLI) {
05743   // We know there's at least one non-zero element
05744   unsigned FirstNonZeroIdx = 0;
05745   SDValue FirstNonZero = Op->getOperand(FirstNonZeroIdx);
05746   while (FirstNonZero.getOpcode() == ISD::UNDEF ||
05747          X86::isZeroNode(FirstNonZero)) {
05748     ++FirstNonZeroIdx;
05749     FirstNonZero = Op->getOperand(FirstNonZeroIdx);
05750   }
05751 
05752   if (FirstNonZero.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
05753       !isa<ConstantSDNode>(FirstNonZero.getOperand(1)))
05754     return SDValue();
05755 
05756   SDValue V = FirstNonZero.getOperand(0);
05757   MVT VVT = V.getSimpleValueType();
05758   if (!Subtarget->hasSSE41() || (VVT != MVT::v4f32 && VVT != MVT::v4i32))
05759     return SDValue();
05760 
05761   unsigned FirstNonZeroDst =
05762       cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
05763   unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx;
05764   unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx;
05765   unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst;
05766 
05767   for (unsigned Idx = FirstNonZeroIdx + 1; Idx < NumElems; ++Idx) {
05768     SDValue Elem = Op.getOperand(Idx);
05769     if (Elem.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elem))
05770       continue;
05771 
05772     // TODO: What else can be here? Deal with it.
05773     if (Elem.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
05774       return SDValue();
05775 
05776     // TODO: Some optimizations are still possible here
05777     // ex: Getting one element from a vector, and the rest from another.
05778     if (Elem.getOperand(0) != V)
05779       return SDValue();
05780 
05781     unsigned Dst = cast<ConstantSDNode>(Elem.getOperand(1))->getZExtValue();
05782     if (Dst == Idx)
05783       ++CorrectIdx;
05784     else if (IncorrectIdx == -1U) {
05785       IncorrectIdx = Idx;
05786       IncorrectDst = Dst;
05787     } else
05788       // There was already one element with an incorrect index.
05789       // We can't optimize this case to an insertps.
05790       return SDValue();
05791   }
05792 
05793   if (NumNonZero == CorrectIdx || NumNonZero == CorrectIdx + 1) {
05794     SDLoc dl(Op);
05795     EVT VT = Op.getSimpleValueType();
05796     unsigned ElementMoveMask = 0;
05797     if (IncorrectIdx == -1U)
05798       ElementMoveMask = FirstNonZeroIdx << 6 | FirstNonZeroIdx << 4;
05799     else
05800       ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4;
05801 
05802     SDValue InsertpsMask =
05803         DAG.getIntPtrConstant(ElementMoveMask | (~NonZeros & 0xf));
05804     return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask);
05805   }
05806 
05807   return SDValue();
05808 }
05809 
05810 /// getVShift - Return a vector logical shift node.
05811 ///
05812 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
05813                          unsigned NumBits, SelectionDAG &DAG,
05814                          const TargetLowering &TLI, SDLoc dl) {
05815   assert(VT.is128BitVector() && "Unknown type for VShift");
05816   EVT ShVT = MVT::v2i64;
05817   unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
05818   SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
05819   return DAG.getNode(ISD::BITCAST, dl, VT,
05820                      DAG.getNode(Opc, dl, ShVT, SrcOp,
05821                              DAG.getConstant(NumBits,
05822                                   TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
05823 }
05824 
05825 static SDValue
05826 LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
05827 
05828   // Check if the scalar load can be widened into a vector load. And if
05829   // the address is "base + cst" see if the cst can be "absorbed" into
05830   // the shuffle mask.
05831   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
05832     SDValue Ptr = LD->getBasePtr();
05833     if (!ISD::isNormalLoad(LD) || LD->isVolatile())
05834       return SDValue();
05835     EVT PVT = LD->getValueType(0);
05836     if (PVT != MVT::i32 && PVT != MVT::f32)
05837       return SDValue();
05838 
05839     int FI = -1;
05840     int64_t Offset = 0;
05841     if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
05842       FI = FINode->getIndex();
05843       Offset = 0;
05844     } else if (DAG.isBaseWithConstantOffset(Ptr) &&
05845                isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
05846       FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
05847       Offset = Ptr.getConstantOperandVal(1);
05848       Ptr = Ptr.getOperand(0);
05849     } else {
05850       return SDValue();
05851     }
05852 
05853     // FIXME: 256-bit vector instructions don't require a strict alignment,
05854     // improve this code to support it better.
05855     unsigned RequiredAlign = VT.getSizeInBits()/8;
05856     SDValue Chain = LD->getChain();
05857     // Make sure the stack object alignment is at least 16 or 32.
05858     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
05859     if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
05860       if (MFI->isFixedObjectIndex(FI)) {
05861         // Can't change the alignment. FIXME: It's possible to compute
05862         // the exact stack offset and reference FI + adjust offset instead.
05863         // If someone *really* cares about this. That's the way to implement it.
05864         return SDValue();
05865       } else {
05866         MFI->setObjectAlignment(FI, RequiredAlign);
05867       }
05868     }
05869 
05870     // (Offset % 16 or 32) must be a multiple of 4. The address is then
05871     // Ptr + (Offset & ~(RequiredAlign-1)).
05872     if (Offset < 0)
05873       return SDValue();
05874     if ((Offset % RequiredAlign) & 3)
05875       return SDValue();
05876     int64_t StartOffset = Offset & ~(RequiredAlign-1);
05877     if (StartOffset)
05878       Ptr = DAG.getNode(ISD::ADD, SDLoc(Ptr), Ptr.getValueType(),
05879                         Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
05880 
05881     int EltNo = (Offset - StartOffset) >> 2;
05882     unsigned NumElems = VT.getVectorNumElements();
05883 
05884     EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
05885     SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
05886                              LD->getPointerInfo().getWithOffset(StartOffset),
05887                              false, false, false, 0);
05888 
05889     SmallVector<int, 8> Mask;
05890     for (unsigned i = 0; i != NumElems; ++i)
05891       Mask.push_back(EltNo);
05892 
05893     return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
05894   }
05895 
05896   return SDValue();
05897 }
05898 
05899 /// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
05900 /// vector of type 'VT', see if the elements can be replaced by a single large
05901 /// load which has the same value as a build_vector whose operands are 'elts'.
05902 ///
05903 /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
05904 ///
05905 /// FIXME: we'd also like to handle the case where the last elements are zero
05906 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
05907 /// There's even a handy isZeroNode for that purpose.
05908 static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
05909                                         SDLoc &DL, SelectionDAG &DAG,
05910                                         bool isAfterLegalize) {
05911   EVT EltVT = VT.getVectorElementType();
05912   unsigned NumElems = Elts.size();
05913 
05914   LoadSDNode *LDBase = nullptr;
05915   unsigned LastLoadedElt = -1U;
05916 
05917   // For each element in the initializer, see if we've found a load or an undef.
05918   // If we don't find an initial load element, or later load elements are
05919   // non-consecutive, bail out.
05920   for (unsigned i = 0; i < NumElems; ++i) {
05921     SDValue Elt = Elts[i];
05922 
05923     if (!Elt.getNode() ||
05924         (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
05925       return SDValue();
05926     if (!LDBase) {
05927       if (Elt.getNode()->getOpcode() == ISD::UNDEF)
05928         return SDValue();
05929       LDBase = cast<LoadSDNode>(Elt.getNode());
05930       LastLoadedElt = i;
05931       continue;
05932     }
05933     if (Elt.getOpcode() == ISD::UNDEF)
05934       continue;
05935 
05936     LoadSDNode *LD = cast<LoadSDNode>(Elt);
05937     if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
05938       return SDValue();
05939     LastLoadedElt = i;
05940   }
05941 
05942   // If we have found an entire vector of loads and undefs, then return a large
05943   // load of the entire vector width starting at the base pointer.  If we found
05944   // consecutive loads for the low half, generate a vzext_load node.
05945   if (LastLoadedElt == NumElems - 1) {
05946 
05947     if (isAfterLegalize &&
05948         !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
05949       return SDValue();
05950 
05951     SDValue NewLd;
05952     if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
05953       NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
05954                           LDBase->getPointerInfo(),
05955                           LDBase->isVolatile(), LDBase->isNonTemporal(),
05956                           LDBase->isInvariant(), 0);
05957     else
05958       NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
05959                           LDBase->getPointerInfo(),
05960                           LDBase->isVolatile(), LDBase->isNonTemporal(),
05961                           LDBase->isInvariant(), LDBase->getAlignment());
05962 
05963     if (LDBase->hasAnyUseOfValue(1)) {
05964       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
05965                                      SDValue(LDBase, 1),
05966                                      SDValue(NewLd.getNode(), 1));
05967       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
05968       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
05969                              SDValue(NewLd.getNode(), 1));
05970     }
05971 
05972     return NewLd;
05973   }
05974   if (NumElems == 4 && LastLoadedElt == 1 &&
05975       DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
05976     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
05977     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
05978     SDValue ResNode =
05979         DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64,
05980                                 LDBase->getPointerInfo(),
05981                                 LDBase->getAlignment(),
05982                                 false/*isVolatile*/, true/*ReadMem*/,
05983                                 false/*WriteMem*/);
05984 
05985     // Make sure the newly-created LOAD is in the same position as LDBase in
05986     // terms of dependency. We create a TokenFactor for LDBase and ResNode, and
05987     // update uses of LDBase's output chain to use the TokenFactor.
05988     if (LDBase->hasAnyUseOfValue(1)) {
05989       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
05990                              SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
05991       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
05992       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
05993                              SDValue(ResNode.getNode(), 1));
05994     }
05995 
05996     return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
05997   }
05998   return SDValue();
05999 }
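      // For example, a v4i32 build_vector whose operands are four i32 loads from
      // p, p+4, p+8 and p+12 becomes a single 16-byte load from p; if only the
      // first two elements are consecutive loads and the rest are undef, a
      // 64-bit VZEXT_LOAD is emitted instead.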
06000 
06001 /// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
06002 /// to generate a splat value for the following cases:
06003 /// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
06004 /// 2. A splat shuffle which uses a scalar_to_vector node which comes from
06005 /// a scalar load, or a constant.
06006 /// The VBROADCAST node is returned when a pattern is found,
06007 /// or SDValue() otherwise.
06008 static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
06009                                     SelectionDAG &DAG) {
06010   // VBROADCAST requires AVX.
06011   // TODO: Splats could be generated for non-AVX CPUs using SSE
06012   // instructions, but there's less potential gain for only 128-bit vectors.
06013   if (!Subtarget->hasAVX())
06014     return SDValue();
06015 
06016   MVT VT = Op.getSimpleValueType();
06017   SDLoc dl(Op);
06018 
06019   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
06020          "Unsupported vector type for broadcast.");
06021 
06022   SDValue Ld;
06023   bool ConstSplatVal;
06024 
06025   switch (Op.getOpcode()) {
06026     default:
06027       // Unknown pattern found.
06028       return SDValue();
06029 
06030     case ISD::BUILD_VECTOR: {
06031       auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
06032       BitVector UndefElements;
06033       SDValue Splat = BVOp->getSplatValue(&UndefElements);
06034 
06035       // We need a splat of a single value to use broadcast, and it doesn't
06036       // make any sense if the value is only in one element of the vector.
06037       if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
06038         return SDValue();
06039 
06040       Ld = Splat;
06041       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
06042                        Ld.getOpcode() == ISD::ConstantFP);
06043 
06044       // Make sure that all of the users of a non-constant load are from the
06045       // BUILD_VECTOR node.
06046       if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
06047         return SDValue();
06048       break;
06049     }
06050 
06051     case ISD::VECTOR_SHUFFLE: {
06052       ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
06053 
06054       // Shuffles must have a splat mask where the first element is
06055       // broadcasted.
06056       if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
06057         return SDValue();
06058 
06059       SDValue Sc = Op.getOperand(0);
06060       if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
06061           Sc.getOpcode() != ISD::BUILD_VECTOR) {
06062 
06063         if (!Subtarget->hasInt256())
06064           return SDValue();
06065 
06066         // Use the register form of the broadcast instruction available on AVX2.
06067         if (VT.getSizeInBits() >= 256)
06068           Sc = Extract128BitVector(Sc, 0, DAG, dl);
06069         return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
06070       }
06071 
06072       Ld = Sc.getOperand(0);
06073       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
06074                        Ld.getOpcode() == ISD::ConstantFP);
06075 
06076       // The scalar_to_vector node and the suspected
06077       // load node must have exactly one user.
06078       // Constants may have multiple users.
06079 
06080       // AVX-512 has a register version of the broadcast
06081       bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
06082         Ld.getValueType().getSizeInBits() >= 32;
06083       if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
06084           !hasRegVer))
06085         return SDValue();
06086       break;
06087     }
06088   }
06089 
06090   unsigned ScalarSize = Ld.getValueType().getSizeInBits();
06091   bool IsGE256 = (VT.getSizeInBits() >= 256);
06092 
06093   // When optimizing for size, generate up to 5 extra bytes for a broadcast
06094   // instruction to save 8 or more bytes of constant pool data.
06095   // TODO: If multiple splats are generated to load the same constant,
06096   // it may be detrimental to overall size. There needs to be a way to detect
06097   // that condition to know if this is truly a size win.
06098   const Function *F = DAG.getMachineFunction().getFunction();
06099   bool OptForSize = F->getAttributes().
06100     hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
06101 
06102   // Handle broadcasting a single constant scalar from the constant pool
06103   // into a vector.
06104   // On Sandybridge (no AVX2), it is still better to load a constant vector
06105   // from the constant pool and not to broadcast it from a scalar.
06106   // But override that restriction when optimizing for size.
06107   // TODO: Check if splatting is recommended for other AVX-capable CPUs.
06108   if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) {
06109     EVT CVT = Ld.getValueType();
06110     assert(!CVT.isVector() && "Must not broadcast a vector type");
06111 
06112     // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
06113     // For size optimization, also splat v2f64 and v2i64, and for size opt
06114     // with AVX2, also splat i8 and i16.
06115     // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
06116     if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
06117         (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) {
06118       const Constant *C = nullptr;
06119       if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
06120         C = CI->getConstantIntValue();
06121       else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
06122         C = CF->getConstantFPValue();
06123 
06124       assert(C && "Invalid constant type");
06125 
06126       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
06127       SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
06128       unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
06129       Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
06130                        MachinePointerInfo::getConstantPool(),
06131                        false, false, false, Alignment);
06132 
06133       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
06134     }
06135   }
06136 
06137   bool IsLoad = ISD::isNormalLoad(Ld.getNode());
06138 
06139   // Handle AVX2 in-register broadcasts.
06140   if (!IsLoad && Subtarget->hasInt256() &&
06141       (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
06142     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
06143 
06144   // The scalar source must be a normal load.
06145   if (!IsLoad)
06146     return SDValue();
06147 
06148   if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))
06149     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
06150 
06151   // The integer check is needed for the 64-bit into 128-bit case, so that it
06152   // doesn't match double, since there is no vbroadcastsd xmm.
06153   if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
06154     if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
06155       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
06156   }
06157 
06158   // Unsupported broadcast.
06159   return SDValue();
06160 }
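      // For example, a v8f32 BUILD_VECTOR that splats a single f32 loaded from
      // memory is lowered to X86ISD::VBROADCAST of that load (vbroadcastss on
      // AVX); with AVX2, a 32-bit scalar that is not a load can also be
      // broadcast directly from a register.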
06161 
06162 /// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
06163 /// underlying vector and index.
06164 ///
06165 /// Modifies \p ExtractedFromVec to the real vector and returns the real
06166 /// index.
06167 static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
06168                                          SDValue ExtIdx) {
06169   int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
06170   if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
06171     return Idx;
06172 
06173   // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
06174   // lowered this:
06175   //   (extract_vector_elt (v8f32 %vreg1), Constant<6>)
06176   // to:
06177   //   (extract_vector_elt (vector_shuffle<2,u,u,u>
06178   //                           (extract_subvector (v8f32 %vreg0), Constant<4>),
06179   //                           undef)
06180   //                       Constant<0>)
06181   // In this case the vector is the extract_subvector expression and the index
06182   // is 2, as specified by the shuffle.
06183   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
06184   SDValue ShuffleVec = SVOp->getOperand(0);
06185   MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
06186   assert(ShuffleVecVT.getVectorElementType() ==
06187          ExtractedFromVec.getSimpleValueType().getVectorElementType());
06188 
06189   int ShuffleIdx = SVOp->getMaskElt(Idx);
06190   if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
06191     ExtractedFromVec = ShuffleVec;
06192     return ShuffleIdx;
06193   }
06194   return Idx;
06195 }
06196 
06197 static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
06198   MVT VT = Op.getSimpleValueType();
06199 
06200   // Skip if insert_vec_elt is not supported.
06201   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
06202   if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
06203     return SDValue();
06204 
06205   SDLoc DL(Op);
06206   unsigned NumElems = Op.getNumOperands();
06207 
06208   SDValue VecIn1;
06209   SDValue VecIn2;
06210   SmallVector<unsigned, 4> InsertIndices;
06211   SmallVector<int, 8> Mask(NumElems, -1);
06212 
06213   for (unsigned i = 0; i != NumElems; ++i) {
06214     unsigned Opc = Op.getOperand(i).getOpcode();
06215 
06216     if (Opc == ISD::UNDEF)
06217       continue;
06218 
06219     if (Opc != ISD::EXTRACT_VECTOR_ELT) {
06220       // Quit if more than one element needs inserting.
06221       if (InsertIndices.size() > 1)
06222         return SDValue();
06223 
06224       InsertIndices.push_back(i);
06225       continue;
06226     }
06227 
06228     SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
06229     SDValue ExtIdx = Op.getOperand(i).getOperand(1);
06230     // Quit if non-constant index.
06231     if (!isa<ConstantSDNode>(ExtIdx))
06232       return SDValue();
06233     int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
06234 
06235     // Quit if extracted from vector of different type.
06236     if (ExtractedFromVec.getValueType() != VT)
06237       return SDValue();
06238 
06239     if (!VecIn1.getNode())
06240       VecIn1 = ExtractedFromVec;
06241     else if (VecIn1 != ExtractedFromVec) {
06242       if (!VecIn2.getNode())
06243         VecIn2 = ExtractedFromVec;
06244       else if (VecIn2 != ExtractedFromVec)
06245         // Quit if there are more than two vectors to shuffle.
06246         return SDValue();
06247     }
06248 
06249     if (ExtractedFromVec == VecIn1)
06250       Mask[i] = Idx;
06251     else if (ExtractedFromVec == VecIn2)
06252       Mask[i] = Idx + NumElems;
06253   }
06254 
06255   if (!VecIn1.getNode())
06256     return SDValue();
06257 
06258   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
06259   SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
06260   for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
06261     unsigned Idx = InsertIndices[i];
06262     NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
06263                      DAG.getIntPtrConstant(Idx));
06264   }
06265 
06266   return NV;
06267 }
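// A small worked example of the pattern buildFromShuffleMostly handles
// (the vector names are placeholders): for
//   (v4i32 build_vector (extract_vector_elt %A, 1), (extract_vector_elt %A, 2),
//                       (extract_vector_elt %A, 3), %x)
// the loop records Mask = <1, 2, 3, u> and InsertIndices = {3}, so the result
// is a vector_shuffle<1,2,3,u> of %A followed by one insert_vector_elt of %x
// at index 3.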
06268 
06269 // Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
06270 SDValue
06271 X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
06272 
06273   MVT VT = Op.getSimpleValueType();
06274   assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
06275          "Unexpected type in LowerBUILD_VECTORvXi1!");
06276 
06277   SDLoc dl(Op);
06278   if (ISD::isBuildVectorAllZeros(Op.getNode())) {
06279     SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
06280     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
06281     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
06282   }
06283 
06284   if (ISD::isBuildVectorAllOnes(Op.getNode())) {
06285     SDValue Cst = DAG.getTargetConstant(1, MVT::i1);
06286     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
06287     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
06288   }
06289 
06290   bool AllConstants = true;
06291   uint64_t Immediate = 0;
06292   int NonConstIdx = -1;
06293   bool IsSplat = true;
06294   unsigned NumNonConsts = 0;
06295   unsigned NumConsts = 0;
06296   for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
06297     SDValue In = Op.getOperand(idx);
06298     if (In.getOpcode() == ISD::UNDEF)
06299       continue;
06300     if (!isa<ConstantSDNode>(In)) {
06301       AllConstants = false;
06302       NonConstIdx = idx;
06303       NumNonConsts++;
06304     }
06305     else {
06306       NumConsts++;
06307       if (cast<ConstantSDNode>(In)->getZExtValue())
06308         Immediate |= (1ULL << idx);
06309     }
06310     if (In != Op.getOperand(0))
06311       IsSplat = false;
06312   }
06313 
06314   if (AllConstants) {
06315     SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
06316       DAG.getConstant(Immediate, MVT::i16));
06317     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
06318                        DAG.getIntPtrConstant(0));
06319   }
06320 
06321   if (NumNonConsts == 1 && NonConstIdx != 0) {
06322     SDValue DstVec;
06323     if (NumConsts) {
06324       SDValue VecAsImm = DAG.getConstant(Immediate,
06325                                          MVT::getIntegerVT(VT.getSizeInBits()));
06326       DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
06327     }
06328     else 
06329       DstVec = DAG.getUNDEF(VT);
06330     return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
06331                        Op.getOperand(NonConstIdx),
06332                        DAG.getIntPtrConstant(NonConstIdx));
06333   }
06334   if (!IsSplat && (NonConstIdx != 0))
06335     llvm_unreachable("Unsupported BUILD_VECTOR operation");
06336   MVT SelectVT = (VT == MVT::v16i1) ? MVT::i16 : MVT::i8;
06337   SDValue Select;
06338   if (IsSplat)
06339     Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
06340                           DAG.getConstant(-1, SelectVT),
06341                           DAG.getConstant(0, SelectVT));
06342   else
06343     Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
06344                          DAG.getConstant((Immediate | 1), SelectVT),
06345                          DAG.getConstant(Immediate, SelectVT));
06346   return DAG.getNode(ISD::BITCAST, dl, VT, Select);
06347 }
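// For example (a sketch of the splat path above, assuming AVX-512 mask
// registers): a v16i1 build_vector that splats a single non-constant i1 %c
// becomes
//   (v16i1 bitcast (i16 select %c, -1, 0))
// i.e. the whole 16-bit mask is set or cleared depending on %c.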
06348 
06349 /// \brief Return true if \p N implements a horizontal binop and return the
06350 /// operands for the horizontal binop into V0 and V1.
06351 /// 
06352 /// This is a helper function of PerformBUILD_VECTORCombine.
06353 /// This function checks whether the input build_vector \p N implements a
06354 /// horizontal operation. Parameter \p Opcode defines the kind of horizontal
06355 /// operation to match.
06356 /// For example, if \p Opcode is equal to ISD::ADD, then this function
06357 /// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
06358 /// is equal to ISD::SUB, then this function checks if this is a horizontal
06359 /// arithmetic sub.
06360 ///
06361 /// This function only analyzes elements of \p N whose indices are
06362 /// in range [BaseIdx, LastIdx).
06363 static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
06364                               SelectionDAG &DAG,
06365                               unsigned BaseIdx, unsigned LastIdx,
06366                               SDValue &V0, SDValue &V1) {
06367   EVT VT = N->getValueType(0);
06368 
06369   assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
06370   assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
06371          "Invalid Vector in input!");
06372   
06373   bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
06374   bool CanFold = true;
06375   unsigned ExpectedVExtractIdx = BaseIdx;
06376   unsigned NumElts = LastIdx - BaseIdx;
06377   V0 = DAG.getUNDEF(VT);
06378   V1 = DAG.getUNDEF(VT);
06379 
06380   // Check if N implements a horizontal binop.
06381   for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
06382     SDValue Op = N->getOperand(i + BaseIdx);
06383 
06384     // Skip UNDEFs.
06385     if (Op->getOpcode() == ISD::UNDEF) {
06386       // Update the expected vector extract index.
06387       if (i * 2 == NumElts)
06388         ExpectedVExtractIdx = BaseIdx;
06389       ExpectedVExtractIdx += 2;
06390       continue;
06391     }
06392 
06393     CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
06394 
06395     if (!CanFold)
06396       break;
06397 
06398     SDValue Op0 = Op.getOperand(0);
06399     SDValue Op1 = Op.getOperand(1);
06400 
06401     // Try to match the following pattern:
06402     // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
06403     CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
06404         Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
06405         Op0.getOperand(0) == Op1.getOperand(0) &&
06406         isa<ConstantSDNode>(Op0.getOperand(1)) &&
06407         isa<ConstantSDNode>(Op1.getOperand(1)));
06408     if (!CanFold)
06409       break;
06410 
06411     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
06412     unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
06413 
06414     if (i * 2 < NumElts) {
06415       if (V0.getOpcode() == ISD::UNDEF)
06416         V0 = Op0.getOperand(0);
06417     } else {
06418       if (V1.getOpcode() == ISD::UNDEF)
06419         V1 = Op0.getOperand(0);
06420       if (i * 2 == NumElts)
06421         ExpectedVExtractIdx = BaseIdx;
06422     }
06423 
06424     SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
06425     if (I0 == ExpectedVExtractIdx)
06426       CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
06427     else if (IsCommutable && I1 == ExpectedVExtractIdx) {
06428       // Try to match the following dag sequence:
06429       // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
06430       CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
06431     } else
06432       CanFold = false;
06433 
06434     ExpectedVExtractIdx += 2;
06435   }
06436 
06437   return CanFold;
06438 }
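// Example of a build_vector accepted by isHorizontalBinOp (A and B are
// placeholder v4f32 vectors, Opcode == ISD::FADD, [BaseIdx, LastIdx) == [0,4)):
//   (build_vector (fadd A[0], A[1]), (fadd A[2], A[3]),
//                 (fadd B[0], B[1]), (fadd B[2], B[3]))
// where each A[i]/B[i] is an extract_vector_elt with a constant index. This
// matches with V0 = A and V1 = B, and the caller can emit
// (X86ISD::FHADD A, B), i.e. a single haddps.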
06439 
06440 /// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
06441 /// a concat_vector. 
06442 ///
06443 /// This is a helper function of PerformBUILD_VECTORCombine.
06444 /// This function expects two 256-bit vectors called V0 and V1.
06445 /// At first, each vector is split into two separate 128-bit vectors.
06446 /// Then, the resulting 128-bit vectors are used to implement two
06447 /// horizontal binary operations. 
06448 ///
06449 /// The kind of horizontal binary operation is defined by \p X86Opcode.
06450 ///
06451 /// \p Mode specifies how the 128-bit parts of V0 and V1 are passed to the
06452 /// two new horizontal binops.
06453 /// When Mode is set, the first horizontal binop dag node takes as input the
06454 /// lower 128-bit of V0 and the upper 128-bit of V0. The second horizontal
06455 /// binop dag node takes as input the lower 128-bit of V1 and the upper
06456 /// 128-bit of V1.
06457 ///   Example:
06458 ///     HADD V0_LO, V0_HI
06459 ///     HADD V1_LO, V1_HI
06460 ///
06461 /// Otherwise, the first horizontal binop dag node takes as input the lower
06462 /// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
06463 /// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
06464 ///   Example:
06465 ///     HADD V0_LO, V1_LO
06466 ///     HADD V0_HI, V1_HI
06467 ///
06468 /// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
06469 /// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
06470 /// the upper 128-bits of the result.
06471 static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
06472                                      SDLoc DL, SelectionDAG &DAG,
06473                                      unsigned X86Opcode, bool Mode,
06474                                      bool isUndefLO, bool isUndefHI) {
06475   EVT VT = V0.getValueType();
06476   assert(VT.is256BitVector() && VT == V1.getValueType() &&
06477          "Invalid nodes in input!");
06478 
06479   unsigned NumElts = VT.getVectorNumElements();
06480   SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL);
06481   SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL);
06482   SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL);
06483   SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL);
06484   EVT NewVT = V0_LO.getValueType();
06485 
06486   SDValue LO = DAG.getUNDEF(NewVT);
06487   SDValue HI = DAG.getUNDEF(NewVT);
06488 
06489   if (Mode) {
06490     // Don't emit a horizontal binop if the result is expected to be UNDEF.
06491     if (!isUndefLO && V0->getOpcode() != ISD::UNDEF)
06492       LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
06493     if (!isUndefHI && V1->getOpcode() != ISD::UNDEF)
06494       HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
06495   } else {
06496     // Don't emit a horizontal binop if the result is expected to be UNDEF.
06497     if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF ||
06498                        V1_LO->getOpcode() != ISD::UNDEF))
06499       LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
06500 
06501     if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF ||
06502                        V1_HI->getOpcode() != ISD::UNDEF))
06503       HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
06504   }
06505 
06506   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
06507 }
06508 
06509 /// \brief Try to fold a build_vector that performs an 'addsub' into the
06510 /// sequence of 'vadd + vsub + blendi'.
06511 static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
06512                            const X86Subtarget *Subtarget) {
06513   SDLoc DL(BV);
06514   EVT VT = BV->getValueType(0);
06515   unsigned NumElts = VT.getVectorNumElements();
06516   SDValue InVec0 = DAG.getUNDEF(VT);
06517   SDValue InVec1 = DAG.getUNDEF(VT);
06518 
06519   assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
06520           VT == MVT::v2f64) && "build_vector with an invalid type found!");
06521 
06522   // Odd-numbered elements in the input build vector are obtained from
06523   // adding two integer/float elements.
06524   // Even-numbered elements in the input build vector are obtained from
06525   // subtracting two integer/float elements.
06526   unsigned ExpectedOpcode = ISD::FSUB;
06527   unsigned NextExpectedOpcode = ISD::FADD;
06528   bool AddFound = false;
06529   bool SubFound = false;
06530 
06531   for (unsigned i = 0, e = NumElts; i != e; i++) {
06532     SDValue Op = BV->getOperand(i);
06533 
06534     // Skip 'undef' values.
06535     unsigned Opcode = Op.getOpcode();
06536     if (Opcode == ISD::UNDEF) {
06537       std::swap(ExpectedOpcode, NextExpectedOpcode);
06538       continue;
06539     }
06540 
06541     // Early exit if we found an unexpected opcode.
06542     if (Opcode != ExpectedOpcode)
06543       return SDValue();
06544 
06545     SDValue Op0 = Op.getOperand(0);
06546     SDValue Op1 = Op.getOperand(1);
06547 
06548     // Try to match the following pattern:
06549     // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
06550     // Early exit if we cannot match that sequence.
06551     if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
06552         Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
06553         !isa<ConstantSDNode>(Op0.getOperand(1)) ||
06554         !isa<ConstantSDNode>(Op1.getOperand(1)) ||
06555         Op0.getOperand(1) != Op1.getOperand(1))
06556       return SDValue();
06557 
06558     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
06559     if (I0 != i)
06560       return SDValue();
06561 
06562     // We found a valid add/sub node. Update the information accordingly.
06563     if (i & 1)
06564       AddFound = true;
06565     else
06566       SubFound = true;
06567 
06568     // Update InVec0 and InVec1.
06569     if (InVec0.getOpcode() == ISD::UNDEF)
06570       InVec0 = Op0.getOperand(0);
06571     if (InVec1.getOpcode() == ISD::UNDEF)
06572       InVec1 = Op1.getOperand(0);
06573 
06574     // Make sure that the operands in input to each add/sub node always
06575     // come from the same pair of vectors.
06576     if (InVec0 != Op0.getOperand(0)) {
06577       if (ExpectedOpcode == ISD::FSUB)
06578         return SDValue();
06579 
06580       // FADD is commutable. Try to commute the operands
06581       // and then test again.
06582       std::swap(Op0, Op1);
06583       if (InVec0 != Op0.getOperand(0))
06584         return SDValue();
06585     }
06586 
06587     if (InVec1 != Op1.getOperand(0))
06588       return SDValue();
06589 
06590     // Update the pair of expected opcodes.
06591     std::swap(ExpectedOpcode, NextExpectedOpcode);
06592   }
06593 
06594   // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
06595   if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
06596       InVec1.getOpcode() != ISD::UNDEF)
06597     return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
06598 
06599   return SDValue();
06600 }
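// A minimal example of what matchAddSub folds (A and B are placeholder v4f32
// vectors, A[i]/B[i] are extract_vector_elt nodes with constant index i):
//   (build_vector (fsub A[0], B[0]), (fadd A[1], B[1]),
//                 (fsub A[2], B[2]), (fadd A[3], B[3]))
// becomes (X86ISD::ADDSUB A, B), which selects to a single addsubps on SSE3
// targets.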
06601 
06602 static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
06603                                           const X86Subtarget *Subtarget) {
06604   SDLoc DL(N);
06605   EVT VT = N->getValueType(0);
06606   unsigned NumElts = VT.getVectorNumElements();
06607   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
06608   SDValue InVec0, InVec1;
06609 
06610   // Try to match an ADDSUB.
06611   if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
06612       (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
06613     SDValue Value = matchAddSub(BV, DAG, Subtarget);
06614     if (Value.getNode())
06615       return Value;
06616   }
06617 
06618   // Try to match horizontal ADD/SUB.
06619   unsigned NumUndefsLO = 0;
06620   unsigned NumUndefsHI = 0;
06621   unsigned Half = NumElts/2;
06622 
06623   // Count the number of UNDEF operands in the input build_vector.
06624   for (unsigned i = 0, e = Half; i != e; ++i)
06625     if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
06626       NumUndefsLO++;
06627 
06628   for (unsigned i = Half, e = NumElts; i != e; ++i)
06629     if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
06630       NumUndefsHI++;
06631 
06632   // Early exit if this is either a build_vector of all UNDEFs, or if all but
06633   // one of the operands are UNDEF.
06634   if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
06635     return SDValue();
06636 
06637   if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
06638     // Try to match an SSE3 float HADD/HSUB.
06639     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
06640       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
06641     
06642     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
06643       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
06644   } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
06645     // Try to match an SSSE3 integer HADD/HSUB.
06646     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
06647       return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
06648     
06649     if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
06650       return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
06651   }
06652   
06653   if (!Subtarget->hasAVX())
06654     return SDValue();
06655 
06656   if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
06657     // Try to match an AVX horizontal add/sub of packed single/double
06658     // precision floating point values from 256-bit vectors.
06659     SDValue InVec2, InVec3;
06660     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
06661         isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
06662         ((InVec0.getOpcode() == ISD::UNDEF ||
06663           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06664         ((InVec1.getOpcode() == ISD::UNDEF ||
06665           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06666       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
06667 
06668     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
06669         isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
06670         ((InVec0.getOpcode() == ISD::UNDEF ||
06671           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06672         ((InVec1.getOpcode() == ISD::UNDEF ||
06673           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06674       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
06675   } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
06676     // Try to match an AVX2 horizontal add/sub of signed integers.
06677     SDValue InVec2, InVec3;
06678     unsigned X86Opcode;
06679     bool CanFold = true;
06680 
06681     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
06682         isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
06683         ((InVec0.getOpcode() == ISD::UNDEF ||
06684           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06685         ((InVec1.getOpcode() == ISD::UNDEF ||
06686           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06687       X86Opcode = X86ISD::HADD;
06688     else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
06689         isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
06690         ((InVec0.getOpcode() == ISD::UNDEF ||
06691           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06692         ((InVec1.getOpcode() == ISD::UNDEF ||
06693           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06694       X86Opcode = X86ISD::HSUB;
06695     else
06696       CanFold = false;
06697 
06698     if (CanFold) {
06699       // Fold this build_vector into a single horizontal add/sub.
06700       // Do this only if the target has AVX2.
06701       if (Subtarget->hasAVX2())
06702         return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
06703  
06704       // Do not try to expand this build_vector into a pair of horizontal
06705       // add/sub if we can emit a pair of scalar add/sub.
06706       if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
06707         return SDValue();
06708 
06709       // Convert this build_vector into a pair of horizontal binops followed by
06710       // a concat vector.
06711       bool isUndefLO = NumUndefsLO == Half;
06712       bool isUndefHI = NumUndefsHI == Half;
06713       return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
06714                                    isUndefLO, isUndefHI);
06715     }
06716   }
06717 
06718   if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
06719        VT == MVT::v16i16) && Subtarget->hasAVX()) {
06720     unsigned X86Opcode;
06721     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
06722       X86Opcode = X86ISD::HADD;
06723     else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
06724       X86Opcode = X86ISD::HSUB;
06725     else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
06726       X86Opcode = X86ISD::FHADD;
06727     else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
06728       X86Opcode = X86ISD::FHSUB;
06729     else
06730       return SDValue();
06731 
06732     // Don't try to expand this build_vector into a pair of horizontal add/sub
06733     // if we can simply emit a pair of scalar add/sub.
06734     if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
06735       return SDValue();
06736 
06737     // Convert this build_vector into two horizontal add/sub followed by
06738     // a concat vector.
06739     bool isUndefLO = NumUndefsLO == Half;
06740     bool isUndefHI = NumUndefsHI == Half;
06741     return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
06742                                  isUndefLO, isUndefHI);
06743   }
06744 
06745   return SDValue();
06746 }
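// Putting the pieces together, a hypothetical v8i32 build_vector computing the
// per-lane pairwise sums of two 256-bit vectors A and B, i.e.
//   <A0+A1, A2+A3, B0+B1, B2+B3, A4+A5, A6+A7, B4+B5, B6+B7>
// is matched by the code above: with AVX2 it folds to a single
// (X86ISD::HADD A, B) (vphaddd ymm); with plain AVX it is expanded by
// ExpandHorizontalBinOp into two 128-bit HADDs followed by a concat_vectors.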
06747 
06748 SDValue
06749 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
06750   SDLoc dl(Op);
06751 
06752   MVT VT = Op.getSimpleValueType();
06753   MVT ExtVT = VT.getVectorElementType();
06754   unsigned NumElems = Op.getNumOperands();
06755 
06756   // Generate vectors for predicate vectors.
06757   if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512())
06758     return LowerBUILD_VECTORvXi1(Op, DAG);
06759 
06760   // Vectors containing all zeros can be matched by pxor and xorps later
06761   if (ISD::isBuildVectorAllZeros(Op.getNode())) {
06762     // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
06763     // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
06764     if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
06765       return Op;
06766 
06767     return getZeroVector(VT, Subtarget, DAG, dl);
06768   }
06769 
06770   // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
06771   // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
06772   // vpcmpeqd on 256-bit vectors.
06773   if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
06774     if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
06775       return Op;
06776 
06777     if (!VT.is512BitVector())
06778       return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
06779   }
06780 
06781   SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
06782   if (Broadcast.getNode())
06783     return Broadcast;
06784 
06785   unsigned EVTBits = ExtVT.getSizeInBits();
06786 
06787   unsigned NumZero  = 0;
06788   unsigned NumNonZero = 0;
06789   unsigned NonZeros = 0;
06790   bool IsAllConstants = true;
06791   SmallSet<SDValue, 8> Values;
06792   for (unsigned i = 0; i < NumElems; ++i) {
06793     SDValue Elt = Op.getOperand(i);
06794     if (Elt.getOpcode() == ISD::UNDEF)
06795       continue;
06796     Values.insert(Elt);
06797     if (Elt.getOpcode() != ISD::Constant &&
06798         Elt.getOpcode() != ISD::ConstantFP)
06799       IsAllConstants = false;
06800     if (X86::isZeroNode(Elt))
06801       NumZero++;
06802     else {
06803       NonZeros |= (1 << i);
06804       NumNonZero++;
06805     }
06806   }
06807 
06808   // All-undef vector. Return an UNDEF. All-zero vectors were handled above.
06809   if (NumNonZero == 0)
06810     return DAG.getUNDEF(VT);
06811 
06812   // Special case for a single non-zero, non-undef element.
06813   if (NumNonZero == 1) {
06814     unsigned Idx = countTrailingZeros(NonZeros);
06815     SDValue Item = Op.getOperand(Idx);
06816 
06817     // If this is an insertion of an i64 value on x86-32, and if the top bits of
06818     // the value are obviously zero, truncate the value to i32 and do the
06819     // insertion that way.  Only do this if the value is non-constant or if the
06820     // value is a constant being inserted into element 0.  It is cheaper to do
06821     // a constant pool load than it is to do a movd + shuffle.
06822     if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
06823         (!IsAllConstants || Idx == 0)) {
06824       if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
06825         // Handle SSE only.
06826         assert(VT == MVT::v2i64 && "Expected an SSE value type!");
06827         EVT VecVT = MVT::v4i32;
06828         unsigned VecElts = 4;
06829 
06830         // Truncate the value (which may itself be a constant) to i32, and
06831         // convert it to a vector with movd (S2V+shuffle to zero extend).
06832         Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
06833         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
06834 
06835         // If using the new shuffle lowering, just directly insert this.
06836         if (ExperimentalVectorShuffleLowering)
06837           return DAG.getNode(
06838               ISD::BITCAST, dl, VT,
06839               getShuffleVectorZeroOrUndef(Item, Idx * 2, true, Subtarget, DAG));
06840 
06841         Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
06842 
06843         // Now we have our 32-bit value zero extended in the low element of
06844         // a vector.  If Idx != 0, swizzle it into place.
06845         if (Idx != 0) {
06846           SmallVector<int, 4> Mask;
06847           Mask.push_back(Idx);
06848           for (unsigned i = 1; i != VecElts; ++i)
06849             Mask.push_back(i);
06850           Item = DAG.getVectorShuffle(VecVT, dl, Item, DAG.getUNDEF(VecVT),
06851                                       &Mask[0]);
06852         }
06853         return DAG.getNode(ISD::BITCAST, dl, VT, Item);
06854       }
06855     }
06856 
06857     // If we have a constant or non-constant insertion into the low element of
06858     // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
06859     // the rest of the elements.  This will be matched as movd/movq/movss/movsd
06860     // depending on what the source datatype is.
06861     if (Idx == 0) {
06862       if (NumZero == 0)
06863         return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
06864 
06865       if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
06866           (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
06867         if (VT.is256BitVector() || VT.is512BitVector()) {
06868           SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
06869           return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
06870                              Item, DAG.getIntPtrConstant(0));
06871         }
06872         assert(VT.is128BitVector() && "Expected an SSE value type!");
06873         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
06874         // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
06875         return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
06876       }
06877 
06878       if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
06879         Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
06880         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
06881         if (VT.is256BitVector()) {
06882           SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
06883           Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
06884         } else {
06885           assert(VT.is128BitVector() && "Expected an SSE value type!");
06886           Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
06887         }
06888         return DAG.getNode(ISD::BITCAST, dl, VT, Item);
06889       }
06890     }
06891 
06892     // Is it a vector logical left shift?
06893     if (NumElems == 2 && Idx == 1 &&
06894         X86::isZeroNode(Op.getOperand(0)) &&
06895         !X86::isZeroNode(Op.getOperand(1))) {
06896       unsigned NumBits = VT.getSizeInBits();
06897       return getVShift(true, VT,
06898                        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
06899                                    VT, Op.getOperand(1)),
06900                        NumBits/2, DAG, *this, dl);
06901     }
06902 
06903     if (IsAllConstants) // Otherwise, it's better to do a constpool load.
06904       return SDValue();
06905 
06906     // Otherwise, if this is a vector with i32 or f32 elements, and the element
06907     // is a non-constant being inserted into an element other than the low one,
06908     // we can't use a constant pool load.  Instead, use SCALAR_TO_VECTOR (aka
06909     // movd/movss) to move this into the low element, then shuffle it into
06910     // place.
06911     if (EVTBits == 32) {
06912       Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
06913 
06914       // If using the new shuffle lowering, just directly insert this.
06915       if (ExperimentalVectorShuffleLowering)
06916         return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
06917 
06918       // Turn it into a shuffle of zero and zero-extended scalar to vector.
06919       Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
06920       SmallVector<int, 8> MaskVec;
06921       for (unsigned i = 0; i != NumElems; ++i)
06922         MaskVec.push_back(i == Idx ? 0 : 1);
06923       return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
06924     }
06925   }
06926 
06927   // Splat is obviously ok. Let legalizer expand it to a shuffle.
06928   if (Values.size() == 1) {
06929     if (EVTBits == 32) {
06930       // Instead of a shuffle like this:
06931       // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
06932       // Check if it's possible to issue this instead.
06933       // shuffle (vload ptr), undef, <1, 1, 1, 1>
06934       unsigned Idx = countTrailingZeros(NonZeros);
06935       SDValue Item = Op.getOperand(Idx);
06936       if (Op.getNode()->isOnlyUserOf(Item.getNode()))
06937         return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
06938     }
06939     return SDValue();
06940   }
06941 
06942   // A vector full of immediates; various special cases are already
06943   // handled, so this is best done with a single constant-pool load.
06944   if (IsAllConstants)
06945     return SDValue();
06946 
06947   // For AVX-length vectors, build the individual 128-bit pieces and use
06948   // shuffles to put them in place.
06949   if (VT.is256BitVector() || VT.is512BitVector()) {
06950     SmallVector<SDValue, 64> V;
06951     for (unsigned i = 0; i != NumElems; ++i)
06952       V.push_back(Op.getOperand(i));
06953 
06954     EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
06955 
06956     // Build both the lower and upper subvector.
06957     SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
06958                                 makeArrayRef(&V[0], NumElems/2));
06959     SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
06960                                 makeArrayRef(&V[NumElems / 2], NumElems/2));
06961 
06962     // Recreate the wider vector with the lower and upper part.
06963     if (VT.is256BitVector())
06964       return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
06965     return Concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
06966   }
06967 
06968   // Let legalizer expand 2-wide build_vectors.
06969   if (EVTBits == 64) {
06970     if (NumNonZero == 1) {
06971       // One half is zero or undef.
06972       unsigned Idx = countTrailingZeros(NonZeros);
06973       SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
06974                                  Op.getOperand(Idx));
06975       return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
06976     }
06977     return SDValue();
06978   }
06979 
06980   // If element VT is < 32 bits, convert it to inserts into a zero vector.
06981   if (EVTBits == 8 && NumElems == 16) {
06982     SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
06983                                         Subtarget, *this);
06984     if (V.getNode()) return V;
06985   }
06986 
06987   if (EVTBits == 16 && NumElems == 8) {
06988     SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
06989                                       Subtarget, *this);
06990     if (V.getNode()) return V;
06991   }
06992 
06993   // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
06994   if (EVTBits == 32 && NumElems == 4) {
06995     SDValue V = LowerBuildVectorv4x32(Op, NumElems, NonZeros, NumNonZero,
06996                                       NumZero, DAG, Subtarget, *this);
06997     if (V.getNode())
06998       return V;
06999   }
07000 
07001   // If element VT is == 32 bits, turn it into a number of shuffles.
07002   SmallVector<SDValue, 8> V(NumElems);
07003   if (NumElems == 4 && NumZero > 0) {
07004     for (unsigned i = 0; i < 4; ++i) {
07005       bool isZero = !(NonZeros & (1 << i));
07006       if (isZero)
07007         V[i] = getZeroVector(VT, Subtarget, DAG, dl);
07008       else
07009         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
07010     }
07011 
07012     for (unsigned i = 0; i < 2; ++i) {
07013       switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
07014         default: break;
07015         case 0:
07016           V[i] = V[i*2];  // Must be a zero vector.
07017           break;
07018         case 1:
07019           V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
07020           break;
07021         case 2:
07022           V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
07023           break;
07024         case 3:
07025           V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
07026           break;
07027       }
07028     }
07029 
07030     bool Reverse1 = (NonZeros & 0x3) == 2;
07031     bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
07032     int MaskVec[] = {
07033       Reverse1 ? 1 : 0,
07034       Reverse1 ? 0 : 1,
07035       static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
07036       static_cast<int>(Reverse2 ? NumElems   : NumElems+1)
07037     };
07038     return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
07039   }
07040 
07041   if (Values.size() > 1 && VT.is128BitVector()) {
07042     // Check for a build vector of consecutive loads.
07043     for (unsigned i = 0; i < NumElems; ++i)
07044       V[i] = Op.getOperand(i);
07045 
07046     // Check for elements which are consecutive loads.
07047     SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
07048     if (LD.getNode())
07049       return LD;
07050 
07051     // Check for a build vector made mostly from a shuffle plus a few inserts.
07052     SDValue Sh = buildFromShuffleMostly(Op, DAG);
07053     if (Sh.getNode())
07054       return Sh;
07055 
07056     // For SSE 4.1, use insertps to insert each of the remaining elements in turn.
07057     if (getSubtarget()->hasSSE41()) {
07058       SDValue Result;
07059       if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
07060         Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
07061       else
07062         Result = DAG.getUNDEF(VT);
07063 
07064       for (unsigned i = 1; i < NumElems; ++i) {
07065         if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
07066         Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
07067                              Op.getOperand(i), DAG.getIntPtrConstant(i));
07068       }
07069       return Result;
07070     }
07071 
07072     // Otherwise, expand into a number of unpckl*, start by extending each of
07073     // our (non-undef) elements to the full vector width with the element in the
07074     // bottom slot of the vector (which generates no code for SSE).
07075     for (unsigned i = 0; i < NumElems; ++i) {
07076       if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
07077         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
07078       else
07079         V[i] = DAG.getUNDEF(VT);
07080     }
07081 
07082     // Next, we iteratively mix elements, e.g. for v4f32:
07083     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
07084     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
07085     //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
07086     unsigned EltStride = NumElems >> 1;
07087     while (EltStride != 0) {
07088       for (unsigned i = 0; i < EltStride; ++i) {
07089         // If V[i+EltStride] is undef and this is the first round of mixing,
07090         // then it is safe to just drop this shuffle: V[i] is already in the
07091         // right place, the one element (since it's the first round) being
07092         // inserted as undef can be dropped.  This isn't safe for successive
07093         // rounds because they will permute elements within both vectors.
07094         if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
07095             EltStride == NumElems/2)
07096           continue;
07097 
07098         V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
07099       }
07100       EltStride >>= 1;
07101     }
07102     return V[0];
07103   }
07104   return SDValue();
07105 }
07106 
07107 // LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
07108 // to create 256-bit vectors from two other 128-bit ones.
07109 static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
07110   SDLoc dl(Op);
07111   MVT ResVT = Op.getSimpleValueType();
07112 
07113   assert((ResVT.is256BitVector() ||
07114           ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
07115 
07116   SDValue V1 = Op.getOperand(0);
07117   SDValue V2 = Op.getOperand(1);
07118   unsigned NumElems = ResVT.getVectorNumElements();
07119   if (ResVT.is256BitVector())
07120     return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
07121 
07122   if (Op.getNumOperands() == 4) {
07123     MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
07124                                 ResVT.getVectorNumElements()/2);
07125     SDValue V3 = Op.getOperand(2);
07126     SDValue V4 = Op.getOperand(3);
07127     return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl),
07128       Concat128BitVectors(V3, V4, HalfVT, NumElems/2, DAG, dl), ResVT, NumElems, DAG, dl);
07129   }
07130   return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
07131 }
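// For instance (a sketch), concatenating two 128-bit halves
//   (v8f32 concat_vectors %lo, %hi)
// goes through Concat128BitVectors, which builds the value as a pair of
// insert_subvector nodes and typically selects to a single vinsertf128.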
07132 
07133 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
07134   MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType();
07135   assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
07136          (VT.is512BitVector() && (Op.getNumOperands() == 2 ||
07137           Op.getNumOperands() == 4)));
07138 
07139   // AVX can use the vinsertf128 instruction to create 256-bit vectors
07140   // from t