X86ISelLowering.cpp
00001 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines the interfaces that X86 uses to lower LLVM code into a
00011 // selection DAG.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "X86ISelLowering.h"
00016 #include "Utils/X86ShuffleDecode.h"
00017 #include "X86CallingConv.h"
00018 #include "X86InstrBuilder.h"
00019 #include "X86MachineFunctionInfo.h"
00020 #include "X86TargetMachine.h"
00021 #include "X86TargetObjectFile.h"
00022 #include "llvm/ADT/SmallSet.h"
00023 #include "llvm/ADT/Statistic.h"
00024 #include "llvm/ADT/StringExtras.h"
00025 #include "llvm/ADT/StringSwitch.h"
00026 #include "llvm/ADT/VariadicFunction.h"
00027 #include "llvm/CodeGen/IntrinsicLowering.h"
00028 #include "llvm/CodeGen/MachineFrameInfo.h"
00029 #include "llvm/CodeGen/MachineFunction.h"
00030 #include "llvm/CodeGen/MachineInstrBuilder.h"
00031 #include "llvm/CodeGen/MachineJumpTableInfo.h"
00032 #include "llvm/CodeGen/MachineModuleInfo.h"
00033 #include "llvm/CodeGen/MachineRegisterInfo.h"
00034 #include "llvm/IR/CallSite.h"
00035 #include "llvm/IR/CallingConv.h"
00036 #include "llvm/IR/Constants.h"
00037 #include "llvm/IR/DerivedTypes.h"
00038 #include "llvm/IR/Function.h"
00039 #include "llvm/IR/GlobalAlias.h"
00040 #include "llvm/IR/GlobalVariable.h"
00041 #include "llvm/IR/Instructions.h"
00042 #include "llvm/IR/Intrinsics.h"
00043 #include "llvm/MC/MCAsmInfo.h"
00044 #include "llvm/MC/MCContext.h"
00045 #include "llvm/MC/MCExpr.h"
00046 #include "llvm/MC/MCSymbol.h"
00047 #include "llvm/Support/CommandLine.h"
00048 #include "llvm/Support/Debug.h"
00049 #include "llvm/Support/ErrorHandling.h"
00050 #include "llvm/Support/MathExtras.h"
00051 #include "llvm/Target/TargetOptions.h"
00052 #include "X86IntrinsicsInfo.h"
00053 #include <bitset>
00054 #include <numeric>
00055 #include <cctype>
00056 using namespace llvm;
00057 
00058 #define DEBUG_TYPE "x86-isel"
00059 
00060 STATISTIC(NumTailCalls, "Number of tail calls");
00061 
00062 static cl::opt<bool> ExperimentalVectorWideningLegalization(
00063     "x86-experimental-vector-widening-legalization", cl::init(false),
00064     cl::desc("Enable an experimental vector type legalization through widening "
00065              "rather than promotion."),
00066     cl::Hidden);
00067 
00068 static cl::opt<bool> ExperimentalVectorShuffleLowering(
00069     "x86-experimental-vector-shuffle-lowering", cl::init(false),
00070     cl::desc("Enable an experimental vector shuffle lowering code path."),
00071     cl::Hidden);
00072 
00073 // Forward declarations.
00074 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
00075                        SDValue V2);
00076 
00077 static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
00078                                 SelectionDAG &DAG, SDLoc dl,
00079                                 unsigned vectorWidth) {
00080   assert((vectorWidth == 128 || vectorWidth == 256) &&
00081          "Unsupported vector width");
00082   EVT VT = Vec.getValueType();
00083   EVT ElVT = VT.getVectorElementType();
00084   unsigned Factor = VT.getSizeInBits()/vectorWidth;
00085   EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
00086                                   VT.getVectorNumElements()/Factor);
00087 
00088   // Extract from UNDEF is UNDEF.
00089   if (Vec.getOpcode() == ISD::UNDEF)
00090     return DAG.getUNDEF(ResultVT);
00091 
00092   // Extract the relevant vectorWidth bits.  Generate an EXTRACT_SUBVECTOR node.
00093   unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
00094 
00095   // This is the index of the first element of the vectorWidth-bit chunk
00096   // we want.
00097   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
00098                                * ElemsPerChunk);
00099 
00100   // If the input is a buildvector just emit a smaller one.
00101   if (Vec.getOpcode() == ISD::BUILD_VECTOR)
00102     return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
00103                        makeArrayRef(Vec->op_begin()+NormalizedIdxVal,
00104                                     ElemsPerChunk));
00105 
00106   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
00107   SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
00108                                VecIdx);
00109 
00110   return Result;
00111 }
00112 
00113 /// Generate a DAG to grab 128 bits from a vector > 128 bits.  This
00114 /// sets things up to match an AVX VEXTRACTF128 / VEXTRACTI128
00115 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
00116 /// instruction or a simple subregister reference.  Idx is an index in the
00117 /// 128 bits we want.  It need not be aligned to a 128-bit boundary.  That makes
00118 /// lowering EXTRACT_VECTOR_ELT operations easier.
00119 static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
00120                                    SelectionDAG &DAG, SDLoc dl) {
00121   assert((Vec.getValueType().is256BitVector() ||
00122           Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
00123   return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
00124 }
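// [Editor's note: illustrative sketch, not part of the original source.]
// A typical use of Extract128BitVector is splitting a wider vector into its
// 128-bit halves; assuming Vec is a 256-bit vector with NumElems elements:
//
//   SDValue Lo = Extract128BitVector(Vec, 0, DAG, dl);
//   SDValue Hi = Extract128BitVector(Vec, NumElems / 2, DAG, dl);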
00125 
00126 /// Generate a DAG to grab 256-bits from a 512-bit vector.
00127 static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
00128                                    SelectionDAG &DAG, SDLoc dl) {
00129   assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
00130   return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
00131 }
00132 
00133 static SDValue InsertSubVector(SDValue Result, SDValue Vec,
00134                                unsigned IdxVal, SelectionDAG &DAG,
00135                                SDLoc dl, unsigned vectorWidth) {
00136   assert((vectorWidth == 128 || vectorWidth == 256) &&
00137          "Unsupported vector width");
00138   // Inserting UNDEF is a no-op; just return Result.
00139   if (Vec.getOpcode() == ISD::UNDEF)
00140     return Result;
00141   EVT VT = Vec.getValueType();
00142   EVT ElVT = VT.getVectorElementType();
00143   EVT ResultVT = Result.getValueType();
00144 
00145   // Insert the relevant vectorWidth bits.
00146   unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
00147 
00148   // This is the index of the first element of the vectorWidth-bit chunk
00149   // we want.
00150   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
00151                                * ElemsPerChunk);
00152 
00153   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
00154   return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
00155                      VecIdx);
00156 }
00157 /// Generate a DAG to put 128 bits into a vector > 128 bits.  This
00158 /// sets things up to match an AVX VINSERTF128/VINSERTI128 or
00159 /// AVX-512 VINSERTF32x4/VINSERTI32x4 instruction or a
00160 /// simple superregister reference.  Idx is an index in the 128 bits
00161 /// we want.  It need not be aligned to a 128-bit boundary.  That makes
00162 /// lowering INSERT_VECTOR_ELT operations easier.
00163 static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
00164                                   unsigned IdxVal, SelectionDAG &DAG,
00165                                   SDLoc dl) {
00166   assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
00167   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
00168 }
00169 
00170 static SDValue Insert256BitVector(SDValue Result, SDValue Vec,
00171                                   unsigned IdxVal, SelectionDAG &DAG,
00172                                   SDLoc dl) {
00173   assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
00174   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
00175 }
00176 
00177 /// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
00178 /// instructions. This is used because creating CONCAT_VECTORS nodes of
00179 /// BUILD_VECTOR nodes returns a larger BUILD_VECTOR while we're trying to
00180 /// lower large BUILD_VECTORs.
00181 static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
00182                                    unsigned NumElems, SelectionDAG &DAG,
00183                                    SDLoc dl) {
00184   SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
00185   return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
00186 }
00187 
00188 static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
00189                                    unsigned NumElems, SelectionDAG &DAG,
00190                                    SDLoc dl) {
00191   SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
00192   return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
00193 }
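// [Editor's note: illustrative sketch, not part of the original source.]
// Concat128BitVectors is the inverse of the extraction helpers above: it
// rebuilds a full-width vector from two halves, e.g. (hypothetical operands)
// forming a v8i32 from two v4i32 halves:
//
//   SDValue Full = Concat128BitVectors(LoHalf, HiHalf, MVT::v8i32, 8, DAG, dl);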
00194 
00195 static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
00196   if (TT.isOSBinFormatMachO()) {
00197     if (TT.getArch() == Triple::x86_64)
00198       return new X86_64MachoTargetObjectFile();
00199     return new TargetLoweringObjectFileMachO();
00200   }
00201 
00202   if (TT.isOSLinux())
00203     return new X86LinuxTargetObjectFile();
00204   if (TT.isOSBinFormatELF())
00205     return new TargetLoweringObjectFileELF();
00206   if (TT.isKnownWindowsMSVCEnvironment())
00207     return new X86WindowsTargetObjectFile();
00208   if (TT.isOSBinFormatCOFF())
00209     return new TargetLoweringObjectFileCOFF();
00210   llvm_unreachable("unknown subtarget type");
00211 }
00212 
00213 // FIXME: This should stop caching the target machine as soon as
00214 // we can remove resetOperationActions et al.
00215 X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
00216   : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
00217   Subtarget = &TM.getSubtarget<X86Subtarget>();
00218   X86ScalarSSEf64 = Subtarget->hasSSE2();
00219   X86ScalarSSEf32 = Subtarget->hasSSE1();
00220   TD = getDataLayout();
00221 
00222   resetOperationActions();
00223 }
00224 
00225 void X86TargetLowering::resetOperationActions() {
00226   const TargetMachine &TM = getTargetMachine();
00227   static bool FirstTimeThrough = true;
00228 
00229   // If none of the target options have changed, then we don't need to reset the
00230   // operation actions.
00231   if (!FirstTimeThrough && TO == TM.Options) return;
00232 
00233   if (!FirstTimeThrough) {
00234     // Reinitialize the actions.
00235     initActions();
00236   }
00237   FirstTimeThrough = false;
00238 
00239   TO = TM.Options;
00240 
00241   // Set up the TargetLowering object.
00242   static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
00243 
00244   // X86 is weird; it always uses i8 for shift amounts and setcc results.
00245   setBooleanContents(ZeroOrOneBooleanContent);
00246   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
00247   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
00248 
00249   // For 64-bit code, use the ILP scheduler since we have so many registers;
00250   // for 32-bit code, use the register-pressure-specific scheduling.
00251   // For Atom, always use ILP scheduling.
00252   if (Subtarget->isAtom())
00253     setSchedulingPreference(Sched::ILP);
00254   else if (Subtarget->is64Bit())
00255     setSchedulingPreference(Sched::ILP);
00256   else
00257     setSchedulingPreference(Sched::RegPressure);
00258   const X86RegisterInfo *RegInfo =
00259       TM.getSubtarget<X86Subtarget>().getRegisterInfo();
00260   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
00261 
00262   // Bypass expensive divides on Atom when compiling with O2
00263   if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
00264     addBypassSlowDiv(32, 8);
00265     if (Subtarget->is64Bit())
00266       addBypassSlowDiv(64, 16);
00267   }
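// [Editor's note: illustrative sketch, not part of the original source.]
// addBypassSlowDiv(32, 8) asks the divide-bypass transform to guard a 32-bit
// divide with a cheaper 8-bit one when both operands happen to fit in 8 bits;
// conceptually the emitted control flow is:
//
//   if (((a | b) & ~0xFFu) == 0)        // both operands fit in 8 bits
//     q = (uint8_t)a / (uint8_t)b;      // fast narrow divide
//   else
//     q = a / b;                        // full-width divide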
00268 
00269   if (Subtarget->isTargetKnownWindowsMSVC()) {
00270     // Setup Windows compiler runtime calls.
00271     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
00272     setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
00273     setLibcallName(RTLIB::SREM_I64, "_allrem");
00274     setLibcallName(RTLIB::UREM_I64, "_aullrem");
00275     setLibcallName(RTLIB::MUL_I64, "_allmul");
00276     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
00277     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
00278     setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
00279     setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
00280     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
00281 
00282     // The _ftol2 runtime function has an unusual calling conv, which
00283     // is modeled by a special pseudo-instruction.
00284     setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
00285     setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
00286     setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
00287     setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
00288   }
00289 
00290   if (Subtarget->isTargetDarwin()) {
00291     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
00292     setUseUnderscoreSetJmp(false);
00293     setUseUnderscoreLongJmp(false);
00294   } else if (Subtarget->isTargetWindowsGNU()) {
00295     // The MS runtime is weird: it exports _setjmp, but plain longjmp (no underscore)!
00296     setUseUnderscoreSetJmp(true);
00297     setUseUnderscoreLongJmp(false);
00298   } else {
00299     setUseUnderscoreSetJmp(true);
00300     setUseUnderscoreLongJmp(true);
00301   }
00302 
00303   // Set up the register classes.
00304   addRegisterClass(MVT::i8, &X86::GR8RegClass);
00305   addRegisterClass(MVT::i16, &X86::GR16RegClass);
00306   addRegisterClass(MVT::i32, &X86::GR32RegClass);
00307   if (Subtarget->is64Bit())
00308     addRegisterClass(MVT::i64, &X86::GR64RegClass);
00309 
00310   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
00311 
00312   // We don't accept any truncstore of integer registers.
00313   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
00314   setTruncStoreAction(MVT::i64, MVT::i16, Expand);
00315   setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
00316   setTruncStoreAction(MVT::i32, MVT::i16, Expand);
00317   setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
00318   setTruncStoreAction(MVT::i16, MVT::i8,  Expand);
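// [Editor's note: illustrative sketch, not part of the original source.]
// Marking a truncating store as Expand makes the legalizer split it into an
// explicit truncate followed by a plain store of the narrower type, roughly:
//
//   truncstore i64 %v to i16   ==>   %t = trunc i64 %v to i16;  store i16 %t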
00319 
00320   // SETOEQ and SETUNE require checking two conditions.
00321   setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
00322   setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
00323   setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
00324   setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
00325   setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
00326   setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
00327 
00328   // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
00329   // operation.
00330   setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
00331   setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
00332   setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
00333 
00334   if (Subtarget->is64Bit()) {
00335     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
00336     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
00337   } else if (!TM.Options.UseSoftFloat) {
00338     // We have an algorithm for SSE2->double, and we turn this into a
00339     // 64-bit FILD followed by conditional FADD for other targets.
00340     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
00341     // We have an algorithm for SSE2, and we turn this into a 64-bit
00342     // FILD for other targets.
00343     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
00344   }
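// [Editor's note: illustrative sketch, not part of the original source.]
// Promoting a narrow UINT_TO_FP to a wider SINT_TO_FP is safe because the
// zero-extended value is always non-negative in the wider type:
//
//   // for a uint16_t value x:
//   (double)x == (double)(int32_t)x   // zero-extend, then signed convert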
00345 
00346   // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
00347   // this operation.
00348   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
00349   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
00350 
00351   if (!TM.Options.UseSoftFloat) {
00352     // SSE has no i16 to fp conversion, only i32
00353     if (X86ScalarSSEf32) {
00354       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00355       // f32 and f64 cases are Legal, f80 case is not
00356       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00357     } else {
00358       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
00359       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00360     }
00361   } else {
00362     setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00363     setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Promote);
00364   }
00365 
00366   // In 32-bit mode these are custom lowered.  In 64-bit mode f32 and f64
00367   // are Legal and f80 is custom lowered.
00368   setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
00369   setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
00370 
00371   // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
00372   // this operation.
00373   setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
00374   setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
00375 
00376   if (X86ScalarSSEf32) {
00377     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
00378     // f32 and f64 cases are Legal, f80 case is not
00379     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00380   } else {
00381     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
00382     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00383   }
00384 
00385   // Handle FP_TO_UINT by promoting the destination to a larger signed
00386   // conversion.
00387   setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
00388   setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
00389   setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
00390 
00391   if (Subtarget->is64Bit()) {
00392     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
00393     setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
00394   } else if (!TM.Options.UseSoftFloat) {
00395     // Since AVX is a superset of SSE3, only check for SSE here.
00396     if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
00397       // Expand FP_TO_UINT into a select.
00398       // FIXME: We would like to use a Custom expander here eventually to do
00399       // the optimal thing for SSE vs. the default expansion in the legalizer.
00400       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
00401     else
00402       // With SSE3 we can use fisttpll to convert to a signed i64; without
00403       // SSE, we're stuck with a fistpll.
00404       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
00405   }
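// [Editor's note: illustrative sketch, not part of the original source.]
// The select-based expansion of a 32-bit FP_TO_UINT performs the signed
// conversion twice and picks the right result, conceptually:
//
//   uint32_t r = (x < 2147483648.0)
//                  ? (uint32_t)(int32_t)x
//                  : ((uint32_t)(int32_t)(x - 2147483648.0)) ^ 0x80000000u;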
00406 
00407   if (isTargetFTOL()) {
00408     // Use the _ftol2 runtime function, which has a pseudo-instruction
00409     // to handle its weird calling convention.
00410     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Custom);
00411   }
00412 
00413   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
00414   if (!X86ScalarSSEf64) {
00415     setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
00416     setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
00417     if (Subtarget->is64Bit()) {
00418       setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
00419       // Without SSE, i64->f64 goes through memory.
00420       setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
00421     }
00422   }
00423 
00424   // Scalar integer divide and remainder are lowered to use operations that
00425   // produce two results, to match the available instructions. This exposes
00426   // the two-result form to trivial CSE, which is able to combine x/y and x%y
00427   // into a single instruction.
00428   //
00429   // Scalar integer multiply-high is also lowered to use two-result
00430   // operations, to match the available instructions. However, plain multiply
00431   // (low) operations are left as Legal, as there are single-result
00432   // instructions for this in x86. Using the two-result multiply instructions
00433   // when both high and low results are needed must be arranged by dagcombine.
00434   for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
00435     MVT VT = IntVTs[i];
00436     setOperationAction(ISD::MULHS, VT, Expand);
00437     setOperationAction(ISD::MULHU, VT, Expand);
00438     setOperationAction(ISD::SDIV, VT, Expand);
00439     setOperationAction(ISD::UDIV, VT, Expand);
00440     setOperationAction(ISD::SREM, VT, Expand);
00441     setOperationAction(ISD::UREM, VT, Expand);
00442 
00443     // Add/Sub overflow ops with MVT::Glue are lowered to EFLAGS dependences.
00444     setOperationAction(ISD::ADDC, VT, Custom);
00445     setOperationAction(ISD::ADDE, VT, Custom);
00446     setOperationAction(ISD::SUBC, VT, Custom);
00447     setOperationAction(ISD::SUBE, VT, Custom);
00448   }
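// [Editor's note: illustrative sketch, not part of the original source.]
// With SDIV/SREM expanded to the two-result SDIVREM node, a source pair such
// as
//
//   int q = a / b;
//   int r = a % b;
//
// is CSE'd into a single x86 idiv, which leaves the quotient in EAX and the
// remainder in EDX.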
00449 
00450   setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
00451   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
00452   setOperationAction(ISD::BR_CC            , MVT::f32,   Expand);
00453   setOperationAction(ISD::BR_CC            , MVT::f64,   Expand);
00454   setOperationAction(ISD::BR_CC            , MVT::f80,   Expand);
00455   setOperationAction(ISD::BR_CC            , MVT::i8,    Expand);
00456   setOperationAction(ISD::BR_CC            , MVT::i16,   Expand);
00457   setOperationAction(ISD::BR_CC            , MVT::i32,   Expand);
00458   setOperationAction(ISD::BR_CC            , MVT::i64,   Expand);
00459   setOperationAction(ISD::SELECT_CC        , MVT::f32,   Expand);
00460   setOperationAction(ISD::SELECT_CC        , MVT::f64,   Expand);
00461   setOperationAction(ISD::SELECT_CC        , MVT::f80,   Expand);
00462   setOperationAction(ISD::SELECT_CC        , MVT::i8,    Expand);
00463   setOperationAction(ISD::SELECT_CC        , MVT::i16,   Expand);
00464   setOperationAction(ISD::SELECT_CC        , MVT::i32,   Expand);
00465   setOperationAction(ISD::SELECT_CC        , MVT::i64,   Expand);
00466   if (Subtarget->is64Bit())
00467     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
00468   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
00469   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
00470   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
00471   setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
00472   setOperationAction(ISD::FREM             , MVT::f32  , Expand);
00473   setOperationAction(ISD::FREM             , MVT::f64  , Expand);
00474   setOperationAction(ISD::FREM             , MVT::f80  , Expand);
00475   setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
00476 
00477   // Promote the i8 variants and force them up to i32, which has a shorter
00478   // encoding.
00479   setOperationAction(ISD::CTTZ             , MVT::i8   , Promote);
00480   AddPromotedToType (ISD::CTTZ             , MVT::i8   , MVT::i32);
00481   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , Promote);
00482   AddPromotedToType (ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , MVT::i32);
00483   if (Subtarget->hasBMI()) {
00484     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , Expand);
00485     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Expand);
00486     if (Subtarget->is64Bit())
00487       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
00488   } else {
00489     setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
00490     setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
00491     if (Subtarget->is64Bit())
00492       setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
00493   }
00494 
00495   if (Subtarget->hasLZCNT()) {
00496     // When promoting the i8 variants, force them to i32 for a shorter
00497     // encoding.
00498     setOperationAction(ISD::CTLZ           , MVT::i8   , Promote);
00499     AddPromotedToType (ISD::CTLZ           , MVT::i8   , MVT::i32);
00500     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Promote);
00501     AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
00502     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Expand);
00503     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Expand);
00504     if (Subtarget->is64Bit())
00505       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
00506   } else {
00507     setOperationAction(ISD::CTLZ           , MVT::i8   , Custom);
00508     setOperationAction(ISD::CTLZ           , MVT::i16  , Custom);
00509     setOperationAction(ISD::CTLZ           , MVT::i32  , Custom);
00510     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Custom);
00511     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Custom);
00512     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Custom);
00513     if (Subtarget->is64Bit()) {
00514       setOperationAction(ISD::CTLZ         , MVT::i64  , Custom);
00515       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
00516     }
00517   }
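// [Editor's note: illustrative sketch, not part of the original source.]
// Without LZCNT, the custom CTLZ lowering is built around BSR, which yields
// the index of the highest set bit (and is undefined for an input of zero):
//
//   ctlz32(x) = 31 - bsr(x)   // plus a separate zero check for plain CTLZ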
00518 
00519   // Special handling for half-precision floating point conversions.
00520   // If we don't have F16C support, then lower half float conversions
00521   // into library calls.
00522   if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
00523     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
00524     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
00525   }
00526 
00527   // There's never any support for operations beyond MVT::f32.
00528   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
00529   setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
00530   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
00531   setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
00532 
00533   setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
00534   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
00535   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
00536   setTruncStoreAction(MVT::f80, MVT::f16, Expand);
00537 
00538   if (Subtarget->hasPOPCNT()) {
00539     setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
00540   } else {
00541     setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
00542     setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
00543     setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
00544     if (Subtarget->is64Bit())
00545       setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
00546   }
00547 
00548   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
00549 
00550   if (!Subtarget->hasMOVBE())
00551     setOperationAction(ISD::BSWAP          , MVT::i16  , Expand);
00552 
00553   // These should be promoted to a larger select which is supported.
00554   setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
00555   // X86 wants to expand cmov itself.
00556   setOperationAction(ISD::SELECT          , MVT::i8   , Custom);
00557   setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
00558   setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
00559   setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
00560   setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
00561   setOperationAction(ISD::SELECT          , MVT::f80  , Custom);
00562   setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
00563   setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
00564   setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
00565   setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
00566   setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
00567   setOperationAction(ISD::SETCC           , MVT::f80  , Custom);
00568   if (Subtarget->is64Bit()) {
00569     setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
00570     setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
00571   }
00572   setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
00573   // NOTE: EH_SJLJ_SETJMP/_LONGJMP as supported here are NOT intended to
00574   // support SjLj exception handling; they are a lightweight setjmp/longjmp
00575   // replacement used for continuations, user-level threading, etc. As a
00576   // result, no other SjLj exception interfaces are implemented, so please
00577   // don't build your own exception handling on top of them.
00578   // LLVM/Clang supports zero-cost DWARF exception handling.
00579   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
00580   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
00581 
00582   // Darwin ABI issue.
00583   setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
00584   setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
00585   setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
00586   setOperationAction(ISD::GlobalTLSAddress, MVT::i32  , Custom);
00587   if (Subtarget->is64Bit())
00588     setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
00589   setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
00590   setOperationAction(ISD::BlockAddress    , MVT::i32  , Custom);
00591   if (Subtarget->is64Bit()) {
00592     setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
00593     setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
00594     setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
00595     setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
00596     setOperationAction(ISD::BlockAddress  , MVT::i64  , Custom);
00597   }
00598   // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
00599   setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
00600   setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
00601   setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
00602   if (Subtarget->is64Bit()) {
00603     setOperationAction(ISD::SHL_PARTS     , MVT::i64  , Custom);
00604     setOperationAction(ISD::SRA_PARTS     , MVT::i64  , Custom);
00605     setOperationAction(ISD::SRL_PARTS     , MVT::i64  , Custom);
00606   }
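// [Editor's note: illustrative sketch, not part of the original source.]
// On 32-bit x86 a 64-bit shift becomes a *_PARTS node over the two 32-bit
// halves; for a left shift by an amount c with 0 < c < 32 this is roughly:
//
//   hi = (hi << c) | (lo >> (32 - c));   // SHLD-style combine of the halves
//   lo =  lo << c;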
00607 
00608   if (Subtarget->hasSSE1())
00609     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
00610 
00611   setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);
00612 
00613   // Expand certain atomics
00614   for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
00615     MVT VT = IntVTs[i];
00616     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
00617     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
00618     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
00619   }
00620 
00621   if (Subtarget->hasCmpxchg16b()) {
00622     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
00623   }
00624 
00625   // FIXME - use subtarget debug flags
00626   if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
00627       !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
00628     setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
00629   }
00630 
00631   if (Subtarget->is64Bit()) {
00632     setExceptionPointerRegister(X86::RAX);
00633     setExceptionSelectorRegister(X86::RDX);
00634   } else {
00635     setExceptionPointerRegister(X86::EAX);
00636     setExceptionSelectorRegister(X86::EDX);
00637   }
00638   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
00639   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
00640 
00641   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
00642   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
00643 
00644   setOperationAction(ISD::TRAP, MVT::Other, Legal);
00645   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
00646 
00647   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
00648   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
00649   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
00650   if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
00651     // TargetInfo::X86_64ABIBuiltinVaList
00652     setOperationAction(ISD::VAARG           , MVT::Other, Custom);
00653     setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
00654   } else {
00655     // TargetInfo::CharPtrBuiltinVaList
00656     setOperationAction(ISD::VAARG           , MVT::Other, Expand);
00657     setOperationAction(ISD::VACOPY          , MVT::Other, Expand);
00658   }
00659 
00660   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
00661   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
00662 
00663   setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
00664 
00665   if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
00666     // f32 and f64 use SSE.
00667     // Set up the FP register classes.
00668     addRegisterClass(MVT::f32, &X86::FR32RegClass);
00669     addRegisterClass(MVT::f64, &X86::FR64RegClass);
00670 
00671     // Use ANDPD to simulate FABS.
00672     setOperationAction(ISD::FABS , MVT::f64, Custom);
00673     setOperationAction(ISD::FABS , MVT::f32, Custom);
00674 
00675     // Use XORP to simulate FNEG.
00676     setOperationAction(ISD::FNEG , MVT::f64, Custom);
00677     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00678 
00679     // Use ANDPD and ORPD to simulate FCOPYSIGN.
00680     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
00681     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00682 
00683     // Lower this to FGETSIGNx86 plus an AND.
00684     setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
00685     setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
00686 
00687     // We don't support sin/cos/fmod
00688     setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00689     setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00690     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00691     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00692     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00693     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00694 
00695     // Expand FP immediates into loads from the stack, except for the special
00696     // cases we handle.
00697     addLegalFPImmediate(APFloat(+0.0)); // xorpd
00698     addLegalFPImmediate(APFloat(+0.0f)); // xorps
00699   } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
00700     // Use SSE for f32, x87 for f64.
00701     // Set up the FP register classes.
00702     addRegisterClass(MVT::f32, &X86::FR32RegClass);
00703     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
00704 
00705     // Use ANDPS to simulate FABS.
00706     setOperationAction(ISD::FABS , MVT::f32, Custom);
00707 
00708     // Use XORP to simulate FNEG.
00709     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00710 
00711     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
00712 
00713     // Use ANDPS and ORPS to simulate FCOPYSIGN.
00714     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00715     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00716 
00717     // We don't support sin/cos/fmod
00718     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00719     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00720     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00721 
00722     // Special cases we handle for FP constants.
00723     addLegalFPImmediate(APFloat(+0.0f)); // xorps
00724     addLegalFPImmediate(APFloat(+0.0)); // FLD0
00725     addLegalFPImmediate(APFloat(+1.0)); // FLD1
00726     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
00727     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
00728 
00729     if (!TM.Options.UnsafeFPMath) {
00730       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00731       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00732       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00733     }
00734   } else if (!TM.Options.UseSoftFloat) {
00735     // f32 and f64 in x87.
00736     // Set up the FP register classes.
00737     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
00738     addRegisterClass(MVT::f32, &X86::RFP32RegClass);
00739 
00740     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
00741     setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
00742     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00743     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
00744 
00745     if (!TM.Options.UnsafeFPMath) {
00746       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00747       setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00748       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00749       setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00750       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00751       setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00752     }
00753     addLegalFPImmediate(APFloat(+0.0)); // FLD0
00754     addLegalFPImmediate(APFloat(+1.0)); // FLD1
00755     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
00756     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
00757     addLegalFPImmediate(APFloat(+0.0f)); // FLD0
00758     addLegalFPImmediate(APFloat(+1.0f)); // FLD1
00759     addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
00760     addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
00761   }
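// [Editor's note: illustrative sketch, not part of the original source.]
// The ANDPD/XORPD tricks mentioned above operate on the IEEE bit pattern of
// the scalar held in an XMM register; for doubles, conceptually:
//
//   fabs(x) : clear the sign bit  ->  bits(x) & 0x7FFFFFFFFFFFFFFFULL
//   fneg(x) : flip the sign bit   ->  bits(x) ^ 0x8000000000000000ULL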
00762 
00763   // We don't support FMA.
00764   setOperationAction(ISD::FMA, MVT::f64, Expand);
00765   setOperationAction(ISD::FMA, MVT::f32, Expand);
00766 
00767   // Long double always uses X87.
00768   if (!TM.Options.UseSoftFloat) {
00769     addRegisterClass(MVT::f80, &X86::RFP80RegClass);
00770     setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
00771     setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
00772     {
00773       APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
00774       addLegalFPImmediate(TmpFlt);  // FLD0
00775       TmpFlt.changeSign();
00776       addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
00777 
00778       bool ignored;
00779       APFloat TmpFlt2(+1.0);
00780       TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
00781                       &ignored);
00782       addLegalFPImmediate(TmpFlt2);  // FLD1
00783       TmpFlt2.changeSign();
00784       addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
00785     }
00786 
00787     if (!TM.Options.UnsafeFPMath) {
00788       setOperationAction(ISD::FSIN   , MVT::f80, Expand);
00789       setOperationAction(ISD::FCOS   , MVT::f80, Expand);
00790       setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
00791     }
00792 
00793     setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
00794     setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
00795     setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
00796     setOperationAction(ISD::FRINT,  MVT::f80, Expand);
00797     setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
00798     setOperationAction(ISD::FMA, MVT::f80, Expand);
00799   }
00800 
00801   // Always use a library call for pow.
00802   setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
00803   setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
00804   setOperationAction(ISD::FPOW             , MVT::f80  , Expand);
00805 
00806   setOperationAction(ISD::FLOG, MVT::f80, Expand);
00807   setOperationAction(ISD::FLOG2, MVT::f80, Expand);
00808   setOperationAction(ISD::FLOG10, MVT::f80, Expand);
00809   setOperationAction(ISD::FEXP, MVT::f80, Expand);
00810   setOperationAction(ISD::FEXP2, MVT::f80, Expand);
00811 
00812   // First set operation action for all vector types to either promote
00813   // (for widening) or expand (for scalarization). Then we will selectively
00814   // turn on ones that can be effectively codegen'd.
00815   for (int i = MVT::FIRST_VECTOR_VALUETYPE;
00816            i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
00817     MVT VT = (MVT::SimpleValueType)i;
00818     setOperationAction(ISD::ADD , VT, Expand);
00819     setOperationAction(ISD::SUB , VT, Expand);
00820     setOperationAction(ISD::FADD, VT, Expand);
00821     setOperationAction(ISD::FNEG, VT, Expand);
00822     setOperationAction(ISD::FSUB, VT, Expand);
00823     setOperationAction(ISD::MUL , VT, Expand);
00824     setOperationAction(ISD::FMUL, VT, Expand);
00825     setOperationAction(ISD::SDIV, VT, Expand);
00826     setOperationAction(ISD::UDIV, VT, Expand);
00827     setOperationAction(ISD::FDIV, VT, Expand);
00828     setOperationAction(ISD::SREM, VT, Expand);
00829     setOperationAction(ISD::UREM, VT, Expand);
00830     setOperationAction(ISD::LOAD, VT, Expand);
00831     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
00832     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
00833     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
00834     setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
00835     setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
00836     setOperationAction(ISD::FABS, VT, Expand);
00837     setOperationAction(ISD::FSIN, VT, Expand);
00838     setOperationAction(ISD::FSINCOS, VT, Expand);
00839     setOperationAction(ISD::FCOS, VT, Expand);
00840     setOperationAction(ISD::FSINCOS, VT, Expand);
00841     setOperationAction(ISD::FREM, VT, Expand);
00842     setOperationAction(ISD::FMA,  VT, Expand);
00843     setOperationAction(ISD::FPOWI, VT, Expand);
00844     setOperationAction(ISD::FSQRT, VT, Expand);
00845     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
00846     setOperationAction(ISD::FFLOOR, VT, Expand);
00847     setOperationAction(ISD::FCEIL, VT, Expand);
00848     setOperationAction(ISD::FTRUNC, VT, Expand);
00849     setOperationAction(ISD::FRINT, VT, Expand);
00850     setOperationAction(ISD::FNEARBYINT, VT, Expand);
00851     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
00852     setOperationAction(ISD::MULHS, VT, Expand);
00853     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
00854     setOperationAction(ISD::MULHU, VT, Expand);
00855     setOperationAction(ISD::SDIVREM, VT, Expand);
00856     setOperationAction(ISD::UDIVREM, VT, Expand);
00857     setOperationAction(ISD::FPOW, VT, Expand);
00858     setOperationAction(ISD::CTPOP, VT, Expand);
00859     setOperationAction(ISD::CTTZ, VT, Expand);
00860     setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
00861     setOperationAction(ISD::CTLZ, VT, Expand);
00862     setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
00863     setOperationAction(ISD::SHL, VT, Expand);
00864     setOperationAction(ISD::SRA, VT, Expand);
00865     setOperationAction(ISD::SRL, VT, Expand);
00866     setOperationAction(ISD::ROTL, VT, Expand);
00867     setOperationAction(ISD::ROTR, VT, Expand);
00868     setOperationAction(ISD::BSWAP, VT, Expand);
00869     setOperationAction(ISD::SETCC, VT, Expand);
00870     setOperationAction(ISD::FLOG, VT, Expand);
00871     setOperationAction(ISD::FLOG2, VT, Expand);
00872     setOperationAction(ISD::FLOG10, VT, Expand);
00873     setOperationAction(ISD::FEXP, VT, Expand);
00874     setOperationAction(ISD::FEXP2, VT, Expand);
00875     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
00876     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
00877     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
00878     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
00879     setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
00880     setOperationAction(ISD::TRUNCATE, VT, Expand);
00881     setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
00882     setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
00883     setOperationAction(ISD::ANY_EXTEND, VT, Expand);
00884     setOperationAction(ISD::VSELECT, VT, Expand);
00885     setOperationAction(ISD::SELECT_CC, VT, Expand);
00886     for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
00887              InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
00888       setTruncStoreAction(VT,
00889                           (MVT::SimpleValueType)InnerVT, Expand);
00890     setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
00891     setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
00892 
00893     // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like types;
00894     // we have to deal with them whether we ask for Expansion or not. Setting
00895     // Expand causes its own optimisation problems though, so leave them legal.
00896     if (VT.getVectorElementType() == MVT::i1)
00897       setLoadExtAction(ISD::EXTLOAD, VT, Expand);
00898   }
00899 
00900   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
00901   // with -msoft-float, disable use of MMX as well.
00902   if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
00903     addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
00904     // No operations on x86mmx supported, everything uses intrinsics.
00905   }
00906 
00907   // MMX-sized vectors (other than x86mmx) are expected to be expanded
00908   // into smaller operations.
00909   setOperationAction(ISD::MULHS,              MVT::v8i8,  Expand);
00910   setOperationAction(ISD::MULHS,              MVT::v4i16, Expand);
00911   setOperationAction(ISD::MULHS,              MVT::v2i32, Expand);
00912   setOperationAction(ISD::MULHS,              MVT::v1i64, Expand);
00913   setOperationAction(ISD::AND,                MVT::v8i8,  Expand);
00914   setOperationAction(ISD::AND,                MVT::v4i16, Expand);
00915   setOperationAction(ISD::AND,                MVT::v2i32, Expand);
00916   setOperationAction(ISD::AND,                MVT::v1i64, Expand);
00917   setOperationAction(ISD::OR,                 MVT::v8i8,  Expand);
00918   setOperationAction(ISD::OR,                 MVT::v4i16, Expand);
00919   setOperationAction(ISD::OR,                 MVT::v2i32, Expand);
00920   setOperationAction(ISD::OR,                 MVT::v1i64, Expand);
00921   setOperationAction(ISD::XOR,                MVT::v8i8,  Expand);
00922   setOperationAction(ISD::XOR,                MVT::v4i16, Expand);
00923   setOperationAction(ISD::XOR,                MVT::v2i32, Expand);
00924   setOperationAction(ISD::XOR,                MVT::v1i64, Expand);
00925   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i8,  Expand);
00926   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i16, Expand);
00927   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v2i32, Expand);
00928   setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v1i64, Expand);
00929   setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v1i64, Expand);
00930   setOperationAction(ISD::SELECT,             MVT::v8i8,  Expand);
00931   setOperationAction(ISD::SELECT,             MVT::v4i16, Expand);
00932   setOperationAction(ISD::SELECT,             MVT::v2i32, Expand);
00933   setOperationAction(ISD::SELECT,             MVT::v1i64, Expand);
00934   setOperationAction(ISD::BITCAST,            MVT::v8i8,  Expand);
00935   setOperationAction(ISD::BITCAST,            MVT::v4i16, Expand);
00936   setOperationAction(ISD::BITCAST,            MVT::v2i32, Expand);
00937   setOperationAction(ISD::BITCAST,            MVT::v1i64, Expand);
00938 
00939   if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
00940     addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
00941 
00942     setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
00943     setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
00944     setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
00945     setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
00946     setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
00947     setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
00948     setOperationAction(ISD::FABS,               MVT::v4f32, Custom);
00949     setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
00950     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
00951     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
00952     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
00953     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
00954   }
00955 
00956   if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
00957     addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
00958 
00959     // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
00960     // registers cannot be used even for integer operations.
00961     addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
00962     addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
00963     addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
00964     addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
00965 
00966     setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
00967     setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
00968     setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
00969     setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
00970     setOperationAction(ISD::MUL,                MVT::v4i32, Custom);
00971     setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
00972     setOperationAction(ISD::UMUL_LOHI,          MVT::v4i32, Custom);
00973     setOperationAction(ISD::SMUL_LOHI,          MVT::v4i32, Custom);
00974     setOperationAction(ISD::MULHU,              MVT::v8i16, Legal);
00975     setOperationAction(ISD::MULHS,              MVT::v8i16, Legal);
00976     setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
00977     setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
00978     setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
00979     setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
00980     setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
00981     setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
00982     setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
00983     setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
00984     setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
00985     setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
00986     setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
00987     setOperationAction(ISD::FABS,               MVT::v2f64, Custom);
00988 
00989     setOperationAction(ISD::SETCC,              MVT::v2i64, Custom);
00990     setOperationAction(ISD::SETCC,              MVT::v16i8, Custom);
00991     setOperationAction(ISD::SETCC,              MVT::v8i16, Custom);
00992     setOperationAction(ISD::SETCC,              MVT::v4i32, Custom);
00993 
00994     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
00995     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
00996     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
00997     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
00998     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
00999 
01000     // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
01001     for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
01002       MVT VT = (MVT::SimpleValueType)i;
01003       // Do not attempt to custom lower non-power-of-2 vectors
01004       if (!isPowerOf2_32(VT.getVectorNumElements()))
01005         continue;
01006       // Do not attempt to custom lower non-128-bit vectors
01007       if (!VT.is128BitVector())
01008         continue;
01009       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
01010       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
01011       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
01012     }
01013 
01014     // We support custom legalizing of sext and anyext loads for specific
01015     // memory vector types which we can load as a scalar (or sequence of
01016     // scalars) and extend in-register to a legal 128-bit vector type. For sext
01017     // loads these must work with a single scalar load.
01018     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Custom);
01019     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Custom);
01020     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i8, Custom);
01021     setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Custom);
01022     setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Custom);
01023     setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, Custom);
01024     setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
01025     setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Custom);
01026     setLoadExtAction(ISD::EXTLOAD, MVT::v8i8, Custom);
01027 
01028     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
01029     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
01030     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
01031     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
01032     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
01033     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
01034 
01035     if (Subtarget->is64Bit()) {
01036       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
01037       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
01038     }
01039 
01040     // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
01041     for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
01042       MVT VT = (MVT::SimpleValueType)i;
01043 
01044       // Do not attempt to promote non-128-bit vectors
01045       if (!VT.is128BitVector())
01046         continue;
01047 
01048       setOperationAction(ISD::AND,    VT, Promote);
01049       AddPromotedToType (ISD::AND,    VT, MVT::v2i64);
01050       setOperationAction(ISD::OR,     VT, Promote);
01051       AddPromotedToType (ISD::OR,     VT, MVT::v2i64);
01052       setOperationAction(ISD::XOR,    VT, Promote);
01053       AddPromotedToType (ISD::XOR,    VT, MVT::v2i64);
01054       setOperationAction(ISD::LOAD,   VT, Promote);
01055       AddPromotedToType (ISD::LOAD,   VT, MVT::v2i64);
01056       setOperationAction(ISD::SELECT, VT, Promote);
01057       AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
01058     }
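// [Editor's note: illustrative sketch, not part of the original source.]
// "Promote" for these vector ops means the operands are bitcast to the
// promoted type (v2i64), the operation is performed there, and the result is
// bitcast back, e.g. for a v4i32 AND:
//
//   and v4i32 a, b   ==>   bitcast (and v2i64 (bitcast a), (bitcast b))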
01059 
01060     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
01061 
01062     // Custom lower v2i64 and v2f64 selects.
01063     setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
01064     setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
01065     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
01066     setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
01067 
01068     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
01069     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
01070 
01071     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i8,  Custom);
01072     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i16, Custom);
01073     // As there is no 64-bit GPR available, we need to build a special custom
01074     // sequence to convert from v2i32 to v2f32.
01075     if (!Subtarget->is64Bit())
01076       setOperationAction(ISD::UINT_TO_FP,       MVT::v2f32, Custom);
01077 
01078     setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
01079     setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
01080 
01081     setLoadExtAction(ISD::EXTLOAD,              MVT::v2f32, Legal);
01082 
01083     setOperationAction(ISD::BITCAST,            MVT::v2i32, Custom);
01084     setOperationAction(ISD::BITCAST,            MVT::v4i16, Custom);
01085     setOperationAction(ISD::BITCAST,            MVT::v8i8,  Custom);
01086   }
01087 
01088   if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
01089     setOperationAction(ISD::FFLOOR,             MVT::f32,   Legal);
01090     setOperationAction(ISD::FCEIL,              MVT::f32,   Legal);
01091     setOperationAction(ISD::FTRUNC,             MVT::f32,   Legal);
01092     setOperationAction(ISD::FRINT,              MVT::f32,   Legal);
01093     setOperationAction(ISD::FNEARBYINT,         MVT::f32,   Legal);
01094     setOperationAction(ISD::FFLOOR,             MVT::f64,   Legal);
01095     setOperationAction(ISD::FCEIL,              MVT::f64,   Legal);
01096     setOperationAction(ISD::FTRUNC,             MVT::f64,   Legal);
01097     setOperationAction(ISD::FRINT,              MVT::f64,   Legal);
01098     setOperationAction(ISD::FNEARBYINT,         MVT::f64,   Legal);
01099 
01100     setOperationAction(ISD::FFLOOR,             MVT::v4f32, Legal);
01101     setOperationAction(ISD::FCEIL,              MVT::v4f32, Legal);
01102     setOperationAction(ISD::FTRUNC,             MVT::v4f32, Legal);
01103     setOperationAction(ISD::FRINT,              MVT::v4f32, Legal);
01104     setOperationAction(ISD::FNEARBYINT,         MVT::v4f32, Legal);
01105     setOperationAction(ISD::FFLOOR,             MVT::v2f64, Legal);
01106     setOperationAction(ISD::FCEIL,              MVT::v2f64, Legal);
01107     setOperationAction(ISD::FTRUNC,             MVT::v2f64, Legal);
01108     setOperationAction(ISD::FRINT,              MVT::v2f64, Legal);
01109     setOperationAction(ISD::FNEARBYINT,         MVT::v2f64, Legal);
01110 
01111     // FIXME: Do we need to handle scalar-to-vector here?
01112     setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
01113 
01114     setOperationAction(ISD::VSELECT,            MVT::v2f64, Custom);
01115     setOperationAction(ISD::VSELECT,            MVT::v2i64, Custom);
01116     setOperationAction(ISD::VSELECT,            MVT::v4i32, Custom);
01117     setOperationAction(ISD::VSELECT,            MVT::v4f32, Custom);
01118     setOperationAction(ISD::VSELECT,            MVT::v8i16, Custom);
01119     // There is no BLENDI for byte vectors. We don't need to custom lower
01120     // some vselects for now.
01121     setOperationAction(ISD::VSELECT,            MVT::v16i8, Legal);
01122 
01123     // SSE41 brings specific instructions for doing vector sign extend even in
01124     // cases where we don't have SRA.
01125     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Custom);
01126     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Custom);
01127     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, Custom);
01128 
01129     // i8 and i16 vectors are custom because the source register and source
01130     // memory operand types are not the same width.  f32 vectors are
01131     // custom since the immediate controlling the insert encodes additional
01132     // information.
01133     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
01134     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
01135     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
01136     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
01137 
01138     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
01139     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
01140     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
01141     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
01142 
01143     // FIXME: these should be Legal, but that's only for the case where
01144     // the index is constant.  For now custom expand to deal with that.
01145     if (Subtarget->is64Bit()) {
01146       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
01147       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
01148     }
01149   }
01150 
01151   if (Subtarget->hasSSE2()) {
01152     setOperationAction(ISD::SRL,               MVT::v8i16, Custom);
01153     setOperationAction(ISD::SRL,               MVT::v16i8, Custom);
01154 
01155     setOperationAction(ISD::SHL,               MVT::v8i16, Custom);
01156     setOperationAction(ISD::SHL,               MVT::v16i8, Custom);
01157 
01158     setOperationAction(ISD::SRA,               MVT::v8i16, Custom);
01159     setOperationAction(ISD::SRA,               MVT::v16i8, Custom);
01160 
01161     // In the customized shift lowering, the legal cases in AVX2 will be
01162     // recognized.
01163     setOperationAction(ISD::SRL,               MVT::v2i64, Custom);
01164     setOperationAction(ISD::SRL,               MVT::v4i32, Custom);
01165 
01166     setOperationAction(ISD::SHL,               MVT::v2i64, Custom);
01167     setOperationAction(ISD::SHL,               MVT::v4i32, Custom);
01168 
01169     setOperationAction(ISD::SRA,               MVT::v4i32, Custom);
01170   }
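  // Roughly, the custom lowering maps shifts by a splatted (uniform) amount to
  // the single-count SSE2 instructions (e.g. PSLLW/PSLLD/PSRAD), expands truly
  // per-element shift amounts, and recognizes the variable-shift cases that
  // AVX2 makes legal (the VPSLLV/VPSRLV/VPSRAVD family).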
01171 
01172   if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
01173     addRegisterClass(MVT::v32i8,  &X86::VR256RegClass);
01174     addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
01175     addRegisterClass(MVT::v8i32,  &X86::VR256RegClass);
01176     addRegisterClass(MVT::v8f32,  &X86::VR256RegClass);
01177     addRegisterClass(MVT::v4i64,  &X86::VR256RegClass);
01178     addRegisterClass(MVT::v4f64,  &X86::VR256RegClass);
01179 
01180     setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
01181     setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
01182     setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);
01183 
01184     setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
01185     setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
01186     setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
01187     setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
01188     setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
01189     setOperationAction(ISD::FFLOOR,             MVT::v8f32, Legal);
01190     setOperationAction(ISD::FCEIL,              MVT::v8f32, Legal);
01191     setOperationAction(ISD::FTRUNC,             MVT::v8f32, Legal);
01192     setOperationAction(ISD::FRINT,              MVT::v8f32, Legal);
01193     setOperationAction(ISD::FNEARBYINT,         MVT::v8f32, Legal);
01194     setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
01195     setOperationAction(ISD::FABS,               MVT::v8f32, Custom);
01196 
01197     setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
01198     setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
01199     setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
01200     setOperationAction(ISD::FDIV,               MVT::v4f64, Legal);
01201     setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
01202     setOperationAction(ISD::FFLOOR,             MVT::v4f64, Legal);
01203     setOperationAction(ISD::FCEIL,              MVT::v4f64, Legal);
01204     setOperationAction(ISD::FTRUNC,             MVT::v4f64, Legal);
01205     setOperationAction(ISD::FRINT,              MVT::v4f64, Legal);
01206     setOperationAction(ISD::FNEARBYINT,         MVT::v4f64, Legal);
01207     setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
01208     setOperationAction(ISD::FABS,               MVT::v4f64, Custom);
01209 
01210     // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
01211     // even though v8i16 is a legal type.
01212     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Promote);
01213     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i16, Promote);
01214     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);
01215 
01216     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i16, Promote);
01217     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
01218     setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);
01219 
01220     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i8,  Custom);
01221     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i16, Custom);
01222 
01223     setLoadExtAction(ISD::EXTLOAD,              MVT::v4f32, Legal);
01224 
01225     setOperationAction(ISD::SRL,               MVT::v16i16, Custom);
01226     setOperationAction(ISD::SRL,               MVT::v32i8, Custom);
01227 
01228     setOperationAction(ISD::SHL,               MVT::v16i16, Custom);
01229     setOperationAction(ISD::SHL,               MVT::v32i8, Custom);
01230 
01231     setOperationAction(ISD::SRA,               MVT::v16i16, Custom);
01232     setOperationAction(ISD::SRA,               MVT::v32i8, Custom);
01233 
01234     setOperationAction(ISD::SETCC,             MVT::v32i8, Custom);
01235     setOperationAction(ISD::SETCC,             MVT::v16i16, Custom);
01236     setOperationAction(ISD::SETCC,             MVT::v8i32, Custom);
01237     setOperationAction(ISD::SETCC,             MVT::v4i64, Custom);
01238 
01239     setOperationAction(ISD::SELECT,            MVT::v4f64, Custom);
01240     setOperationAction(ISD::SELECT,            MVT::v4i64, Custom);
01241     setOperationAction(ISD::SELECT,            MVT::v8f32, Custom);
01242 
01243     setOperationAction(ISD::VSELECT,           MVT::v4f64, Custom);
01244     setOperationAction(ISD::VSELECT,           MVT::v4i64, Custom);
01245     setOperationAction(ISD::VSELECT,           MVT::v8i32, Custom);
01246     setOperationAction(ISD::VSELECT,           MVT::v8f32, Custom);
01247 
01248     setOperationAction(ISD::SIGN_EXTEND,       MVT::v4i64, Custom);
01249     setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i32, Custom);
01250     setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i16, Custom);
01251     setOperationAction(ISD::ZERO_EXTEND,       MVT::v4i64, Custom);
01252     setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i32, Custom);
01253     setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i16, Custom);
01254     setOperationAction(ISD::ANY_EXTEND,        MVT::v4i64, Custom);
01255     setOperationAction(ISD::ANY_EXTEND,        MVT::v8i32, Custom);
01256     setOperationAction(ISD::ANY_EXTEND,        MVT::v16i16, Custom);
01257     setOperationAction(ISD::TRUNCATE,          MVT::v16i8, Custom);
01258     setOperationAction(ISD::TRUNCATE,          MVT::v8i16, Custom);
01259     setOperationAction(ISD::TRUNCATE,          MVT::v4i32, Custom);
01260 
01261     if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
01262       setOperationAction(ISD::FMA,             MVT::v8f32, Legal);
01263       setOperationAction(ISD::FMA,             MVT::v4f64, Legal);
01264       setOperationAction(ISD::FMA,             MVT::v4f32, Legal);
01265       setOperationAction(ISD::FMA,             MVT::v2f64, Legal);
01266       setOperationAction(ISD::FMA,             MVT::f32, Legal);
01267       setOperationAction(ISD::FMA,             MVT::f64, Legal);
01268     }
01269 
01270     if (Subtarget->hasInt256()) {
01271       setOperationAction(ISD::ADD,             MVT::v4i64, Legal);
01272       setOperationAction(ISD::ADD,             MVT::v8i32, Legal);
01273       setOperationAction(ISD::ADD,             MVT::v16i16, Legal);
01274       setOperationAction(ISD::ADD,             MVT::v32i8, Legal);
01275 
01276       setOperationAction(ISD::SUB,             MVT::v4i64, Legal);
01277       setOperationAction(ISD::SUB,             MVT::v8i32, Legal);
01278       setOperationAction(ISD::SUB,             MVT::v16i16, Legal);
01279       setOperationAction(ISD::SUB,             MVT::v32i8, Legal);
01280 
01281       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01282       setOperationAction(ISD::MUL,             MVT::v8i32, Legal);
01283       setOperationAction(ISD::MUL,             MVT::v16i16, Legal);
01284       // Don't lower v32i8 because there is no 128-bit byte mul
01285 
01286       setOperationAction(ISD::UMUL_LOHI,       MVT::v8i32, Custom);
01287       setOperationAction(ISD::SMUL_LOHI,       MVT::v8i32, Custom);
01288       setOperationAction(ISD::MULHU,           MVT::v16i16, Legal);
01289       setOperationAction(ISD::MULHS,           MVT::v16i16, Legal);
01290 
01291       setOperationAction(ISD::VSELECT,         MVT::v16i16, Custom);
01292       setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
01293     } else {
01294       setOperationAction(ISD::ADD,             MVT::v4i64, Custom);
01295       setOperationAction(ISD::ADD,             MVT::v8i32, Custom);
01296       setOperationAction(ISD::ADD,             MVT::v16i16, Custom);
01297       setOperationAction(ISD::ADD,             MVT::v32i8, Custom);
01298 
01299       setOperationAction(ISD::SUB,             MVT::v4i64, Custom);
01300       setOperationAction(ISD::SUB,             MVT::v8i32, Custom);
01301       setOperationAction(ISD::SUB,             MVT::v16i16, Custom);
01302       setOperationAction(ISD::SUB,             MVT::v32i8, Custom);
01303 
01304       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01305       setOperationAction(ISD::MUL,             MVT::v8i32, Custom);
01306       setOperationAction(ISD::MUL,             MVT::v16i16, Custom);
01307       // Don't lower v32i8 because there is no 128-bit byte mul
01308     }
01309 
01310     // In the customized shift lowering, the legal cases in AVX2 will be
01311     // recognized.
01312     setOperationAction(ISD::SRL,               MVT::v4i64, Custom);
01313     setOperationAction(ISD::SRL,               MVT::v8i32, Custom);
01314 
01315     setOperationAction(ISD::SHL,               MVT::v4i64, Custom);
01316     setOperationAction(ISD::SHL,               MVT::v8i32, Custom);
01317 
01318     setOperationAction(ISD::SRA,               MVT::v8i32, Custom);
01319 
01320     // Custom lower several nodes for 256-bit types.
01321     for (int i = MVT::FIRST_VECTOR_VALUETYPE;
01322              i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
01323       MVT VT = (MVT::SimpleValueType)i;
01324 
01325       // Extract subvector is special because the value type
01326       // (result) is 128-bit but the source is 256-bit wide.
01327       if (VT.is128BitVector())
01328         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01329 
01330       // Do not attempt to custom lower other non-256-bit vectors
01331       if (!VT.is256BitVector())
01332         continue;
01333 
01334       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
01335       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
01336       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
01337       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
01338       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
01339       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Custom);
01340       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
01341     }
01342 
01343     // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
01344     for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
01345       MVT VT = (MVT::SimpleValueType)i;
01346 
01347       // Do not attempt to promote non-256-bit vectors
01348       if (!VT.is256BitVector())
01349         continue;
01350 
01351       setOperationAction(ISD::AND,    VT, Promote);
01352       AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
01353       setOperationAction(ISD::OR,     VT, Promote);
01354       AddPromotedToType (ISD::OR,     VT, MVT::v4i64);
01355       setOperationAction(ISD::XOR,    VT, Promote);
01356       AddPromotedToType (ISD::XOR,    VT, MVT::v4i64);
01357       setOperationAction(ISD::LOAD,   VT, Promote);
01358       AddPromotedToType (ISD::LOAD,   VT, MVT::v4i64);
01359       setOperationAction(ISD::SELECT, VT, Promote);
01360       AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
01361     }
01362   }
01363 
01364   if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) {
01365     addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
01366     addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
01367     addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
01368     addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
01369 
01370     addRegisterClass(MVT::i1,     &X86::VK1RegClass);
01371     addRegisterClass(MVT::v8i1,   &X86::VK8RegClass);
01372     addRegisterClass(MVT::v16i1,  &X86::VK16RegClass);
01373 
01374     setOperationAction(ISD::BR_CC,              MVT::i1,    Expand);
01375     setOperationAction(ISD::SETCC,              MVT::i1,    Custom);
01376     setOperationAction(ISD::XOR,                MVT::i1,    Legal);
01377     setOperationAction(ISD::OR,                 MVT::i1,    Legal);
01378     setOperationAction(ISD::AND,                MVT::i1,    Legal);
01379     setLoadExtAction(ISD::EXTLOAD,              MVT::v8f32, Legal);
01380     setOperationAction(ISD::LOAD,               MVT::v16f32, Legal);
01381     setOperationAction(ISD::LOAD,               MVT::v8f64, Legal);
01382     setOperationAction(ISD::LOAD,               MVT::v8i64, Legal);
01383     setOperationAction(ISD::LOAD,               MVT::v16i32, Legal);
01384     setOperationAction(ISD::LOAD,               MVT::v16i1, Legal);
01385 
01386     setOperationAction(ISD::FADD,               MVT::v16f32, Legal);
01387     setOperationAction(ISD::FSUB,               MVT::v16f32, Legal);
01388     setOperationAction(ISD::FMUL,               MVT::v16f32, Legal);
01389     setOperationAction(ISD::FDIV,               MVT::v16f32, Legal);
01390     setOperationAction(ISD::FSQRT,              MVT::v16f32, Legal);
01391     setOperationAction(ISD::FNEG,               MVT::v16f32, Custom);
01392 
01393     setOperationAction(ISD::FADD,               MVT::v8f64, Legal);
01394     setOperationAction(ISD::FSUB,               MVT::v8f64, Legal);
01395     setOperationAction(ISD::FMUL,               MVT::v8f64, Legal);
01396     setOperationAction(ISD::FDIV,               MVT::v8f64, Legal);
01397     setOperationAction(ISD::FSQRT,              MVT::v8f64, Legal);
01398     setOperationAction(ISD::FNEG,               MVT::v8f64, Custom);
01399     setOperationAction(ISD::FMA,                MVT::v8f64, Legal);
01400     setOperationAction(ISD::FMA,                MVT::v16f32, Legal);
01401 
01402     setOperationAction(ISD::FP_TO_SINT,         MVT::i32, Legal);
01403     setOperationAction(ISD::FP_TO_UINT,         MVT::i32, Legal);
01404     setOperationAction(ISD::SINT_TO_FP,         MVT::i32, Legal);
01405     setOperationAction(ISD::UINT_TO_FP,         MVT::i32, Legal);
01406     if (Subtarget->is64Bit()) {
01407       setOperationAction(ISD::FP_TO_UINT,       MVT::i64, Legal);
01408       setOperationAction(ISD::FP_TO_SINT,       MVT::i64, Legal);
01409       setOperationAction(ISD::SINT_TO_FP,       MVT::i64, Legal);
01410       setOperationAction(ISD::UINT_TO_FP,       MVT::i64, Legal);
01411     }
01412     setOperationAction(ISD::FP_TO_SINT,         MVT::v16i32, Legal);
01413     setOperationAction(ISD::FP_TO_UINT,         MVT::v16i32, Legal);
01414     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i32, Legal);
01415     setOperationAction(ISD::FP_TO_UINT,         MVT::v4i32, Legal);
01416     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i32, Legal);
01417     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i32, Legal);
01418     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i32, Legal);
01419     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Legal);
01420     setOperationAction(ISD::FP_ROUND,           MVT::v8f32, Legal);
01421     setOperationAction(ISD::FP_EXTEND,          MVT::v8f32, Legal);
01422 
01423     setOperationAction(ISD::TRUNCATE,           MVT::i1, Custom);
01424     setOperationAction(ISD::TRUNCATE,           MVT::v16i8, Custom);
01425     setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
01426     setOperationAction(ISD::TRUNCATE,           MVT::v8i1, Custom);
01427     setOperationAction(ISD::TRUNCATE,           MVT::v16i1, Custom);
01428     setOperationAction(ISD::TRUNCATE,           MVT::v16i16, Custom);
01429     setOperationAction(ISD::ZERO_EXTEND,        MVT::v16i32, Custom);
01430     setOperationAction(ISD::ZERO_EXTEND,        MVT::v8i64, Custom);
01431     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i32, Custom);
01432     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i64, Custom);
01433     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i8, Custom);
01434     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i16, Custom);
01435     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i16, Custom);
01436 
01437     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8f64,  Custom);
01438     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i64,  Custom);
01439     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16f32,  Custom);
01440     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i32,  Custom);
01441     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i1,    Custom);
01442     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i1, Legal);
01443 
01444     setOperationAction(ISD::SETCC,              MVT::v16i1, Custom);
01445     setOperationAction(ISD::SETCC,              MVT::v8i1, Custom);
01446 
01447     setOperationAction(ISD::MUL,              MVT::v8i64, Custom);
01448 
01449     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1,  Custom);
01450     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
01451     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i1, Custom);
01452     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i1, Custom);
01453     setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i1, Custom);
01454     setOperationAction(ISD::BUILD_VECTOR,       MVT::v16i1, Custom);
01455     setOperationAction(ISD::SELECT,             MVT::v8f64, Custom);
01456     setOperationAction(ISD::SELECT,             MVT::v8i64, Custom);
01457     setOperationAction(ISD::SELECT,             MVT::v16f32, Custom);
01458 
01459     setOperationAction(ISD::ADD,                MVT::v8i64, Legal);
01460     setOperationAction(ISD::ADD,                MVT::v16i32, Legal);
01461 
01462     setOperationAction(ISD::SUB,                MVT::v8i64, Legal);
01463     setOperationAction(ISD::SUB,                MVT::v16i32, Legal);
01464 
01465     setOperationAction(ISD::MUL,                MVT::v16i32, Legal);
01466 
01467     setOperationAction(ISD::SRL,                MVT::v8i64, Custom);
01468     setOperationAction(ISD::SRL,                MVT::v16i32, Custom);
01469 
01470     setOperationAction(ISD::SHL,                MVT::v8i64, Custom);
01471     setOperationAction(ISD::SHL,                MVT::v16i32, Custom);
01472 
01473     setOperationAction(ISD::SRA,                MVT::v8i64, Custom);
01474     setOperationAction(ISD::SRA,                MVT::v16i32, Custom);
01475 
01476     setOperationAction(ISD::AND,                MVT::v8i64, Legal);
01477     setOperationAction(ISD::OR,                 MVT::v8i64, Legal);
01478     setOperationAction(ISD::XOR,                MVT::v8i64, Legal);
01479     setOperationAction(ISD::AND,                MVT::v16i32, Legal);
01480     setOperationAction(ISD::OR,                 MVT::v16i32, Legal);
01481     setOperationAction(ISD::XOR,                MVT::v16i32, Legal);
01482 
01483     if (Subtarget->hasCDI()) {
01484       setOperationAction(ISD::CTLZ,             MVT::v8i64, Legal);
01485       setOperationAction(ISD::CTLZ,             MVT::v16i32, Legal);
01486     }
01487 
01488     // Custom lower several nodes.
01489     for (int i = MVT::FIRST_VECTOR_VALUETYPE;
01490              i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
01491       MVT VT = (MVT::SimpleValueType)i;
01492 
01493       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
01494       // Extract subvector is special because the value type
01495       // (result) is 256/128-bit but the source is 512-bit wide.
01496       if (VT.is128BitVector() || VT.is256BitVector())
01497         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01498 
01499       if (VT.getVectorElementType() == MVT::i1)
01500         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
01501 
01502       // Do not attempt to custom lower other non-512-bit vectors
01503       if (!VT.is512BitVector())
01504         continue;
01505 
01506       if (EltSize >= 32) {
01507         setOperationAction(ISD::VECTOR_SHUFFLE,      VT, Custom);
01508         setOperationAction(ISD::INSERT_VECTOR_ELT,   VT, Custom);
01509         setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01510         setOperationAction(ISD::VSELECT,             VT, Legal);
01511         setOperationAction(ISD::EXTRACT_VECTOR_ELT,  VT, Custom);
01512         setOperationAction(ISD::SCALAR_TO_VECTOR,    VT, Custom);
01513         setOperationAction(ISD::INSERT_SUBVECTOR,    VT, Custom);
01514       }
01515     }
01516     for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
01517       MVT VT = (MVT::SimpleValueType)i;
01518 
01519       // Do not attempt to promote non-512-bit vectors
01520       if (!VT.is512BitVector())
01521         continue;
01522 
01523       setOperationAction(ISD::SELECT, VT, Promote);
01524       AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
01525     }
01526   } // has AVX-512
01527 
01528   if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
01529     addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
01530     addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
01531 
01532     addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
01533     addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
01534 
01535     setOperationAction(ISD::LOAD,               MVT::v32i16, Legal);
01536     setOperationAction(ISD::LOAD,               MVT::v64i8, Legal);
01537     setOperationAction(ISD::SETCC,              MVT::v32i1, Custom);
01538     setOperationAction(ISD::SETCC,              MVT::v64i1, Custom);
01539 
01540     for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
01541       const MVT VT = (MVT::SimpleValueType)i;
01542 
01543       const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
01544 
01545       // Do not attempt to promote non-512-bit vectors
01546       if (!VT.is512BitVector())
01547         continue;
01548 
01549       if (EltSize < 32) {
01550         setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01551         setOperationAction(ISD::VSELECT,             VT, Legal);
01552       }
01553     }
01554   }
01555 
01556   if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
01557     addRegisterClass(MVT::v4i1,   &X86::VK4RegClass);
01558     addRegisterClass(MVT::v2i1,   &X86::VK2RegClass);
01559 
01560     setOperationAction(ISD::SETCC,              MVT::v4i1, Custom);
01561     setOperationAction(ISD::SETCC,              MVT::v2i1, Custom);
01562   }
01563 
01564   // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
01565   // of this type with custom code.
01566   for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
01567            VT != MVT::LAST_VECTOR_VALUETYPE; VT++) {
01568     setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
01569                        Custom);
01570   }
01571 
01572   // We want to custom lower some of our intrinsics.
01573   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
01574   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
01575   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
01576   if (!Subtarget->is64Bit())
01577     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
01578 
01579   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
01580   // handle type legalization for these operations here.
01581   //
01582   // FIXME: We really should do custom legalization for addition and
01583   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
01584   // than generic legalization for 64-bit multiplication-with-overflow, though.
01585   for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
01586     // Add/Sub/Mul with overflow operations are custom lowered.
01587     MVT VT = IntVTs[i];
01588     setOperationAction(ISD::SADDO, VT, Custom);
01589     setOperationAction(ISD::UADDO, VT, Custom);
01590     setOperationAction(ISD::SSUBO, VT, Custom);
01591     setOperationAction(ISD::USUBO, VT, Custom);
01592     setOperationAction(ISD::SMULO, VT, Custom);
01593     setOperationAction(ISD::UMULO, VT, Custom);
01594   }
01595 
01596   // There are no 8-bit 3-address imul/mul instructions
01597   setOperationAction(ISD::SMULO, MVT::i8, Expand);
01598   setOperationAction(ISD::UMULO, MVT::i8, Expand);
01599 
01600   if (!Subtarget->is64Bit()) {
01601     // These libcalls are not available in 32-bit.
01602     setLibcallName(RTLIB::SHL_I128, nullptr);
01603     setLibcallName(RTLIB::SRL_I128, nullptr);
01604     setLibcallName(RTLIB::SRA_I128, nullptr);
01605   }
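  // With the names cleared, legalization can no longer emit calls to the
  // 128-bit shift helpers (__ashlti3 and friends) and must expand such shifts
  // inline instead.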
01606 
01607   // Combine sin / cos into one node or libcall if possible.
01608   if (Subtarget->hasSinCos()) {
01609     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
01610     setLibcallName(RTLIB::SINCOS_F64, "sincos");
01611     if (Subtarget->isTargetDarwin()) {
01612       // For MacOSX, we don't want the normal expansion of a libcall to
01613       // sincos. We want to issue a libcall to __sincos_stret to avoid memory
01614       // traffic.
01615       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
01616       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
01617     }
01618   }
01619 
01620   if (Subtarget->isTargetWin64()) {
01621     setOperationAction(ISD::SDIV, MVT::i128, Custom);
01622     setOperationAction(ISD::UDIV, MVT::i128, Custom);
01623     setOperationAction(ISD::SREM, MVT::i128, Custom);
01624     setOperationAction(ISD::UREM, MVT::i128, Custom);
01625     setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
01626     setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
01627   }
01628 
01629   // We have target-specific dag combine patterns for the following nodes:
01630   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
01631   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
01632   setTargetDAGCombine(ISD::VSELECT);
01633   setTargetDAGCombine(ISD::SELECT);
01634   setTargetDAGCombine(ISD::SHL);
01635   setTargetDAGCombine(ISD::SRA);
01636   setTargetDAGCombine(ISD::SRL);
01637   setTargetDAGCombine(ISD::OR);
01638   setTargetDAGCombine(ISD::AND);
01639   setTargetDAGCombine(ISD::ADD);
01640   setTargetDAGCombine(ISD::FADD);
01641   setTargetDAGCombine(ISD::FSUB);
01642   setTargetDAGCombine(ISD::FMA);
01643   setTargetDAGCombine(ISD::SUB);
01644   setTargetDAGCombine(ISD::LOAD);
01645   setTargetDAGCombine(ISD::STORE);
01646   setTargetDAGCombine(ISD::ZERO_EXTEND);
01647   setTargetDAGCombine(ISD::ANY_EXTEND);
01648   setTargetDAGCombine(ISD::SIGN_EXTEND);
01649   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
01650   setTargetDAGCombine(ISD::TRUNCATE);
01651   setTargetDAGCombine(ISD::SINT_TO_FP);
01652   setTargetDAGCombine(ISD::SETCC);
01653   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
01654   setTargetDAGCombine(ISD::BUILD_VECTOR);
01655   if (Subtarget->is64Bit())
01656     setTargetDAGCombine(ISD::MUL);
01657   setTargetDAGCombine(ISD::XOR);
01658 
01659   computeRegisterProperties();
01660 
01661   // On Darwin, -Os means optimize for size without hurting performance, so
01662   // do not reduce the limit.
01663   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
01664   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
01665   MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
01666   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
01667   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
01668   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
01669   setPrefLoopAlignment(4); // 2^4 bytes.
01670 
01671   // Predictable cmovs don't hurt on Atom because it's in-order.
01672   PredictableSelectIsExpensive = !Subtarget->isAtom();
01673 
01674   setPrefFunctionAlignment(4); // 2^4 bytes.
01675 
01676   InitIntrinsicTables();
01677 }
01678 
01679 // This has so far only been implemented for 64-bit MachO.
01680 bool X86TargetLowering::useLoadStackGuardNode() const {
01681   return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO &&
01682          Subtarget->is64Bit();
01683 }
01684 
01685 TargetLoweringBase::LegalizeTypeAction
01686 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
01687   if (ExperimentalVectorWideningLegalization &&
01688       VT.getVectorNumElements() != 1 &&
01689       VT.getVectorElementType().getSimpleVT() != MVT::i1)
01690     return TypeWidenVector;
01691 
01692   return TargetLoweringBase::getPreferredVectorAction(VT);
01693 }
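// Under the experimental widening flag, an illegal type such as v2i16 is
// widened toward a legal vector (e.g. v8i16) instead of having its elements
// promoted; the default strategy is kept for single-element vectors and
// vectors with i1 elements.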
01694 
01695 EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
01696   if (!VT.isVector())
01697     return Subtarget->hasAVX512() ? MVT::i1 : MVT::i8;
01698 
01699   const unsigned NumElts = VT.getVectorNumElements();
01700   const EVT EltVT = VT.getVectorElementType();
01701   if (VT.is512BitVector()) {
01702     if (Subtarget->hasAVX512())
01703       if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
01704           EltVT == MVT::f32 || EltVT == MVT::f64)
01705         switch(NumElts) {
01706         case  8: return MVT::v8i1;
01707         case 16: return MVT::v16i1;
01708       }
01709     if (Subtarget->hasBWI())
01710       if (EltVT == MVT::i8 || EltVT == MVT::i16)
01711         switch(NumElts) {
01712         case 32: return MVT::v32i1;
01713         case 64: return MVT::v64i1;
01714       }
01715   }
01716 
01717   if (VT.is256BitVector() || VT.is128BitVector()) {
01718     if (Subtarget->hasVLX())
01719       if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
01720           EltVT == MVT::f32 || EltVT == MVT::f64)
01721         switch(NumElts) {
01722         case 2: return MVT::v2i1;
01723         case 4: return MVT::v4i1;
01724         case 8: return MVT::v8i1;
01725       }
01726     if (Subtarget->hasBWI() && Subtarget->hasVLX())
01727       if (EltVT == MVT::i8 || EltVT == MVT::i16)
01728         switch(NumElts) {
01729         case  8: return MVT::v8i1;
01730         case 16: return MVT::v16i1;
01731         case 32: return MVT::v32i1;
01732       }
01733   }
01734 
01735   return VT.changeVectorElementTypeToInteger();
01736 }
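// For example, a v16f32 compare produces a v16i1 mask when AVX-512 is
// available, while a v4f32 compare without VLX falls through to the default
// and produces v4i32.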
01737 
01738 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
01739 /// the desired ByVal argument alignment.
01740 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
01741   if (MaxAlign == 16)
01742     return;
01743   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
01744     if (VTy->getBitWidth() == 128)
01745       MaxAlign = 16;
01746   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
01747     unsigned EltAlign = 0;
01748     getMaxByValAlign(ATy->getElementType(), EltAlign);
01749     if (EltAlign > MaxAlign)
01750       MaxAlign = EltAlign;
01751   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
01752     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
01753       unsigned EltAlign = 0;
01754       getMaxByValAlign(STy->getElementType(i), EltAlign);
01755       if (EltAlign > MaxAlign)
01756         MaxAlign = EltAlign;
01757       if (MaxAlign == 16)
01758         break;
01759     }
01760   }
01761 }
01762 
01763 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
01764 /// function arguments in the caller parameter area. For X86, aggregates
01765 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
01766 /// are at 4-byte boundaries.
01767 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
01768   if (Subtarget->is64Bit()) {
01769     // Max of 8 and alignment of type.
01770     unsigned TyAlign = TD->getABITypeAlignment(Ty);
01771     if (TyAlign > 8)
01772       return TyAlign;
01773     return 8;
01774   }
01775 
01776   unsigned Align = 4;
01777   if (Subtarget->hasSSE1())
01778     getMaxByValAlign(Ty, Align);
01779   return Align;
01780 }
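// For example, on 32-bit x86 with SSE an aggregate containing a 128-bit vector
// member is aligned to 16 bytes, a plain struct of i32 fields stays at the
// 4-byte default, and on x86-64 the result is at least 8.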
01781 
01782 /// getOptimalMemOpType - Returns the target specific optimal type for load
01783 /// and store operations as a result of memset, memcpy, and memmove
01784 /// lowering. If DstAlign is zero, the destination alignment can satisfy any
01785 /// constraint. Similarly, if SrcAlign is zero there is no need to check it
01786 /// against an alignment requirement,
01787 /// probably because the source does not need to be loaded. If 'IsMemset' is
01788 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
01789 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
01790 /// source is constant so it does not need to be loaded.
01791 /// It returns EVT::Other if the type should be determined using generic
01792 /// target-independent logic.
01793 EVT
01794 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
01795                                        unsigned DstAlign, unsigned SrcAlign,
01796                                        bool IsMemset, bool ZeroMemset,
01797                                        bool MemcpyStrSrc,
01798                                        MachineFunction &MF) const {
01799   const Function *F = MF.getFunction();
01800   if ((!IsMemset || ZeroMemset) &&
01801       !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
01802                                        Attribute::NoImplicitFloat)) {
01803     if (Size >= 16 &&
01804         (Subtarget->isUnalignedMemAccessFast() ||
01805          ((DstAlign == 0 || DstAlign >= 16) &&
01806           (SrcAlign == 0 || SrcAlign >= 16)))) {
01807       if (Size >= 32) {
01808         if (Subtarget->hasInt256())
01809           return MVT::v8i32;
01810         if (Subtarget->hasFp256())
01811           return MVT::v8f32;
01812       }
01813       if (Subtarget->hasSSE2())
01814         return MVT::v4i32;
01815       if (Subtarget->hasSSE1())
01816         return MVT::v4f32;
01817     } else if (!MemcpyStrSrc && Size >= 8 &&
01818                !Subtarget->is64Bit() &&
01819                Subtarget->hasSSE2()) {
01820       // Do not use f64 to lower memcpy if source is string constant. It's
01821       // better to use i32 to avoid the loads.
01822       return MVT::f64;
01823     }
01824   }
01825   if (Subtarget->is64Bit() && Size >= 8)
01826     return MVT::i64;
01827   return MVT::i32;
01828 }
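// For example, a 64-byte memcpy with suitably aligned (or fast unaligned)
// operands prefers v8i32 on AVX2, v8f32 on AVX, and v4i32 on SSE2, while
// small copies fall back to i64 on 64-bit targets and i32 elsewhere.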
01829 
01830 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
01831   if (VT == MVT::f32)
01832     return X86ScalarSSEf32;
01833   else if (VT == MVT::f64)
01834     return X86ScalarSSEf64;
01835   return true;
01836 }
01837 
01838 bool
01839 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
01840                                                   unsigned,
01841                                                   unsigned,
01842                                                   bool *Fast) const {
01843   if (Fast)
01844     *Fast = Subtarget->isUnalignedMemAccessFast();
01845   return true;
01846 }
01847 
01848 /// getJumpTableEncoding - Return the entry encoding for a jump table in the
01849 /// current function.  The returned value is a member of the
01850 /// MachineJumpTableInfo::JTEntryKind enum.
01851 unsigned X86TargetLowering::getJumpTableEncoding() const {
01852   // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
01853   // symbol.
01854   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
01855       Subtarget->isPICStyleGOT())
01856     return MachineJumpTableInfo::EK_Custom32;
01857 
01858   // Otherwise, use the normal jump table encoding heuristics.
01859   return TargetLowering::getJumpTableEncoding();
01860 }
01861 
01862 const MCExpr *
01863 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
01864                                              const MachineBasicBlock *MBB,
01865                                              unsigned uid, MCContext &Ctx) const {
01866   assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
01867          Subtarget->isPICStyleGOT());
01868   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
01869   // entries.
01870   return MCSymbolRefExpr::Create(MBB->getSymbol(),
01871                                  MCSymbolRefExpr::VK_GOTOFF, Ctx);
01872 }
01873 
01874 /// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
01875 /// jumptable.
01876 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
01877                                                     SelectionDAG &DAG) const {
01878   if (!Subtarget->is64Bit())
01879     // This doesn't have SDLoc associated with it, but is not really the
01880     // same as a Register.
01881     return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
01882   return Table;
01883 }
01884 
01885 /// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
01886 /// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
01887 /// MCExpr.
01888 const MCExpr *X86TargetLowering::
01889 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
01890                              MCContext &Ctx) const {
01891   // X86-64 uses RIP relative addressing based on the jump table label.
01892   if (Subtarget->isPICStyleRIPRel())
01893     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
01894 
01895   // Otherwise, the reference is relative to the PIC base.
01896   return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
01897 }
01898 
01899 // FIXME: Why is this routine here? Move to RegInfo!
01900 std::pair<const TargetRegisterClass*, uint8_t>
01901 X86TargetLowering::findRepresentativeClass(MVT VT) const {
01902   const TargetRegisterClass *RRC = nullptr;
01903   uint8_t Cost = 1;
01904   switch (VT.SimpleTy) {
01905   default:
01906     return TargetLowering::findRepresentativeClass(VT);
01907   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
01908     RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
01909     break;
01910   case MVT::x86mmx:
01911     RRC = &X86::VR64RegClass;
01912     break;
01913   case MVT::f32: case MVT::f64:
01914   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
01915   case MVT::v4f32: case MVT::v2f64:
01916   case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
01917   case MVT::v4f64:
01918     RRC = &X86::VR128RegClass;
01919     break;
01920   }
01921   return std::make_pair(RRC, Cost);
01922 }
01923 
01924 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
01925                                                unsigned &Offset) const {
01926   if (!Subtarget->isTargetLinux())
01927     return false;
01928 
01929   if (Subtarget->is64Bit()) {
01930     // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
01931     Offset = 0x28;
01932     if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
01933       AddressSpace = 256;
01934     else
01935       AddressSpace = 257;
01936   } else {
01937     // %gs:0x14 on i386
01938     Offset = 0x14;
01939     AddressSpace = 256;
01940   }
01941   return true;
01942 }
01943 
01944 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
01945                                             unsigned DestAS) const {
01946   assert(SrcAS != DestAS && "Expected different address spaces!");
01947 
01948   return SrcAS < 256 && DestAS < 256;
01949 }
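// Address spaces 256 and 257 are the %gs- and %fs-relative spaces on x86, so
// casts involving them are not no-ops; only casts between the flat address
// spaces (below 256) are.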
01950 
01951 //===----------------------------------------------------------------------===//
01952 //               Return Value Calling Convention Implementation
01953 //===----------------------------------------------------------------------===//
01954 
01955 #include "X86GenCallingConv.inc"
01956 
01957 bool
01958 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
01959                                   MachineFunction &MF, bool isVarArg,
01960                         const SmallVectorImpl<ISD::OutputArg> &Outs,
01961                         LLVMContext &Context) const {
01962   SmallVector<CCValAssign, 16> RVLocs;
01963   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
01964   return CCInfo.CheckReturn(Outs, RetCC_X86);
01965 }
01966 
01967 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
01968   static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
01969   return ScratchRegs;
01970 }
01971 
01972 SDValue
01973 X86TargetLowering::LowerReturn(SDValue Chain,
01974                                CallingConv::ID CallConv, bool isVarArg,
01975                                const SmallVectorImpl<ISD::OutputArg> &Outs,
01976                                const SmallVectorImpl<SDValue> &OutVals,
01977                                SDLoc dl, SelectionDAG &DAG) const {
01978   MachineFunction &MF = DAG.getMachineFunction();
01979   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
01980 
01981   SmallVector<CCValAssign, 16> RVLocs;
01982   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
01983   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
01984 
01985   SDValue Flag;
01986   SmallVector<SDValue, 6> RetOps;
01987   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
01988   // Operand #1 = Bytes To Pop
01989   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
01990                    MVT::i16));
01991 
01992   // Copy the result values into the output registers.
01993   for (unsigned i = 0; i != RVLocs.size(); ++i) {
01994     CCValAssign &VA = RVLocs[i];
01995     assert(VA.isRegLoc() && "Can only return in registers!");
01996     SDValue ValToCopy = OutVals[i];
01997     EVT ValVT = ValToCopy.getValueType();
01998 
01999     // Promote values to the appropriate types
02000     if (VA.getLocInfo() == CCValAssign::SExt)
02001       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
02002     else if (VA.getLocInfo() == CCValAssign::ZExt)
02003       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
02004     else if (VA.getLocInfo() == CCValAssign::AExt)
02005       ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
02006     else if (VA.getLocInfo() == CCValAssign::BCvt)
02007       ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
02008 
02009     assert(VA.getLocInfo() != CCValAssign::FPExt &&
02010            "Unexpected FP-extend for return value.");  
02011 
02012     // If this is x86-64, and we disabled SSE, we can't return FP values,
02013     // or SSE or MMX vectors.
02014     if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
02015          VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
02016           (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
02017       report_fatal_error("SSE register return with SSE disabled");
02018     }
02019     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
02020     // llvm-gcc has never done it right and no one has noticed, so this
02021     // should be OK for now.
02022     if (ValVT == MVT::f64 &&
02023         (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
02024       report_fatal_error("SSE2 register return with SSE2 disabled");
02025 
02026     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
02027     // the RET instruction and handled by the FP Stackifier.
02028     if (VA.getLocReg() == X86::FP0 ||
02029         VA.getLocReg() == X86::FP1) {
02030       // If this is a copy from an xmm register to ST(0), use an FPExtend to
02031       // change the value to the FP stack register class.
02032       if (isScalarFPTypeInSSEReg(VA.getValVT()))
02033         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
02034       RetOps.push_back(ValToCopy);
02035       // Don't emit a copytoreg.
02036       continue;
02037     }
02038 
02039     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
02040     // which is returned in RAX / RDX.
02041     if (Subtarget->is64Bit()) {
02042       if (ValVT == MVT::x86mmx) {
02043         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
02044           ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
02045           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
02046                                   ValToCopy);
02047           // If we don't have SSE2 available, convert to v4f32 so the generated
02048           // register is legal.
02049           if (!Subtarget->hasSSE2())
02050             ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
02051         }
02052       }
02053     }
02054 
02055     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
02056     Flag = Chain.getValue(1);
02057     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02058   }
02059 
02060   // The x86-64 ABIs require that for returning structs by value we copy
02061   // the sret argument into %rax/%eax (depending on ABI) for the return.
02062   // Win32 requires us to put the sret argument into %eax as well.
02063   // We saved the argument into a virtual register in the entry block,
02064   // so now we copy the value out and into %rax/%eax.
02065   if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
02066       (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
02067     MachineFunction &MF = DAG.getMachineFunction();
02068     X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
02069     unsigned Reg = FuncInfo->getSRetReturnReg();
02070     assert(Reg &&
02071            "SRetReturnReg should have been set in LowerFormalArguments().");
02072     SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
02073 
02074     unsigned RetValReg
02075         = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
02076           X86::RAX : X86::EAX;
02077     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
02078     Flag = Chain.getValue(1);
02079 
02080     // RAX/EAX now acts like a return value.
02081     RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
02082   }
02083 
02084   RetOps[0] = Chain;  // Update chain.
02085 
02086   // Add the flag if we have it.
02087   if (Flag.getNode())
02088     RetOps.push_back(Flag);
02089 
02090   return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
02091 }
02092 
02093 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
02094   if (N->getNumValues() != 1)
02095     return false;
02096   if (!N->hasNUsesOfValue(1, 0))
02097     return false;
02098 
02099   SDValue TCChain = Chain;
02100   SDNode *Copy = *N->use_begin();
02101   if (Copy->getOpcode() == ISD::CopyToReg) {
02102     // If the copy has a glue operand, we conservatively assume it isn't safe to
02103     // perform a tail call.
02104     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02105       return false;
02106     TCChain = Copy->getOperand(0);
02107   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
02108     return false;
02109 
02110   bool HasRet = false;
02111   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
02112        UI != UE; ++UI) {
02113     if (UI->getOpcode() != X86ISD::RET_FLAG)
02114       return false;
02115     // If we are returning more than one value, we can definitely
02116     // not make a tail call; see PR19530.
02117     if (UI->getNumOperands() > 4)
02118       return false;
02119     if (UI->getNumOperands() == 4 &&
02120         UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
02121       return false;
02122     HasRet = true;
02123   }
02124 
02125   if (!HasRet)
02126     return false;
02127 
02128   Chain = TCChain;
02129   return true;
02130 }
02131 
02132 EVT
02133 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
02134                                             ISD::NodeType ExtendKind) const {
02135   MVT ReturnMVT;
02136   // TODO: Is this also valid on 32-bit?
02137   if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
02138     ReturnMVT = MVT::i8;
02139   else
02140     ReturnMVT = MVT::i32;
02141 
02142   EVT MinVT = getRegisterType(Context, ReturnMVT);
02143   return VT.bitsLT(MinVT) ? MinVT : VT;
02144 }
02145 
02146 /// LowerCallResult - Lower the result values of a call into the
02147 /// appropriate copies out of appropriate physical registers.
02148 ///
02149 SDValue
02150 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
02151                                    CallingConv::ID CallConv, bool isVarArg,
02152                                    const SmallVectorImpl<ISD::InputArg> &Ins,
02153                                    SDLoc dl, SelectionDAG &DAG,
02154                                    SmallVectorImpl<SDValue> &InVals) const {
02155 
02156   // Assign locations to each value returned by this call.
02157   SmallVector<CCValAssign, 16> RVLocs;
02158   bool Is64Bit = Subtarget->is64Bit();
02159   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
02160                  *DAG.getContext());
02161   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
02162 
02163   // Copy all of the result registers out of their specified physreg.
02164   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
02165     CCValAssign &VA = RVLocs[i];
02166     EVT CopyVT = VA.getValVT();
02167 
02168     // If this is x86-64, and we disabled SSE, we can't return FP values
02169     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
02170         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
02171       report_fatal_error("SSE register return with SSE disabled");
02172     }
02173 
02174     // If we prefer to use the value in xmm registers, copy it out as f80 and
02175     // use a truncate to move it from fp stack reg to xmm reg.
02176     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
02177         isScalarFPTypeInSSEReg(VA.getValVT()))
02178       CopyVT = MVT::f80;
02179 
02180     Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
02181                                CopyVT, InFlag).getValue(1);
02182     SDValue Val = Chain.getValue(0);
02183 
02184     if (CopyVT != VA.getValVT())
02185       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
02186                         // This truncation won't change the value.
02187                         DAG.getIntPtrConstant(1));
02188 
02189     InFlag = Chain.getValue(2);
02190     InVals.push_back(Val);
02191   }
02192 
02193   return Chain;
02194 }
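// For example, an f64 returned in FP0 that the caller keeps in SSE registers
// is copied out of the x87 stack as f80 and then rounded back to f64, which
// does not change the value.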
02195 
02196 //===----------------------------------------------------------------------===//
02197 //                C & StdCall & Fast Calling Convention implementation
02198 //===----------------------------------------------------------------------===//
02199 //  The StdCall calling convention seems to be standard for many Windows API
02200 //  routines. It differs from the C calling convention just a little: the
02201 //  callee, not the caller, should clean up the stack. Symbols should also be
02202 //  decorated in some fancy way :) It doesn't support any vector arguments.
02203 //  For info on the fast calling convention, see the Fast Calling Convention
02204 //  (tail call) implementation in LowerX86_32FastCCCallTo.
02205 
02206 /// CallIsStructReturn - Determines whether a call uses struct return
02207 /// semantics.
02208 enum StructReturnType {
02209   NotStructReturn,
02210   RegStructReturn,
02211   StackStructReturn
02212 };
02213 static StructReturnType
02214 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
02215   if (Outs.empty())
02216     return NotStructReturn;
02217 
02218   const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
02219   if (!Flags.isSRet())
02220     return NotStructReturn;
02221   if (Flags.isInReg())
02222     return RegStructReturn;
02223   return StackStructReturn;
02224 }
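// For example, a call whose first outgoing argument carries the sret flag
// returns its aggregate through memory (StackStructReturn); if that argument
// is also marked inreg, the sret pointer itself is passed in a register
// (RegStructReturn).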
02225 
02226 /// ArgsAreStructReturn - Determines whether a function uses struct
02227 /// return semantics.
02228 static StructReturnType
02229 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
02230   if (Ins.empty())
02231     return NotStructReturn;
02232 
02233   const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
02234   if (!Flags.isSRet())
02235     return NotStructReturn;
02236   if (Flags.isInReg())
02237     return RegStructReturn;
02238   return StackStructReturn;
02239 }
02240 
02241 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
02242 /// by "Src" to address "Dst" with size and alignment information specified by
02243 /// the specific parameter attribute. The copy will be passed as a byval
02244 /// function parameter.
02245 static SDValue
02246 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
02247                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
02248                           SDLoc dl) {
02249   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
02250 
02251   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
02252                        /*isVolatile*/false, /*AlwaysInline=*/true,
02253                        MachinePointerInfo(), MachinePointerInfo());
02254 }
02255 
02256 /// IsTailCallConvention - Return true if the calling convention is one that
02257 /// supports tail call optimization.
02258 static bool IsTailCallConvention(CallingConv::ID CC) {
02259   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
02260           CC == CallingConv::HiPE);
02261 }
02262 
02263 /// \brief Return true if the calling convention is a C calling convention.
02264 static bool IsCCallConvention(CallingConv::ID CC) {
02265   return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
02266           CC == CallingConv::X86_64_SysV);
02267 }
02268 
02269 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
02270   if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
02271     return false;
02272 
02273   CallSite CS(CI);
02274   CallingConv::ID CalleeCC = CS.getCallingConv();
02275   if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
02276     return false;
02277 
02278   return true;
02279 }
02280 
02281 /// FuncIsMadeTailCallSafe - Return true if the function is being made into
02282 /// a tailcall target by changing its ABI.
02283 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
02284                                    bool GuaranteedTailCallOpt) {
02285   return GuaranteedTailCallOpt && IsTailCallConvention(CC);
02286 }
02287 
02288 SDValue
02289 X86TargetLowering::LowerMemArgument(SDValue Chain,
02290                                     CallingConv::ID CallConv,
02291                                     const SmallVectorImpl<ISD::InputArg> &Ins,
02292                                     SDLoc dl, SelectionDAG &DAG,
02293                                     const CCValAssign &VA,
02294                                     MachineFrameInfo *MFI,
02295                                     unsigned i) const {
02296   // Create the nodes corresponding to a load from this parameter slot.
02297   ISD::ArgFlagsTy Flags = Ins[i].Flags;
02298   bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
02299       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
02300   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
02301   EVT ValVT;
02302 
02303   // If value is passed by pointer we have address passed instead of the value
02304   // itself.
02305   if (VA.getLocInfo() == CCValAssign::Indirect)
02306     ValVT = VA.getLocVT();
02307   else
02308     ValVT = VA.getValVT();
02309 
02310   // FIXME: For now, all byval parameter objects are marked mutable. This can be
02311   // changed with more analysis.
02312   // In the case of tail call optimization, mark all arguments mutable, since
02313   // they could be overwritten by the lowering of arguments for a tail call.
02314   if (Flags.isByVal()) {
02315     unsigned Bytes = Flags.getByValSize();
02316     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
02317     int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
02318     return DAG.getFrameIndex(FI, getPointerTy());
02319   } else {
02320     int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
02321                                     VA.getLocMemOffset(), isImmutable);
02322     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
02323     return DAG.getLoad(ValVT, dl, Chain, FIN,
02324                        MachinePointerInfo::getFixedStack(FI),
02325                        false, false, false, 0);
02326   }
02327 }
02328 
02329 // FIXME: Get this from tablegen.
02330 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
02331                                                 const X86Subtarget *Subtarget) {
02332   assert(Subtarget->is64Bit());
02333 
02334   if (Subtarget->isCallingConvWin64(CallConv)) {
02335     static const MCPhysReg GPR64ArgRegsWin64[] = {
02336       X86::RCX, X86::RDX, X86::R8,  X86::R9
02337     };
02338     return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
02339   }
02340 
02341   static const MCPhysReg GPR64ArgRegs64Bit[] = {
02342     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
02343   };
02344   return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
02345 }
02346 
02347 // FIXME: Get this from tablegen.
02348 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
02349                                                 CallingConv::ID CallConv,
02350                                                 const X86Subtarget *Subtarget) {
02351   assert(Subtarget->is64Bit());
02352   if (Subtarget->isCallingConvWin64(CallConv)) {
02353     // The XMM registers which might contain var arg parameters are shadowed
02354     // in their paired GPR.  So we only need to save the GPRs to their home
02355     // slots.
02356     // TODO: __vectorcall will change this.
02357     return None;
02358   }
02359 
02360   const Function *Fn = MF.getFunction();
02361   bool NoImplicitFloatOps = Fn->getAttributes().
02362       hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
02363   assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
02364          "SSE register cannot be used when SSE is disabled!");
02365   if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
02366       !Subtarget->hasSSE1())
02367     // Kernel mode asks for SSE to be disabled, so there are no XMM argument
02368     // registers.
02369     return None;
02370 
02371   static const MCPhysReg XMMArgRegs64Bit[] = {
02372     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02373     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02374   };
02375   return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
02376 }
02377 
02378 SDValue
02379 X86TargetLowering::LowerFormalArguments(SDValue Chain,
02380                                         CallingConv::ID CallConv,
02381                                         bool isVarArg,
02382                                       const SmallVectorImpl<ISD::InputArg> &Ins,
02383                                         SDLoc dl,
02384                                         SelectionDAG &DAG,
02385                                         SmallVectorImpl<SDValue> &InVals)
02386                                           const {
02387   MachineFunction &MF = DAG.getMachineFunction();
02388   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
02389 
02390   const Function* Fn = MF.getFunction();
02391   if (Fn->hasExternalLinkage() &&
02392       Subtarget->isTargetCygMing() &&
02393       Fn->getName() == "main")
02394     FuncInfo->setForceFramePointer(true);
02395 
02396   MachineFrameInfo *MFI = MF.getFrameInfo();
02397   bool Is64Bit = Subtarget->is64Bit();
02398   bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
02399 
02400   assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
02401          "Var args not supported with calling convention fastcc, ghc or hipe");
02402 
02403   // Assign locations to all of the incoming arguments.
02404   SmallVector<CCValAssign, 16> ArgLocs;
02405   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02406 
02407   // Allocate shadow area for Win64
02408   if (IsWin64)
02409     CCInfo.AllocateStack(32, 8);
02410 
02411   CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
02412 
02413   unsigned LastVal = ~0U;
02414   SDValue ArgValue;
02415   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02416     CCValAssign &VA = ArgLocs[i];
02417     // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
02418     // places.
02419     assert(VA.getValNo() != LastVal &&
02420            "Don't support value assigned to multiple locs yet");
02421     (void)LastVal;
02422     LastVal = VA.getValNo();
02423 
02424     if (VA.isRegLoc()) {
02425       EVT RegVT = VA.getLocVT();
02426       const TargetRegisterClass *RC;
02427       if (RegVT == MVT::i32)
02428         RC = &X86::GR32RegClass;
02429       else if (Is64Bit && RegVT == MVT::i64)
02430         RC = &X86::GR64RegClass;
02431       else if (RegVT == MVT::f32)
02432         RC = &X86::FR32RegClass;
02433       else if (RegVT == MVT::f64)
02434         RC = &X86::FR64RegClass;
02435       else if (RegVT.is512BitVector())
02436         RC = &X86::VR512RegClass;
02437       else if (RegVT.is256BitVector())
02438         RC = &X86::VR256RegClass;
02439       else if (RegVT.is128BitVector())
02440         RC = &X86::VR128RegClass;
02441       else if (RegVT == MVT::x86mmx)
02442         RC = &X86::VR64RegClass;
02443       else if (RegVT == MVT::i1)
02444         RC = &X86::VK1RegClass;
02445       else if (RegVT == MVT::v8i1)
02446         RC = &X86::VK8RegClass;
02447       else if (RegVT == MVT::v16i1)
02448         RC = &X86::VK16RegClass;
02449       else if (RegVT == MVT::v32i1)
02450         RC = &X86::VK32RegClass;
02451       else if (RegVT == MVT::v64i1)
02452         RC = &X86::VK64RegClass;
02453       else
02454         llvm_unreachable("Unknown argument type!");
02455 
02456       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02457       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
02458 
02459       // If this is an 8 or 16-bit value, it is really passed promoted to 32
02460       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
02461       // right size.
02462       if (VA.getLocInfo() == CCValAssign::SExt)
02463         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
02464                                DAG.getValueType(VA.getValVT()));
02465       else if (VA.getLocInfo() == CCValAssign::ZExt)
02466         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
02467                                DAG.getValueType(VA.getValVT()));
02468       else if (VA.getLocInfo() == CCValAssign::BCvt)
02469         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
02470 
02471       if (VA.isExtInLoc()) {
02472         // Handle MMX values passed in XMM regs.
02473         if (RegVT.isVector())
02474           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
02475         else
02476           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
02477       }
02478     } else {
02479       assert(VA.isMemLoc());
02480       ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
02481     }
02482 
02483     // If value is passed via pointer - do a load.
02484     if (VA.getLocInfo() == CCValAssign::Indirect)
02485       ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
02486                              MachinePointerInfo(), false, false, false, 0);
02487 
02488     InVals.push_back(ArgValue);
02489   }
02490 
02491   if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) {
02492     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02493       // The x86-64 ABIs require that when returning a struct by value we copy
02494       // the sret argument into %rax/%eax (depending on ABI) for the return.
02495       // Win32 requires us to put the sret argument in %eax as well.
02496       // Save the argument into a virtual register so that we can access it
02497       // from the return points.
02498       if (Ins[i].Flags.isSRet()) {
02499         unsigned Reg = FuncInfo->getSRetReturnReg();
02500         if (!Reg) {
02501           MVT PtrTy = getPointerTy();
02502           Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
02503           FuncInfo->setSRetReturnReg(Reg);
02504         }
02505         SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
02506         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
02507         break;
02508       }
02509     }
02510   }
02511 
02512   unsigned StackSize = CCInfo.getNextStackOffset();
02513   // Align stack specially for tail calls.
02514   if (FuncIsMadeTailCallSafe(CallConv,
02515                              MF.getTarget().Options.GuaranteedTailCallOpt))
02516     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
02517 
02518   // If the function takes a variable number of arguments, make a frame index for
02519   // the start of the first vararg value... for expansion of llvm.va_start. We
02520   // can skip this if there are no va_start calls.
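        // For illustration (sketch only), a va_start call in IR looks roughly like
        //   %ap = alloca i8*
        //   %ap.p = bitcast i8** %ap to i8*
        //   call void @llvm.va_start(i8* %ap.p)
        // and its expansion relies on the vararg frame index created below to find
        // the first stack-passed variadic argument.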
02521   if (MFI->hasVAStart() &&
02522       (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
02523                    CallConv != CallingConv::X86_ThisCall))) {
02524     FuncInfo->setVarArgsFrameIndex(
02525         MFI->CreateFixedObject(1, StackSize, true));
02526   }
02527 
02528   // 64-bit calling conventions support varargs and register parameters, so we
02529   // have to do extra work to spill them in the prologue or forward them to
02530   // musttail calls.
02531   if (Is64Bit && isVarArg &&
02532       (MFI->hasVAStart() || MFI->hasMustTailInVarArgFunc())) {
02533     // Find the first unallocated argument registers.
02534     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
02535     ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
02536     unsigned NumIntRegs =
02537         CCInfo.getFirstUnallocated(ArgGPRs.data(), ArgGPRs.size());
02538     unsigned NumXMMRegs =
02539         CCInfo.getFirstUnallocated(ArgXMMs.data(), ArgXMMs.size());
02540     assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
02541            "SSE register cannot be used when SSE is disabled!");
02542 
02543     // Gather all the live in physical registers.
02544     SmallVector<SDValue, 6> LiveGPRs;
02545     SmallVector<SDValue, 8> LiveXMMRegs;
02546     SDValue ALVal;
02547     for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
02548       unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
02549       LiveGPRs.push_back(
02550           DAG.getCopyFromReg(DAG.getEntryNode(), dl, GPR, MVT::i64));
02551     }
02552     if (!ArgXMMs.empty()) {
02553       unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
02554       ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
02555       for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
02556         unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
02557         LiveXMMRegs.push_back(
02558             DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
02559       }
02560     }
02561 
02562     // Store them to the va_list returned by va_start.
02563     if (MFI->hasVAStart()) {
02564       if (IsWin64) {
02565         const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
02566         // Get to the caller-allocated home save location.  Add 8 to account
02567         // for the return address.
02568         int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
02569         FuncInfo->setRegSaveFrameIndex(
02570           MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
02571         // Fixup to set vararg frame on shadow area (4 x i64).
02572         if (NumIntRegs < 4)
02573           FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
02574       } else {
02575         // For X86-64, if there are vararg parameters that are passed via
02576         // registers, then we must store them to their spots on the stack so
02577         // they may be loaded by dereferencing the result of va_next.
02578         FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
02579         FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
02580         FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
02581             ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
02582       }
02583 
02584       // Store the integer parameter registers.
02585       SmallVector<SDValue, 8> MemOps;
02586       SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
02587                                         getPointerTy());
02588       unsigned Offset = FuncInfo->getVarArgsGPOffset();
02589       for (SDValue Val : LiveGPRs) {
02590         SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
02591                                   DAG.getIntPtrConstant(Offset));
02592         SDValue Store =
02593           DAG.getStore(Val.getValue(1), dl, Val, FIN,
02594                        MachinePointerInfo::getFixedStack(
02595                          FuncInfo->getRegSaveFrameIndex(), Offset),
02596                        false, false, 0);
02597         MemOps.push_back(Store);
02598         Offset += 8;
02599       }
02600 
02601       if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
02602         // Now store the XMM (fp + vector) parameter registers.
02603         SmallVector<SDValue, 12> SaveXMMOps;
02604         SaveXMMOps.push_back(Chain);
02605         SaveXMMOps.push_back(ALVal);
02606         SaveXMMOps.push_back(DAG.getIntPtrConstant(
02607                                FuncInfo->getRegSaveFrameIndex()));
02608         SaveXMMOps.push_back(DAG.getIntPtrConstant(
02609                                FuncInfo->getVarArgsFPOffset()));
02610         SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
02611                           LiveXMMRegs.end());
02612         MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
02613                                      MVT::Other, SaveXMMOps));
02614       }
02615 
02616       if (!MemOps.empty())
02617         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02618     } else {
02619       // Add all GPRs, al, and XMMs to the list of forwards.  We will add them
02620       // to the liveout set on a musttail call.
02621       assert(MFI->hasMustTailInVarArgFunc());
02622       auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
02623       typedef X86MachineFunctionInfo::Forward Forward;
02624 
02625       for (unsigned I = 0, E = LiveGPRs.size(); I != E; ++I) {
02626         unsigned VReg =
02627             MF.getRegInfo().createVirtualRegister(&X86::GR64RegClass);
02628         Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveGPRs[I]);
02629         Forwards.push_back(Forward(VReg, ArgGPRs[NumIntRegs + I], MVT::i64));
02630       }
02631 
02632       if (!ArgXMMs.empty()) {
02633         unsigned ALVReg =
02634             MF.getRegInfo().createVirtualRegister(&X86::GR8RegClass);
02635         Chain = DAG.getCopyToReg(Chain, dl, ALVReg, ALVal);
02636         Forwards.push_back(Forward(ALVReg, X86::AL, MVT::i8));
02637 
02638         for (unsigned I = 0, E = LiveXMMRegs.size(); I != E; ++I) {
02639           unsigned VReg =
02640               MF.getRegInfo().createVirtualRegister(&X86::VR128RegClass);
02641           Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveXMMRegs[I]);
02642           Forwards.push_back(
02643               Forward(VReg, ArgXMMs[NumXMMRegs + I], MVT::v4f32));
02644         }
02645       }
02646     }
02647   }
02648 
02649   // Some CCs need callee pop.
02650   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
02651                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
02652     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
02653   } else {
02654     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
02655     // If this is an sret function, the return should pop the hidden pointer.
02656     if (!Is64Bit && !IsTailCallConvention(CallConv) &&
02657         !Subtarget->getTargetTriple().isOSMSVCRT() &&
02658         argsAreStructReturn(Ins) == StackStructReturn)
02659       FuncInfo->setBytesToPopOnReturn(4);
02660   }
02661 
02662   if (!Is64Bit) {
02663     // RegSaveFrameIndex is X86-64 only.
02664     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
02665     if (CallConv == CallingConv::X86_FastCall ||
02666         CallConv == CallingConv::X86_ThisCall)
02667       // fastcc functions can't have varargs.
02668       FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
02669   }
02670 
02671   FuncInfo->setArgumentStackSize(StackSize);
02672 
02673   return Chain;
02674 }
02675 
02676 SDValue
02677 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
02678                                     SDValue StackPtr, SDValue Arg,
02679                                     SDLoc dl, SelectionDAG &DAG,
02680                                     const CCValAssign &VA,
02681                                     ISD::ArgFlagsTy Flags) const {
02682   unsigned LocMemOffset = VA.getLocMemOffset();
02683   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
02684   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
02685   if (Flags.isByVal())
02686     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
02687 
02688   return DAG.getStore(Chain, dl, Arg, PtrOff,
02689                       MachinePointerInfo::getStack(LocMemOffset),
02690                       false, false, 0);
02691 }
02692 
02693 /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
02694 /// optimization is performed and it is required.
02695 SDValue
02696 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
02697                                            SDValue &OutRetAddr, SDValue Chain,
02698                                            bool IsTailCall, bool Is64Bit,
02699                                            int FPDiff, SDLoc dl) const {
02700   // Adjust the Return address stack slot.
02701   EVT VT = getPointerTy();
02702   OutRetAddr = getReturnAddressFrameIndex(DAG);
02703 
02704   // Load the "old" Return address.
02705   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
02706                            false, false, false, 0);
02707   return SDValue(OutRetAddr.getNode(), 1);
02708 }
02709 
02710 /// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
02711 /// optimization is performed and it is required (FPDiff!=0).
02712 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
02713                                         SDValue Chain, SDValue RetAddrFrIdx,
02714                                         EVT PtrVT, unsigned SlotSize,
02715                                         int FPDiff, SDLoc dl) {
02716   // Store the return address to the appropriate stack slot.
02717   if (!FPDiff) return Chain;
02718   // Calculate the new stack slot for the return address.
02719   int NewReturnAddrFI =
02720     MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
02721                                          false);
02722   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
02723   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
02724                        MachinePointerInfo::getFixedStack(NewReturnAddrFI),
02725                        false, false, 0);
02726   return Chain;
02727 }
02728 
02729 SDValue
02730 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
02731                              SmallVectorImpl<SDValue> &InVals) const {
02732   SelectionDAG &DAG                     = CLI.DAG;
02733   SDLoc &dl                             = CLI.DL;
02734   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
02735   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
02736   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
02737   SDValue Chain                         = CLI.Chain;
02738   SDValue Callee                        = CLI.Callee;
02739   CallingConv::ID CallConv              = CLI.CallConv;
02740   bool &isTailCall                      = CLI.IsTailCall;
02741   bool isVarArg                         = CLI.IsVarArg;
02742 
02743   MachineFunction &MF = DAG.getMachineFunction();
02744   bool Is64Bit        = Subtarget->is64Bit();
02745   bool IsWin64        = Subtarget->isCallingConvWin64(CallConv);
02746   StructReturnType SR = callIsStructReturn(Outs);
02747   bool IsSibcall      = false;
02748   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
02749 
02750   if (MF.getTarget().Options.DisableTailCalls)
02751     isTailCall = false;
02752 
02753   bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
02754   if (IsMustTail) {
02755     // Force this to be a tail call.  The verifier rules are enough to ensure
02756     // that we can lower this successfully without moving the return address
02757     // around.
02758     isTailCall = true;
02759   } else if (isTailCall) {
02760     // Check if it's really possible to do a tail call.
02761     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
02762                     isVarArg, SR != NotStructReturn,
02763                     MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
02764                     Outs, OutVals, Ins, DAG);
02765 
02766     // Sibcalls are automatically detected tailcalls which do not require
02767     // ABI changes.
02768     if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
02769       IsSibcall = true;
02770 
02771     if (isTailCall)
02772       ++NumTailCalls;
02773   }
02774 
02775   assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
02776          "Var args not supported with calling convention fastcc, ghc or hipe");
02777 
02778   // Analyze operands of the call, assigning locations to each operand.
02779   SmallVector<CCValAssign, 16> ArgLocs;
02780   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02781 
02782   // Allocate shadow area for Win64
02783   if (IsWin64)
02784     CCInfo.AllocateStack(32, 8);
02785 
02786   CCInfo.AnalyzeCallOperands(Outs, CC_X86);
02787 
02788   // Get a count of how many bytes are to be pushed on the stack.
02789   unsigned NumBytes = CCInfo.getNextStackOffset();
02790   if (IsSibcall)
02791     // This is a sibcall. The memory operands are already in place in the
02792     // caller's incoming argument area, owned by the caller's own caller.
02793     NumBytes = 0;
02794   else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
02795            IsTailCallConvention(CallConv))
02796     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
02797 
02798   int FPDiff = 0;
02799   if (isTailCall && !IsSibcall && !IsMustTail) {
02800     // Lower arguments at fp - stackoffset + fpdiff.
02801     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
02802 
02803     FPDiff = NumBytesCallerPushed - NumBytes;
02804 
02805     // Set the delta of movement of the returnaddr stackslot, but only if
02806     // the new delta is larger in magnitude than the previous one.
02807     if (FPDiff < X86Info->getTCReturnAddrDelta())
02808       X86Info->setTCReturnAddrDelta(FPDiff);
02809   }
02810 
02811   unsigned NumBytesToPush = NumBytes;
02812   unsigned NumBytesToPop = NumBytes;
02813 
02814   // If we have an inalloca argument, all stack space has already been allocated
02815   // for us and is right at the top of the stack.  We don't support multiple
02816   // arguments passed in memory when using inalloca.
02817   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
02818     NumBytesToPush = 0;
02819     if (!ArgLocs.back().isMemLoc())
02820       report_fatal_error("cannot use inalloca attribute on a register "
02821                          "parameter");
02822     if (ArgLocs.back().getLocMemOffset() != 0)
02823       report_fatal_error("any parameter with the inalloca attribute must be "
02824                          "the only memory argument");
02825   }
02826 
02827   if (!IsSibcall)
02828     Chain = DAG.getCALLSEQ_START(
02829         Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
02830 
02831   SDValue RetAddrFrIdx;
02832   // Load return address for tail calls.
02833   if (isTailCall && FPDiff)
02834     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
02835                                     Is64Bit, FPDiff, dl);
02836 
02837   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
02838   SmallVector<SDValue, 8> MemOpChains;
02839   SDValue StackPtr;
02840 
02841   // Walk the register/memloc assignments, inserting copies/loads.  In the case
02842   // of tail call optimization, arguments are handled later.
02843   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
02844       DAG.getSubtarget().getRegisterInfo());
02845   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02846     // Skip inalloca arguments, they have already been written.
02847     ISD::ArgFlagsTy Flags = Outs[i].Flags;
02848     if (Flags.isInAlloca())
02849       continue;
02850 
02851     CCValAssign &VA = ArgLocs[i];
02852     EVT RegVT = VA.getLocVT();
02853     SDValue Arg = OutVals[i];
02854     bool isByVal = Flags.isByVal();
02855 
02856     // Promote the value if needed.
02857     switch (VA.getLocInfo()) {
02858     default: llvm_unreachable("Unknown loc info!");
02859     case CCValAssign::Full: break;
02860     case CCValAssign::SExt:
02861       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
02862       break;
02863     case CCValAssign::ZExt:
02864       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
02865       break;
02866     case CCValAssign::AExt:
02867       if (RegVT.is128BitVector()) {
02868         // Special case: passing MMX values in XMM registers.
02869         Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
02870         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
02871         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
02872       } else
02873         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
02874       break;
02875     case CCValAssign::BCvt:
02876       Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
02877       break;
02878     case CCValAssign::Indirect: {
02879       // Store the argument.
02880       SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
02881       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
02882       Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
02883                            MachinePointerInfo::getFixedStack(FI),
02884                            false, false, 0);
02885       Arg = SpillSlot;
02886       break;
02887     }
02888     }
02889 
02890     if (VA.isRegLoc()) {
02891       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
02892       if (isVarArg && IsWin64) {
02893         // The Win64 ABI requires an argument XMM reg to be copied to the
02894         // corresponding shadow reg if the callee is a varargs function.
02895         unsigned ShadowReg = 0;
02896         switch (VA.getLocReg()) {
02897         case X86::XMM0: ShadowReg = X86::RCX; break;
02898         case X86::XMM1: ShadowReg = X86::RDX; break;
02899         case X86::XMM2: ShadowReg = X86::R8; break;
02900         case X86::XMM3: ShadowReg = X86::R9; break;
02901         }
02902         if (ShadowReg)
02903           RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
02904       }
02905     } else if (!IsSibcall && (!isTailCall || isByVal)) {
02906       assert(VA.isMemLoc());
02907       if (!StackPtr.getNode())
02908         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
02909                                       getPointerTy());
02910       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
02911                                              dl, DAG, VA, Flags));
02912     }
02913   }
02914 
02915   if (!MemOpChains.empty())
02916     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
02917 
02918   if (Subtarget->isPICStyleGOT()) {
02919     // ELF / PIC requires GOT in the EBX register before function calls via PLT
02920     // GOT pointer.
02921     if (!isTailCall) {
02922       RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
02923                DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
02924     } else {
02925       // If we are tail calling and generating PIC/GOT style code load the
02926       // address of the callee into ECX. The value in ecx is used as target of
02927       // the tail jump. This is done to circumvent the ebx/callee-saved problem
02928       // for tail calls on PIC/GOT architectures. Normally we would just put the
02929       // address of GOT into ebx and then call target@PLT. But for tail calls
02930       // ebx would be restored (since ebx is callee saved) before jumping to the
02931       // target@PLT.
02932 
02933       // Note: The actual moving to ECX is done further down.
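            // Roughly (illustrative only), instead of 'jmp target@PLT', which
            // would require EBX to still hold the GOT pointer, the emitted tail
            // call looks like:
            //   movl target@GOT(%ebx), %ecx
            //   ... restore callee-saved registers, including EBX ...
            //   jmp  *%ecx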
02934       GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
02935       if (G && !G->getGlobal()->hasHiddenVisibility() &&
02936           !G->getGlobal()->hasProtectedVisibility())
02937         Callee = LowerGlobalAddress(Callee, DAG);
02938       else if (isa<ExternalSymbolSDNode>(Callee))
02939         Callee = LowerExternalSymbol(Callee, DAG);
02940     }
02941   }
02942 
02943   if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
02944     // From AMD64 ABI document:
02945     // For calls that may call functions that use varargs or stdargs
02946     // (prototype-less calls or calls to functions containing ellipsis (...) in
02947     // the declaration) %al is used as a hidden argument to specify the number
02948     // of SSE registers used. The contents of %al do not need to match exactly
02949     // the number of registers, but must be an upper bound on the number of SSE
02950     // registers used and must be in the range 0 - 8 inclusive.
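          // For illustration, a call such as printf("%f\n", x) passes one value
          // in an XMM register, so the lowered call sets %al to 1 (e.g. with a
          // 'movb $1, %al' or equivalent) immediately before the call.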
02951 
02952     // Count the number of XMM registers allocated.
02953     static const MCPhysReg XMMArgRegs[] = {
02954       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02955       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02956     };
02957     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
02958     assert((Subtarget->hasSSE1() || !NumXMMRegs)
02959            && "SSE registers cannot be used when SSE is disabled");
02960 
02961     RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
02962                                         DAG.getConstant(NumXMMRegs, MVT::i8)));
02963   }
02964 
02965   if (Is64Bit && isVarArg && IsMustTail) {
02966     const auto &Forwards = X86Info->getForwardedMustTailRegParms();
02967     for (const auto &F : Forwards) {
02968       SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
02969       RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
02970     }
02971   }
02972 
02973   // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
02974   // don't need this because the eligibility check rejects calls that require
02975   // shuffling arguments passed in memory.
02976   if (!IsSibcall && isTailCall) {
02977     // Force all the incoming stack arguments to be loaded from the stack
02978     // before any new outgoing arguments are stored to the stack, because the
02979     // outgoing stack slots may alias the incoming argument stack slots, and
02980     // the alias isn't otherwise explicit. This is slightly more conservative
02981     // than necessary, because it means that each store effectively depends
02982     // on every argument instead of just those arguments it would clobber.
02983     SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
02984 
02985     SmallVector<SDValue, 8> MemOpChains2;
02986     SDValue FIN;
02987     int FI = 0;
02988     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02989       CCValAssign &VA = ArgLocs[i];
02990       if (VA.isRegLoc())
02991         continue;
02992       assert(VA.isMemLoc());
02993       SDValue Arg = OutVals[i];
02994       ISD::ArgFlagsTy Flags = Outs[i].Flags;
02995       // Skip inalloca arguments.  They don't require any work.
02996       if (Flags.isInAlloca())
02997         continue;
02998       // Create frame index.
02999       int32_t Offset = VA.getLocMemOffset()+FPDiff;
03000       uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
03001       FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
03002       FIN = DAG.getFrameIndex(FI, getPointerTy());
03003 
03004       if (Flags.isByVal()) {
03005         // Copy relative to framepointer.
03006         SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
03007         if (!StackPtr.getNode())
03008           StackPtr = DAG.getCopyFromReg(Chain, dl,
03009                                         RegInfo->getStackRegister(),
03010                                         getPointerTy());
03011         Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
03012 
03013         MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
03014                                                          ArgChain,
03015                                                          Flags, DAG, dl));
03016       } else {
03017         // Store relative to framepointer.
03018         MemOpChains2.push_back(
03019           DAG.getStore(ArgChain, dl, Arg, FIN,
03020                        MachinePointerInfo::getFixedStack(FI),
03021                        false, false, 0));
03022       }
03023     }
03024 
03025     if (!MemOpChains2.empty())
03026       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
03027 
03028     // Store the return address to the appropriate stack slot.
03029     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
03030                                      getPointerTy(), RegInfo->getSlotSize(),
03031                                      FPDiff, dl);
03032   }
03033 
03034   // Build a sequence of copy-to-reg nodes chained together with token chain
03035   // and flag operands which copy the outgoing args into registers.
03036   SDValue InFlag;
03037   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
03038     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
03039                              RegsToPass[i].second, InFlag);
03040     InFlag = Chain.getValue(1);
03041   }
03042 
03043   if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
03044     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
03045     // In the 64-bit large code model, we have to make all calls
03046     // through a register, since the call instruction's 32-bit
03047     // pc-relative offset may not be large enough to hold the whole
03048     // address.
03049   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
03050     // If the callee is a GlobalAddress node (quite common, every direct call
03051     // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
03052     // it.
03053 
03054     // We should use extra load for direct calls to dllimported functions in
03055     // non-JIT mode.
03056     const GlobalValue *GV = G->getGlobal();
03057     if (!GV->hasDLLImportStorageClass()) {
03058       unsigned char OpFlags = 0;
03059       bool ExtraLoad = false;
03060       unsigned WrapperKind = ISD::DELETED_NODE;
03061 
03062       // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
03063       // external symbols must go through the PLT in PIC mode.  If the symbol
03064       // has hidden or protected visibility, or if it is static or local, then
03065       // we don't need to use the PLT - we can directly call it.
03066       if (Subtarget->isTargetELF() &&
03067           DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
03068           GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
03069         OpFlags = X86II::MO_PLT;
03070       } else if (Subtarget->isPICStyleStubAny() &&
03071                  (GV->isDeclaration() || GV->isWeakForLinker()) &&
03072                  (!Subtarget->getTargetTriple().isMacOSX() ||
03073                   Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
03074         // PC-relative references to external symbols should go through $stub,
03075         // unless we're building with the leopard linker or later, which
03076         // automatically synthesizes these stubs.
03077         OpFlags = X86II::MO_DARWIN_STUB;
03078       } else if (Subtarget->isPICStyleRIPRel() &&
03079                  isa<Function>(GV) &&
03080                  cast<Function>(GV)->getAttributes().
03081                    hasAttribute(AttributeSet::FunctionIndex,
03082                                 Attribute::NonLazyBind)) {
03083         // If the function is marked as non-lazy, generate an indirect call
03084         // which loads from the GOT directly. This avoids runtime overhead
03085         // at the cost of eager binding (and one extra byte of encoding).
03086         OpFlags = X86II::MO_GOTPCREL;
03087         WrapperKind = X86ISD::WrapperRIP;
03088         ExtraLoad = true;
03089       }
03090 
03091       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
03092                                           G->getOffset(), OpFlags);
03093 
03094       // Add a wrapper if needed.
03095       if (WrapperKind != ISD::DELETED_NODE)
03096         Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
03097       // Add extra indirection if needed.
03098       if (ExtraLoad)
03099         Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
03100                              MachinePointerInfo::getGOT(),
03101                              false, false, false, 0);
03102     }
03103   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
03104     unsigned char OpFlags = 0;
03105 
03106     // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
03107     // external symbols should go through the PLT.
03108     if (Subtarget->isTargetELF() &&
03109         DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
03110       OpFlags = X86II::MO_PLT;
03111     } else if (Subtarget->isPICStyleStubAny() &&
03112                (!Subtarget->getTargetTriple().isMacOSX() ||
03113                 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
03114       // PC-relative references to external symbols should go through $stub,
03115       // unless we're building with the leopard linker or later, which
03116       // automatically synthesizes these stubs.
03117       OpFlags = X86II::MO_DARWIN_STUB;
03118     }
03119 
03120     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
03121                                          OpFlags);
03122   }
03123 
03124   // Returns a chain & a flag for retval copy to use.
03125   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
03126   SmallVector<SDValue, 8> Ops;
03127 
03128   if (!IsSibcall && isTailCall) {
03129     Chain = DAG.getCALLSEQ_END(Chain,
03130                                DAG.getIntPtrConstant(NumBytesToPop, true),
03131                                DAG.getIntPtrConstant(0, true), InFlag, dl);
03132     InFlag = Chain.getValue(1);
03133   }
03134 
03135   Ops.push_back(Chain);
03136   Ops.push_back(Callee);
03137 
03138   if (isTailCall)
03139     Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
03140 
03141   // Add argument registers to the end of the list so that they are known live
03142   // into the call.
03143   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
03144     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
03145                                   RegsToPass[i].second.getValueType()));
03146 
03147   // Add a register mask operand representing the call-preserved registers.
03148   const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
03149   const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
03150   assert(Mask && "Missing call preserved mask for calling convention");
03151   Ops.push_back(DAG.getRegisterMask(Mask));
03152 
03153   if (InFlag.getNode())
03154     Ops.push_back(InFlag);
03155 
03156   if (isTailCall) {
03157     // We used to do:
03158     //// If this is the first return lowered for this function, add the regs
03159     //// to the liveout set for the function.
03160     // This isn't right, although it's probably harmless on x86; liveouts
03161     // should be computed from returns not tail calls.  Consider a void
03162     // function making a tail call to a function returning int.
03163     return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
03164   }
03165 
03166   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
03167   InFlag = Chain.getValue(1);
03168 
03169   // Create the CALLSEQ_END node.
03170   unsigned NumBytesForCalleeToPop;
03171   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
03172                        DAG.getTarget().Options.GuaranteedTailCallOpt))
03173     NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
03174   else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
03175            !Subtarget->getTargetTriple().isOSMSVCRT() &&
03176            SR == StackStructReturn)
03177     // If this is a call to a struct-return function, the callee
03178     // pops the hidden struct pointer, so we have to push it back.
03179     // This is common for Darwin/X86, Linux & Mingw32 targets.
03180     // For MSVC Win32 targets, the caller pops the hidden struct pointer.
03181     NumBytesForCalleeToPop = 4;
03182   else
03183     NumBytesForCalleeToPop = 0;  // Callee pops nothing.
03184 
03185   // Returns a flag for retval copy to use.
03186   if (!IsSibcall) {
03187     Chain = DAG.getCALLSEQ_END(Chain,
03188                                DAG.getIntPtrConstant(NumBytesToPop, true),
03189                                DAG.getIntPtrConstant(NumBytesForCalleeToPop,
03190                                                      true),
03191                                InFlag, dl);
03192     InFlag = Chain.getValue(1);
03193   }
03194 
03195   // Handle result values, copying them out of physregs into vregs that we
03196   // return.
03197   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
03198                          Ins, dl, DAG, InVals);
03199 }
03200 
03201 //===----------------------------------------------------------------------===//
03202 //                Fast Calling Convention (tail call) implementation
03203 //===----------------------------------------------------------------------===//
03204 
03205 //  Like stdcall, the callee cleans up the arguments, except that ECX is
03206 //  reserved for storing the tail-called function address. Only 2 registers are
03207 //  free for argument passing (inreg). Tail call optimization is performed
03208 //  provided:
03209 //                * tailcallopt is enabled
03210 //                * caller/callee are fastcc
03211 //  On X86_64 architecture with GOT-style position independent code only local
03212 //  (within module) calls are supported at the moment.
03213 //  To keep the stack aligned according to the platform ABI, the function
03214 //  GetAlignedArgumentStackSize ensures that the argument delta is always a
03215 //  multiple of the stack alignment (dynamic linkers such as darwin's dyld need this).
03216 //  If a tail-called callee has more arguments than the caller, the caller
03217 //  needs to make sure that there is room to move the RETADDR to. This is
03218 //  achieved by reserving an area the size of the argument delta right after the
03219 //  original RETADDR, but before the saved framepointer or the spilled registers
03220 //  e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
03221 //  stack layout:
03222 //    arg1
03223 //    arg2
03224 //    RETADDR
03225 //    [ new RETADDR
03226 //      move area ]
03227 //    (possible EBP)
03228 //    ESI
03229 //    EDI
03230 //    local1 ..
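      //
      //  For illustration (sketch only), with -tailcallopt enabled the following
      //  fastcc call is lowered as a guaranteed tail call, since both caller and
      //  callee use fastcc:
      //    define fastcc i32 @caller(i32 %a) {
      //      %r = tail call fastcc i32 @callee(i32 %a)
      //      ret i32 %r
      //    }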
03231 
03232 /// GetAlignedArgumentStackSize - Align the given stack size, e.g. to
03233 /// 16n + 12 bytes for a 16-byte alignment requirement.
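      /// For illustration, with a 16-byte stack alignment and 4-byte slots a
      /// StackSize of 20 becomes 28 (16*1 + 12) and a StackSize of 30 becomes
      /// 44 (16*2 + 12).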
03234 unsigned
03235 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
03236                                                SelectionDAG& DAG) const {
03237   MachineFunction &MF = DAG.getMachineFunction();
03238   const TargetMachine &TM = MF.getTarget();
03239   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
03240       TM.getSubtargetImpl()->getRegisterInfo());
03241   const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
03242   unsigned StackAlignment = TFI.getStackAlignment();
03243   uint64_t AlignMask = StackAlignment - 1;
03244   int64_t Offset = StackSize;
03245   unsigned SlotSize = RegInfo->getSlotSize();
03246   if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
03247     // Number smaller than 12 so just add the difference.
03248     Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
03249   } else {
03250     // Mask out lower bits, add stackalignment once plus the 12 bytes.
03251     Offset = ((~AlignMask) & Offset) + StackAlignment +
03252       (StackAlignment-SlotSize);
03253   }
03254   return Offset;
03255 }
03256 
03257 /// MatchingStackOffset - Return true if the given stack call argument is
03258 /// already available in the same position (relatively) of the caller's
03259 /// incoming argument stack.
03260 static
03261 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
03262                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
03263                          const X86InstrInfo *TII) {
03264   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
03265   int FI = INT_MAX;
03266   if (Arg.getOpcode() == ISD::CopyFromReg) {
03267     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
03268     if (!TargetRegisterInfo::isVirtualRegister(VR))
03269       return false;
03270     MachineInstr *Def = MRI->getVRegDef(VR);
03271     if (!Def)
03272       return false;
03273     if (!Flags.isByVal()) {
03274       if (!TII->isLoadFromStackSlot(Def, FI))
03275         return false;
03276     } else {
03277       unsigned Opcode = Def->getOpcode();
03278       if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
03279           Def->getOperand(1).isFI()) {
03280         FI = Def->getOperand(1).getIndex();
03281         Bytes = Flags.getByValSize();
03282       } else
03283         return false;
03284     }
03285   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
03286     if (Flags.isByVal())
03287       // ByVal argument is passed in as a pointer but it's now being
03288       // dereferenced. e.g.
03289       // define @foo(%struct.X* %A) {
03290       //   tail call @bar(%struct.X* byval %A)
03291       // }
03292       return false;
03293     SDValue Ptr = Ld->getBasePtr();
03294     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
03295     if (!FINode)
03296       return false;
03297     FI = FINode->getIndex();
03298   } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
03299     FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
03300     FI = FINode->getIndex();
03301     Bytes = Flags.getByValSize();
03302   } else
03303     return false;
03304 
03305   assert(FI != INT_MAX);
03306   if (!MFI->isFixedObjectIndex(FI))
03307     return false;
03308   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
03309 }
03310 
03311 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
03312 /// for tail call optimization. Targets which want to do tail call
03313 /// optimization should implement this function.
03314 bool
03315 X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
03316                                                      CallingConv::ID CalleeCC,
03317                                                      bool isVarArg,
03318                                                      bool isCalleeStructRet,
03319                                                      bool isCallerStructRet,
03320                                                      Type *RetTy,
03321                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
03322                                     const SmallVectorImpl<SDValue> &OutVals,
03323                                     const SmallVectorImpl<ISD::InputArg> &Ins,
03324                                                      SelectionDAG &DAG) const {
03325   if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
03326     return false;
03327 
03328   // If -tailcallopt is specified, make fastcc functions tail-callable.
03329   const MachineFunction &MF = DAG.getMachineFunction();
03330   const Function *CallerF = MF.getFunction();
03331 
03332   // If the function return type is x86_fp80 and the callee return type is not,
03333   // then the FP_EXTEND of the call result is not a nop. It's not safe to
03334   // perform a tailcall optimization here.
03335   if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
03336     return false;
03337 
03338   CallingConv::ID CallerCC = CallerF->getCallingConv();
03339   bool CCMatch = CallerCC == CalleeCC;
03340   bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
03341   bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
03342 
03343   if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
03344     if (IsTailCallConvention(CalleeCC) && CCMatch)
03345       return true;
03346     return false;
03347   }
03348 
03349   // Look for obvious safe cases to perform tail call optimization that do not
03350   // require ABI changes. This is what gcc calls sibcall.
03351 
03352   // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
03353   // emit a special epilogue.
03354   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
03355       DAG.getSubtarget().getRegisterInfo());
03356   if (RegInfo->needsStackRealignment(MF))
03357     return false;
03358 
03359   // Also avoid sibcall optimization if either caller or callee uses struct
03360   // return semantics.
03361   if (isCalleeStructRet || isCallerStructRet)
03362     return false;
03363 
03364   // A stdcall/thiscall caller is expected to clean up its arguments; the
03365   // callee isn't going to do that.
03366   // FIXME: this is more restrictive than needed. We could produce a tailcall
03367   // when the stack adjustment matches. For example, with a thiscall that takes
03368   // only one argument.
03369   if (!CCMatch && (CallerCC == CallingConv::X86_StdCall ||
03370                    CallerCC == CallingConv::X86_ThisCall))
03371     return false;
03372 
03373   // Do not sibcall optimize vararg calls unless all arguments are passed via
03374   // registers.
03375   if (isVarArg && !Outs.empty()) {
03376 
03377     // Optimizing for varargs on Win64 is unlikely to be safe without
03378     // additional testing.
03379     if (IsCalleeWin64 || IsCallerWin64)
03380       return false;
03381 
03382     SmallVector<CCValAssign, 16> ArgLocs;
03383     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03384                    *DAG.getContext());
03385 
03386     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03387     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
03388       if (!ArgLocs[i].isRegLoc())
03389         return false;
03390   }
03391 
03392   // If the call result is in ST0 / ST1, it needs to be popped off the x87
03393   // stack.  Therefore, if the call result is not used it is not safe to optimize
03394   // this into a sibcall.
03395   bool Unused = false;
03396   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
03397     if (!Ins[i].Used) {
03398       Unused = true;
03399       break;
03400     }
03401   }
03402   if (Unused) {
03403     SmallVector<CCValAssign, 16> RVLocs;
03404     CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
03405                    *DAG.getContext());
03406     CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
03407     for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
03408       CCValAssign &VA = RVLocs[i];
03409       if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
03410         return false;
03411     }
03412   }
03413 
03414   // If the calling conventions do not match, then we'd better make sure the
03415   // results are returned in the same way as what the caller expects.
03416   if (!CCMatch) {
03417     SmallVector<CCValAssign, 16> RVLocs1;
03418     CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
03419                     *DAG.getContext());
03420     CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
03421 
03422     SmallVector<CCValAssign, 16> RVLocs2;
03423     CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
03424                     *DAG.getContext());
03425     CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
03426 
03427     if (RVLocs1.size() != RVLocs2.size())
03428       return false;
03429     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
03430       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
03431         return false;
03432       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
03433         return false;
03434       if (RVLocs1[i].isRegLoc()) {
03435         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
03436           return false;
03437       } else {
03438         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
03439           return false;
03440       }
03441     }
03442   }
03443 
03444   // If the callee takes no arguments then go on to check the results of the
03445   // call.
03446   if (!Outs.empty()) {
03447     // Check if stack adjustment is needed. For now, do not do this if any
03448     // argument is passed on the stack.
03449     SmallVector<CCValAssign, 16> ArgLocs;
03450     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03451                    *DAG.getContext());
03452 
03453     // Allocate shadow area for Win64
03454     if (IsCalleeWin64)
03455       CCInfo.AllocateStack(32, 8);
03456 
03457     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03458     if (CCInfo.getNextStackOffset()) {
03459       MachineFunction &MF = DAG.getMachineFunction();
03460       if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
03461         return false;
03462 
03463       // Check if the arguments are already laid out in the right way as
03464       // the caller's fixed stack objects.
03465       MachineFrameInfo *MFI = MF.getFrameInfo();
03466       const MachineRegisterInfo *MRI = &MF.getRegInfo();
03467       const X86InstrInfo *TII =
03468           static_cast<const X86InstrInfo *>(DAG.getSubtarget().getInstrInfo());
03469       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03470         CCValAssign &VA = ArgLocs[i];
03471         SDValue Arg = OutVals[i];
03472         ISD::ArgFlagsTy Flags = Outs[i].Flags;
03473         if (VA.getLocInfo() == CCValAssign::Indirect)
03474           return false;
03475         if (!VA.isRegLoc()) {
03476           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
03477                                    MFI, MRI, TII))
03478             return false;
03479         }
03480       }
03481     }
03482 
03483     // If the tailcall address may be in a register, then make sure it's
03484     // possible to register allocate for it. In 32-bit, the call address can
03485     // only target EAX, EDX, or ECX since the tail call must be scheduled after
03486     // callee-saved registers are restored. These happen to be the same
03487     // registers used to pass 'inreg' arguments so watch out for those.
03488     if (!Subtarget->is64Bit() &&
03489         ((!isa<GlobalAddressSDNode>(Callee) &&
03490           !isa<ExternalSymbolSDNode>(Callee)) ||
03491          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03492       unsigned NumInRegs = 0;
03493       // In PIC we need an extra register to formulate the address computation
03494       // for the callee.
03495       unsigned MaxInRegs =
03496           (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
03497 
03498       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03499         CCValAssign &VA = ArgLocs[i];
03500         if (!VA.isRegLoc())
03501           continue;
03502         unsigned Reg = VA.getLocReg();
03503         switch (Reg) {
03504         default: break;
03505         case X86::EAX: case X86::EDX: case X86::ECX:
03506           if (++NumInRegs == MaxInRegs)
03507             return false;
03508           break;
03509         }
03510       }
03511     }
03512   }
03513 
03514   return true;
03515 }
03516 
03517 FastISel *
03518 X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
03519                                   const TargetLibraryInfo *libInfo) const {
03520   return X86::createFastISel(funcInfo, libInfo);
03521 }
03522 
03523 //===----------------------------------------------------------------------===//
03524 //                           Other Lowering Hooks
03525 //===----------------------------------------------------------------------===//
03526 
03527 static bool MayFoldLoad(SDValue Op) {
03528   return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
03529 }
03530 
03531 static bool MayFoldIntoStore(SDValue Op) {
03532   return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
03533 }
03534 
03535 static bool isTargetShuffle(unsigned Opcode) {
03536   switch(Opcode) {
03537   default: return false;
03538   case X86ISD::PSHUFB:
03539   case X86ISD::PSHUFD:
03540   case X86ISD::PSHUFHW:
03541   case X86ISD::PSHUFLW:
03542   case X86ISD::SHUFP:
03543   case X86ISD::PALIGNR:
03544   case X86ISD::MOVLHPS:
03545   case X86ISD::MOVLHPD:
03546   case X86ISD::MOVHLPS:
03547   case X86ISD::MOVLPS:
03548   case X86ISD::MOVLPD:
03549   case X86ISD::MOVSHDUP:
03550   case X86ISD::MOVSLDUP:
03551   case X86ISD::MOVDDUP:
03552   case X86ISD::MOVSS:
03553   case X86ISD::MOVSD:
03554   case X86ISD::UNPCKL:
03555   case X86ISD::UNPCKH:
03556   case X86ISD::VPERMILP:
03557   case X86ISD::VPERM2X128:
03558   case X86ISD::VPERMI:
03559     return true;
03560   }
03561 }
03562 
03563 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03564                                     SDValue V1, SelectionDAG &DAG) {
03565   switch(Opc) {
03566   default: llvm_unreachable("Unknown x86 shuffle node");
03567   case X86ISD::MOVSHDUP:
03568   case X86ISD::MOVSLDUP:
03569   case X86ISD::MOVDDUP:
03570     return DAG.getNode(Opc, dl, VT, V1);
03571   }
03572 }
03573 
03574 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03575                                     SDValue V1, unsigned TargetMask,
03576                                     SelectionDAG &DAG) {
03577   switch(Opc) {
03578   default: llvm_unreachable("Unknown x86 shuffle node");
03579   case X86ISD::PSHUFD:
03580   case X86ISD::PSHUFHW:
03581   case X86ISD::PSHUFLW:
03582   case X86ISD::VPERMILP:
03583   case X86ISD::VPERMI:
03584     return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
03585   }
03586 }
03587 
03588 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03589                                     SDValue V1, SDValue V2, unsigned TargetMask,
03590                                     SelectionDAG &DAG) {
03591   switch(Opc) {
03592   default: llvm_unreachable("Unknown x86 shuffle node");
03593   case X86ISD::PALIGNR:
03594   case X86ISD::VALIGN:
03595   case X86ISD::SHUFP:
03596   case X86ISD::VPERM2X128:
03597     return DAG.getNode(Opc, dl, VT, V1, V2,
03598                        DAG.getConstant(TargetMask, MVT::i8));
03599   }
03600 }
03601 
03602 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03603                                     SDValue V1, SDValue V2, SelectionDAG &DAG) {
03604   switch(Opc) {
03605   default: llvm_unreachable("Unknown x86 shuffle node");
03606   case X86ISD::MOVLHPS:
03607   case X86ISD::MOVLHPD:
03608   case X86ISD::MOVHLPS:
03609   case X86ISD::MOVLPS:
03610   case X86ISD::MOVLPD:
03611   case X86ISD::MOVSS:
03612   case X86ISD::MOVSD:
03613   case X86ISD::UNPCKL:
03614   case X86ISD::UNPCKH:
03615     return DAG.getNode(Opc, dl, VT, V1, V2);
03616   }
03617 }
03618 
03619 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
03620   MachineFunction &MF = DAG.getMachineFunction();
03621   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
03622       DAG.getSubtarget().getRegisterInfo());
03623   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
03624   int ReturnAddrIndex = FuncInfo->getRAIndex();
03625 
03626   if (ReturnAddrIndex == 0) {
03627     // Set up a frame object for the return address.
03628     unsigned SlotSize = RegInfo->getSlotSize();
03629     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
03630                                                            -(int64_t)SlotSize,
03631                                                            false);
03632     FuncInfo->setRAIndex(ReturnAddrIndex);
03633   }
03634 
03635   return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
03636 }
03637 
03638 bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
03639                                        bool hasSymbolicDisplacement) {
03640   // Offset should fit into 32 bit immediate field.
03641   if (!isInt<32>(Offset))
03642     return false;
03643 
03644   // If we don't have a symbolic displacement - we don't have any extra
03645   // restrictions.
03646   if (!hasSymbolicDisplacement)
03647     return true;
03648 
03649   // FIXME: Some tweaks might be needed for medium code model.
03650   if (M != CodeModel::Small && M != CodeModel::Kernel)
03651     return false;
03652 
03653   // For the small code model we assume that the last object ends at least 16MB
03654   // before the 31-bit boundary. We may also accept pretty large negative
03655   // constants, knowing that all objects are in the positive half of the address space.
03656   if (M == CodeModel::Small && Offset < 16*1024*1024)
03657     return true;
03658 
03659   // For the kernel code model we know that all objects reside in the negative
03660   // half of the 32-bit address space. We must not accept negative offsets, since
03661   // they may be just out of range, but we may accept pretty large positive ones.
03662   if (M == CodeModel::Kernel && Offset > 0)
03663     return true;
03664 
03665   return false;
03666 }
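      // Worked examples for the checks above (illustrative values only, not part
      // of the lowering logic):
      //   isOffsetSuitableForCodeModel(8 << 20, CodeModel::Small,  true)  -> true
      //     (8MB is below the 16MB small-code-model bound)
      //   isOffsetSuitableForCodeModel(-8,      CodeModel::Kernel, true)  -> false
      //     (the kernel code model only accepts positive offsets)
      //   isOffsetSuitableForCodeModel(-8,      CodeModel::Kernel, false) -> true
      //     (no symbolic displacement, so only the 32-bit immediate check applies)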
03667 
03668 /// isCalleePop - Determines whether the callee is required to pop its
03669 /// own arguments. Callee pop is necessary to support tail calls.
03670 bool X86::isCalleePop(CallingConv::ID CallingConv,
03671                       bool is64Bit, bool IsVarArg, bool TailCallOpt) {
03672   switch (CallingConv) {
03673   default:
03674     return false;
03675   case CallingConv::X86_StdCall:
03676   case CallingConv::X86_FastCall:
03677   case CallingConv::X86_ThisCall:
03678     return !is64Bit;
03679   case CallingConv::Fast:
03680   case CallingConv::GHC:
03681   case CallingConv::HiPE:
03682     if (IsVarArg)
03683       return false;
03684     return TailCallOpt;
03685   }
03686 }
03687 
03688 /// \brief Return true if the condition is an unsigned comparison operation.
03689 static bool isX86CCUnsigned(unsigned X86CC) {
03690   switch (X86CC) {
03691   default: llvm_unreachable("Invalid integer condition!");
03692   case X86::COND_E:     return true;
03693   case X86::COND_G:     return false;
03694   case X86::COND_GE:    return false;
03695   case X86::COND_L:     return false;
03696   case X86::COND_LE:    return false;
03697   case X86::COND_NE:    return true;
03698   case X86::COND_B:     return true;
03699   case X86::COND_A:     return true;
03700   case X86::COND_BE:    return true;
03701   case X86::COND_AE:    return true;
03702   }
03703   llvm_unreachable("covered switch fell through?!");
03704 }
03705 
03706 /// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the
03707 /// X86-specific condition code, returning the condition code and the LHS/RHS
03708 /// of the comparison to make.
03709 static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
03710                                SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
03711   if (!isFP) {
03712     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
03713       if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
03714         // X > -1   -> X == 0, jump !sign.
03715         RHS = DAG.getConstant(0, RHS.getValueType());
03716         return X86::COND_NS;
03717       }
03718       if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
03719         // X < 0   -> X == 0, jump on sign.
03720         return X86::COND_S;
03721       }
03722       if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
03723         // X < 1   -> X <= 0
03724         RHS = DAG.getConstant(0, RHS.getValueType());
03725         return X86::COND_LE;
03726       }
03727     }
03728 
03729     switch (SetCCOpcode) {
03730     default: llvm_unreachable("Invalid integer condition!");
03731     case ISD::SETEQ:  return X86::COND_E;
03732     case ISD::SETGT:  return X86::COND_G;
03733     case ISD::SETGE:  return X86::COND_GE;
03734     case ISD::SETLT:  return X86::COND_L;
03735     case ISD::SETLE:  return X86::COND_LE;
03736     case ISD::SETNE:  return X86::COND_NE;
03737     case ISD::SETULT: return X86::COND_B;
03738     case ISD::SETUGT: return X86::COND_A;
03739     case ISD::SETULE: return X86::COND_BE;
03740     case ISD::SETUGE: return X86::COND_AE;
03741     }
03742   }
03743 
03744   // First determine if it is required or is profitable to flip the operands.
03745 
03746   // If LHS is a foldable load, but RHS is not, flip the condition.
03747   if (ISD::isNON_EXTLoad(LHS.getNode()) &&
03748       !ISD::isNON_EXTLoad(RHS.getNode())) {
03749     SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
03750     std::swap(LHS, RHS);
03751   }
03752 
03753   switch (SetCCOpcode) {
03754   default: break;
03755   case ISD::SETOLT:
03756   case ISD::SETOLE:
03757   case ISD::SETUGT:
03758   case ISD::SETUGE:
03759     std::swap(LHS, RHS);
03760     break;
03761   }
03762 
03763   // On a floating point condition, the flags are set as follows:
03764   // ZF  PF  CF   op
03765   //  0 | 0 | 0 | X > Y
03766   //  0 | 0 | 1 | X < Y
03767   //  1 | 0 | 0 | X == Y
03768   //  1 | 1 | 1 | unordered
03769   switch (SetCCOpcode) {
03770   default: llvm_unreachable("Condcode should be pre-legalized away");
03771   case ISD::SETUEQ:
03772   case ISD::SETEQ:   return X86::COND_E;
03773   case ISD::SETOLT:              // flipped
03774   case ISD::SETOGT:
03775   case ISD::SETGT:   return X86::COND_A;
03776   case ISD::SETOLE:              // flipped
03777   case ISD::SETOGE:
03778   case ISD::SETGE:   return X86::COND_AE;
03779   case ISD::SETUGT:              // flipped
03780   case ISD::SETULT:
03781   case ISD::SETLT:   return X86::COND_B;
03782   case ISD::SETUGE:              // flipped
03783   case ISD::SETULE:
03784   case ISD::SETLE:   return X86::COND_BE;
03785   case ISD::SETONE:
03786   case ISD::SETNE:   return X86::COND_NE;
03787   case ISD::SETUO:   return X86::COND_P;
03788   case ISD::SETO:    return X86::COND_NP;
03789   case ISD::SETOEQ:
03790   case ISD::SETUNE:  return X86::COND_INVALID;
03791   }
03792 }
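      // A few illustrative translations performed by the routine above:
      //   (X setgt -1) : RHS is rewritten to 0 and COND_NS is used (test the sign).
      //   (X setlt  1) : RHS is rewritten to 0 and COND_LE is used (X <= 0).
      //   (X setolt Y) : the operands are swapped and COND_A is used, so only the
      //                  CF-based "above/below" conditions are needed for FP.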
03793 
03794 /// hasFPCMov - Is there a floating point cmov for the specific X86 condition
03795 /// code? The current x86 ISA includes the following FP cmov instructions:
03796 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
03797 static bool hasFPCMov(unsigned X86CC) {
03798   switch (X86CC) {
03799   default:
03800     return false;
03801   case X86::COND_B:
03802   case X86::COND_BE:
03803   case X86::COND_E:
03804   case X86::COND_P:
03805   case X86::COND_A:
03806   case X86::COND_AE:
03807   case X86::COND_NE:
03808   case X86::COND_NP:
03809     return true;
03810   }
03811 }
03812 
03813 /// isFPImmLegal - Returns true if the target can instruction select the
03814 /// specified FP immediate natively. If false, the legalizer will
03815 /// materialize the FP immediate as a load from a constant pool.
03816 bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
03817   for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
03818     if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
03819       return true;
03820   }
03821   return false;
03822 }
03823 
03824 /// \brief Returns true if it is beneficial to convert a load of a constant
03825 /// to just the constant itself.
03826 bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
03827                                                           Type *Ty) const {
03828   assert(Ty->isIntegerTy());
03829 
03830   unsigned BitSize = Ty->getPrimitiveSizeInBits();
03831   if (BitSize == 0 || BitSize > 64)
03832     return false;
03833   return true;
03834 }
03835 
03836 /// isUndefOrInRange - Return true if Val is undef (negative) or if its value
03837 /// falls within the specified half-open range [Low, Hi).
03838 static bool isUndefOrInRange(int Val, int Low, int Hi) {
03839   return (Val < 0) || (Val >= Low && Val < Hi);
03840 }
03841 
03842 /// isUndefOrEqual - Val is either less than zero (undef) or equal to the
03843 /// specified value.
03844 static bool isUndefOrEqual(int Val, int CmpVal) {
03845   return (Val < 0 || Val == CmpVal);
03846 }
03847 
03848 /// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
03849 /// at position Pos and ending at Pos+Size, is either undef or equal to the
03850 /// sequential value starting at Low (i.e. Mask[Pos+i] == Low+i).
03851 static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
03852                                        unsigned Pos, unsigned Size, int Low) {
03853   for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
03854     if (!isUndefOrEqual(Mask[i], Low))
03855       return false;
03856   return true;
03857 }
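      // Illustrative uses of the three helpers above (hypothetical mask values):
      //   isUndefOrInRange(-1, 0, 4)                         -> true  (undef)
      //   isUndefOrInRange( 4, 0, 4)                         -> false (range is half-open)
      //   isUndefOrEqual(-1, 7)                              -> true
      //   isSequentialOrUndefInRange({4, -1, 6, 7}, 0, 4, 4) -> true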
03858 
03859 /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
03860 /// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
03861 /// the second operand.
03862 static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT) {
03863   if (VT == MVT::v4f32 || VT == MVT::v4i32 )
03864     return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
03865   if (VT == MVT::v2f64 || VT == MVT::v2i64)
03866     return (Mask[0] < 2 && Mask[1] < 2);
03867   return false;
03868 }
03869 
03870 /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
03871 /// is suitable for input to PSHUFHW.
03872 static bool isPSHUFHWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
03873   if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
03874     return false;
03875 
03876   // Lower quadword copied in order or undef.
03877   if (!isSequentialOrUndefInRange(Mask, 0, 4, 0))
03878     return false;
03879 
03880   // Upper quadword shuffled.
03881   for (unsigned i = 4; i != 8; ++i)
03882     if (!isUndefOrInRange(Mask[i], 4, 8))
03883       return false;
03884 
03885   if (VT == MVT::v16i16) {
03886     // Lower quadword copied in order or undef.
03887     if (!isSequentialOrUndefInRange(Mask, 8, 4, 8))
03888       return false;
03889 
03890     // Upper quadword shuffled.
03891     for (unsigned i = 12; i != 16; ++i)
03892       if (!isUndefOrInRange(Mask[i], 12, 16))
03893         return false;
03894   }
03895 
03896   return true;
03897 }
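      // For example (illustrative only), the v8i16 mask <0, 1, 2, 3, 7, 6, 5, 4> is
      // accepted here: the low quadword is copied in order and the high quadword is
      // freely permuted within itself, which is exactly what PSHUFHW can do. The
      // corresponding immediate is computed by getShufflePSHUFHWImmediate below.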
03898 
03899 /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
03900 /// is suitable for input to PSHUFLW.
03901 static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
03902   if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
03903     return false;
03904 
03905   // Upper quadword copied in order.
03906   if (!isSequentialOrUndefInRange(Mask, 4, 4, 4))
03907     return false;
03908 
03909   // Lower quadword shuffled.
03910   for (unsigned i = 0; i != 4; ++i)
03911     if (!isUndefOrInRange(Mask[i], 0, 4))
03912       return false;
03913 
03914   if (VT == MVT::v16i16) {
03915     // Upper quadword copied in order.
03916     if (!isSequentialOrUndefInRange(Mask, 12, 4, 12))
03917       return false;
03918 
03919     // Lower quadword shuffled.
03920     for (unsigned i = 8; i != 12; ++i)
03921       if (!isUndefOrInRange(Mask[i], 8, 12))
03922         return false;
03923   }
03924 
03925   return true;
03926 }
03927 
03928 /// \brief Return true if the mask specifies a shuffle of elements that is
03929 /// suitable for input to intralane (palignr) or interlane (valign) vector
03930 /// right-shift.
03931 static bool isAlignrMask(ArrayRef<int> Mask, MVT VT, bool InterLane) {
03932   unsigned NumElts = VT.getVectorNumElements();
03933   unsigned NumLanes = InterLane ? 1: VT.getSizeInBits()/128;
03934   unsigned NumLaneElts = NumElts/NumLanes;
03935 
03936   // Do not handle 64-bit element shuffles with palignr.
03937   if (NumLaneElts == 2)
03938     return false;
03939 
03940   for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
03941     unsigned i;
03942     for (i = 0; i != NumLaneElts; ++i) {
03943       if (Mask[i+l] >= 0)
03944         break;
03945     }
03946 
03947     // Lane is all undef, go to next lane
03948     if (i == NumLaneElts)
03949       continue;
03950 
03951     int Start = Mask[i+l];
03952 
03953     // Make sure it's in this lane in one of the sources
03954     if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
03955         !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
03956       return false;
03957 
03958     // If not lane 0, then we must match lane 0
03959     if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
03960       return false;
03961 
03962     // Correct second source to be contiguous with first source
03963     if (Start >= (int)NumElts)
03964       Start -= NumElts - NumLaneElts;
03965 
03966     // Make sure we're shifting in the right direction.
03967     if (Start <= (int)(i+l))
03968       return false;
03969 
03970     Start -= i;
03971 
03972     // Check the rest of the elements to see if they are consecutive.
03973     for (++i; i != NumLaneElts; ++i) {
03974       int Idx = Mask[i+l];
03975 
03976       // Make sure it's in this lane
03977       if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
03978           !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
03979         return false;
03980 
03981       // If not lane 0, then we must match lane 0
03982       if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
03983         return false;
03984 
03985       if (Idx >= (int)NumElts)
03986         Idx -= NumElts - NumLaneElts;
03987 
03988       if (!isUndefOrEqual(Idx, Start+i))
03989         return false;
03990 
03991     }
03992   }
03993 
03994   return true;
03995 }
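      // Illustrative example for the check above: on v8i16 with InterLane == false,
      // the mask <1, 2, 3, 4, 5, 6, 7, 8> selects consecutive elements 1..8 of the
      // concatenated sources, i.e. a single-lane shift by one element, so it is
      // accepted; the byte shift amount (here 1 * 2 = 2) is produced later by
      // getShuffleAlignrImmediate.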
03996 
03997 /// \brief Return true if the node specifies a shuffle of elements that is
03998 /// suitable for input to PALIGNR.
03999 static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
04000                           const X86Subtarget *Subtarget) {
04001   if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
04002       (VT.is256BitVector() && !Subtarget->hasInt256()) ||
04003       VT.is512BitVector())
04004     // FIXME: Add AVX512BW.
04005     return false;
04006 
04007   return isAlignrMask(Mask, VT, false);
04008 }
04009 
04010 /// \brief Return true if the node specifies a shuffle of elements that is
04011 /// suitable for input to VALIGN.
04012 static bool isVALIGNMask(ArrayRef<int> Mask, MVT VT,
04013                           const X86Subtarget *Subtarget) {
04014   // FIXME: Add AVX512VL.
04015   if (!VT.is512BitVector() || !Subtarget->hasAVX512())
04016     return false;
04017   return isAlignrMask(Mask, VT, true);
04018 }
04019 
04020 /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
04021 /// the two vector operands have swapped position.
04022 static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
04023                                      unsigned NumElems) {
04024   for (unsigned i = 0; i != NumElems; ++i) {
04025     int idx = Mask[i];
04026     if (idx < 0)
04027       continue;
04028     else if (idx < (int)NumElems)
04029       Mask[i] = idx + NumElems;
04030     else
04031       Mask[i] = idx - NumElems;
04032   }
04033 }
04034 
04035 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
04036 /// specifies a shuffle of elements that is suitable for input to 128/256-bit
04037 /// SHUFPS and SHUFPD. If Commuted is true, then it checks whether the sources
04038 /// are in the reverse order of what x86 shuffles want.
04039 static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool Commuted = false) {
04040 
04041   unsigned NumElems = VT.getVectorNumElements();
04042   unsigned NumLanes = VT.getSizeInBits()/128;
04043   unsigned NumLaneElems = NumElems/NumLanes;
04044 
04045   if (NumLaneElems != 2 && NumLaneElems != 4)
04046     return false;
04047 
04048   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
04049   bool symmetricMaskRequired =
04050     (VT.getSizeInBits() >= 256) && (EltSize == 32);
04051 
04052   // VSHUFPSY divides the resulting vector into 4 chunks.
04053   // The sources are also split into 4 chunks, and each destination
04054   // chunk must come from a different source chunk.
04055   //
04056   //  SRC1 =>   X7    X6    X5    X4    X3    X2    X1    X0
04057   //  SRC2 =>   Y7    Y6    Y5    Y4    Y3    Y2    Y1    Y0
04058   //
04059   //  DST  =>  Y7..Y4,   Y7..Y4,   X7..X4,   X7..X4,
04060   //           Y3..Y0,   Y3..Y0,   X3..X0,   X3..X0
04061   //
04062   // VSHUFPDY divides the resulting vector into 4 chunks.
04063   // The sources are also split into 4 chunks, and each destination
04064   // chunk must come from a different source chunk.
04065   //
04066   //  SRC1 =>      X3       X2       X1       X0
04067   //  SRC2 =>      Y3       Y2       Y1       Y0
04068   //
04069   //  DST  =>  Y3..Y2,  X3..X2,  Y1..Y0,  X1..X0
04070   //
04071   SmallVector<int, 4> MaskVal(NumLaneElems, -1);
04072   unsigned HalfLaneElems = NumLaneElems/2;
04073   for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
04074     for (unsigned i = 0; i != NumLaneElems; ++i) {
04075       int Idx = Mask[i+l];
04076       unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0);
04077       if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems))
04078         return false;
04079       // For VSHUFPSY, the mask of the second half must be the same as the
04080       // first but with the appropriate offsets. This works in the same way as
04081       // VPERMILPS works with masks.
04082       if (!symmetricMaskRequired || Idx < 0)
04083         continue;
04084       if (MaskVal[i] < 0) {
04085         MaskVal[i] = Idx - l;
04086         continue;
04087       }
04088       if ((signed)(Idx - l) != MaskVal[i])
04089         return false;
04090     }
04091   }
04092 
04093   return true;
04094 }
04095 
04096 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
04097 /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
04098 static bool isMOVHLPSMask(ArrayRef<int> Mask, MVT VT) {
04099   if (!VT.is128BitVector())
04100     return false;
04101 
04102   unsigned NumElems = VT.getVectorNumElements();
04103 
04104   if (NumElems != 4)
04105     return false;
04106 
04107   // Expect Mask[0] == 6, Mask[1] == 7, Mask[2] == 2, Mask[3] == 3.
04108   return isUndefOrEqual(Mask[0], 6) &&
04109          isUndefOrEqual(Mask[1], 7) &&
04110          isUndefOrEqual(Mask[2], 2) &&
04111          isUndefOrEqual(Mask[3], 3);
04112 }
04113 
04114 /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
04115 /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
04116 /// <2, 3, 2, 3>
04117 static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, MVT VT) {
04118   if (!VT.is128BitVector())
04119     return false;
04120 
04121   unsigned NumElems = VT.getVectorNumElements();
04122 
04123   if (NumElems != 4)
04124     return false;
04125 
04126   return isUndefOrEqual(Mask[0], 2) &&
04127          isUndefOrEqual(Mask[1], 3) &&
04128          isUndefOrEqual(Mask[2], 2) &&
04129          isUndefOrEqual(Mask[3], 3);
04130 }
04131 
04132 /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
04133 /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
04134 static bool isMOVLPMask(ArrayRef<int> Mask, MVT VT) {
04135   if (!VT.is128BitVector())
04136     return false;
04137 
04138   unsigned NumElems = VT.getVectorNumElements();
04139 
04140   if (NumElems != 2 && NumElems != 4)
04141     return false;
04142 
04143   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
04144     if (!isUndefOrEqual(Mask[i], i + NumElems))
04145       return false;
04146 
04147   for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
04148     if (!isUndefOrEqual(Mask[i], i))
04149       return false;
04150 
04151   return true;
04152 }
04153 
04154 /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
04155 /// specifies a shuffle of elements that is suitable for input to MOVLHPS.
04156 static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) {
04157   if (!VT.is128BitVector())
04158     return false;
04159 
04160   unsigned NumElems = VT.getVectorNumElements();
04161 
04162   if (NumElems != 2 && NumElems != 4)
04163     return false;
04164 
04165   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
04166     if (!isUndefOrEqual(Mask[i], i))
04167       return false;
04168 
04169   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
04170     if (!isUndefOrEqual(Mask[i + e], i + NumElems))
04171       return false;
04172 
04173   return true;
04174 }
04175 
04176 /// isINSERTPSMask - Return true if the specified VECTOR_SHUFFLE operand
04177 /// specifies a shuffle of elements that is suitable for input to INSERTPS.
04178 /// i.e. all but one element come from the same vector.
04179 static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) {
04180   // TODO: Deal with AVX's VINSERTPS
04181   if (!VT.is128BitVector() || (VT != MVT::v4f32 && VT != MVT::v4i32))
04182     return false;
04183 
04184   unsigned CorrectPosV1 = 0;
04185   unsigned CorrectPosV2 = 0;
04186   for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
04187     if (Mask[i] == -1) {
04188       ++CorrectPosV1;
04189       ++CorrectPosV2;
04190       continue;
04191     }
04192 
04193     if (Mask[i] == i)
04194       ++CorrectPosV1;
04195     else if (Mask[i] == i + 4)
04196       ++CorrectPosV2;
04197   }
04198 
04199   if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
04200     // We have 3 elements (undefs count as elements from any vector) from one
04201     // vector, and one from another.
04202     return true;
04203 
04204   return false;
04205 }
04206 
04207 //
04208 // Some special combinations that can be optimized.
04209 //
04210 static
04211 SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
04212                                SelectionDAG &DAG) {
04213   MVT VT = SVOp->getSimpleValueType(0);
04214   SDLoc dl(SVOp);
04215 
04216   if (VT != MVT::v8i32 && VT != MVT::v8f32)
04217     return SDValue();
04218 
04219   ArrayRef<int> Mask = SVOp->getMask();
04220 
04221   // These are the special masks that may be optimized.
04222   static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
04223   static const int MaskToOptimizeOdd[]  = {1, 9, 3, 11, 5, 13, 7, 15};
04224   bool MatchEvenMask = true;
04225   bool MatchOddMask  = true;
04226   for (int i=0; i<8; ++i) {
04227     if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
04228       MatchEvenMask = false;
04229     if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
04230       MatchOddMask = false;
04231   }
04232 
04233   if (!MatchEvenMask && !MatchOddMask)
04234     return SDValue();
04235 
04236   SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
04237 
04238   SDValue Op0 = SVOp->getOperand(0);
04239   SDValue Op1 = SVOp->getOperand(1);
04240 
04241   if (MatchEvenMask) {
04242     // Shift the second operand right by 32 bits.
04243     static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
04244     Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask);
04245   } else {
04246     // Shift the first operand left by 32 bits.
04247     static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
04248     Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask);
04249   }
04250   static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
04251   return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask);
04252 }
04253 
04254 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
04255 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
04256 static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
04257                          bool HasInt256, bool V2IsSplat = false) {
04258 
04259   assert(VT.getSizeInBits() >= 128 &&
04260          "Unsupported vector type for unpckl");
04261 
04262   // AVX defines UNPCK* to operate independently on 128-bit lanes.
04263   unsigned NumLanes;
04264   unsigned NumOf256BitLanes;
04265   unsigned NumElts = VT.getVectorNumElements();
04266   if (VT.is256BitVector()) {
04267     if (NumElts != 4 && NumElts != 8 &&
04268         (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04269       return false;
04270     NumLanes = 2;
04271     NumOf256BitLanes = 1;
04272   } else if (VT.is512BitVector()) {
04273     assert(VT.getScalarType().getSizeInBits() >= 32 &&
04274            "Unsupported vector type for unpckh");
04275     NumLanes = 2;
04276     NumOf256BitLanes = 2;
04277   } else {
04278     NumLanes = 1;
04279     NumOf256BitLanes = 1;
04280   }
04281 
04282   unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
04283   unsigned NumLaneElts = NumEltsInStride/NumLanes;
04284 
04285   for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
04286     for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
04287       for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
04288         int BitI  = Mask[l256*NumEltsInStride+l+i];
04289         int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
04290         if (!isUndefOrEqual(BitI, j+l256*NumElts))
04291           return false;
04292         if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
04293           return false;
04294         if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
04295           return false;
04296       }
04297     }
04298   }
04299   return true;
04300 }
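      // For example (illustrative only), the v4i32 mask <0, 4, 1, 5> interleaves the
      // low halves of the two sources and is accepted here; it corresponds to a
      // single 128-bit PUNPCKLDQ/UNPCKLPS.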
04301 
04302 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
04303 /// specifies a shuffle of elements that is suitable for input to UNPCKH.
04304 static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
04305                          bool HasInt256, bool V2IsSplat = false) {
04306   assert(VT.getSizeInBits() >= 128 &&
04307          "Unsupported vector type for unpckh");
04308 
04309   // AVX defines UNPCK* to operate independently on 128-bit lanes.
04310   unsigned NumLanes;
04311   unsigned NumOf256BitLanes;
04312   unsigned NumElts = VT.getVectorNumElements();
04313   if (VT.is256BitVector()) {
04314     if (NumElts != 4 && NumElts != 8 &&
04315         (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04316       return false;
04317     NumLanes = 2;
04318     NumOf256BitLanes = 1;
04319   } else if (VT.is512BitVector()) {
04320     assert(VT.getScalarType().getSizeInBits() >= 32 &&
04321            "Unsupported vector type for unpckh");
04322     NumLanes = 2;
04323     NumOf256BitLanes = 2;
04324   } else {
04325     NumLanes = 1;
04326     NumOf256BitLanes = 1;
04327   }
04328 
04329   unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
04330   unsigned NumLaneElts = NumEltsInStride/NumLanes;
04331 
04332   for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
04333     for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
04334       for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
04335         int BitI  = Mask[l256*NumEltsInStride+l+i];
04336         int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
04337         if (!isUndefOrEqual(BitI, j+l256*NumElts))
04338           return false;
04339         if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
04340           return false;
04341         if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
04342           return false;
04343       }
04344     }
04345   }
04346   return true;
04347 }
04348 
04349 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
04350 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
04351 /// <0, 0, 1, 1>
04352 static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
04353   unsigned NumElts = VT.getVectorNumElements();
04354   bool Is256BitVec = VT.is256BitVector();
04355 
04356   if (VT.is512BitVector())
04357     return false;
04358   assert((VT.is128BitVector() || VT.is256BitVector()) &&
04359          "Unsupported vector type for unpckh");
04360 
04361   if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
04362       (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04363     return false;
04364 
04365   // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
04366   // FIXME: Need a better way to get rid of this, there's no latency difference
04367   // between UNPCKLPD and MOVDDUP; the latter should always be checked first and
04368   // the former later. We should also remove the "_undef" special mask.
04369   if (NumElts == 4 && Is256BitVec)
04370     return false;
04371 
04372   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
04373   // independently on 128-bit lanes.
04374   unsigned NumLanes = VT.getSizeInBits()/128;
04375   unsigned NumLaneElts = NumElts/NumLanes;
04376 
04377   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
04378     for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
04379       int BitI  = Mask[l+i];
04380       int BitI1 = Mask[l+i+1];
04381 
04382       if (!isUndefOrEqual(BitI, j))
04383         return false;
04384       if (!isUndefOrEqual(BitI1, j))
04385         return false;
04386     }
04387   }
04388 
04389   return true;
04390 }
04391 
04392 /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
04393 /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
04394 /// <2, 2, 3, 3>
04395 static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
04396   unsigned NumElts = VT.getVectorNumElements();
04397 
04398   if (VT.is512BitVector())
04399     return false;
04400 
04401   assert((VT.is128BitVector() || VT.is256BitVector()) &&
04402          "Unsupported vector type for unpckh");
04403 
04404   if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
04405       (!HasInt256 || (NumElts != 16 && NumElts != 32)))
04406     return false;
04407 
04408   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
04409   // independently on 128-bit lanes.
04410   unsigned NumLanes = VT.getSizeInBits()/128;
04411   unsigned NumLaneElts = NumElts/NumLanes;
04412 
04413   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
04414     for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
04415       int BitI  = Mask[l+i];
04416       int BitI1 = Mask[l+i+1];
04417       if (!isUndefOrEqual(BitI, j))
04418         return false;
04419       if (!isUndefOrEqual(BitI1, j))
04420         return false;
04421     }
04422   }
04423   return true;
04424 }
04425 
04426 // Match for INSERTI64x4/INSERTF64x4 instructions: (src0[0], src1[0]) or
04427 // (src1[0], src0[1]), i.e. manipulation of 256-bit sub-vectors.
04428 static bool isINSERT64x4Mask(ArrayRef<int> Mask, MVT VT, unsigned int *Imm) {
04429   if (!VT.is512BitVector())
04430     return false;
04431 
04432   unsigned NumElts = VT.getVectorNumElements();
04433   unsigned HalfSize = NumElts/2;
04434   if (isSequentialOrUndefInRange(Mask, 0, HalfSize, 0)) {
04435     if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, NumElts)) {
04436       *Imm = 1;
04437       return true;
04438     }
04439   }
04440   if (isSequentialOrUndefInRange(Mask, 0, HalfSize, NumElts)) {
04441     if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, HalfSize)) {
04442       *Imm = 0;
04443       return true;
04444     }
04445   }
04446   return false;
04447 }
04448 
04449 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
04450 /// specifies a shuffle of elements that is suitable for input to MOVSS,
04451 /// MOVSD, and MOVD, i.e. setting the lowest element.
04452 static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
04453   if (VT.getVectorElementType().getSizeInBits() < 32)
04454     return false;
04455   if (!VT.is128BitVector())
04456     return false;
04457 
04458   unsigned NumElts = VT.getVectorNumElements();
04459 
04460   if (!isUndefOrEqual(Mask[0], NumElts))
04461     return false;
04462 
04463   for (unsigned i = 1; i != NumElts; ++i)
04464     if (!isUndefOrEqual(Mask[i], i))
04465       return false;
04466 
04467   return true;
04468 }
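      // For example (illustrative only), the v4i32 mask <4, 1, 2, 3> is accepted:
      // element 0 comes from the low element of V2 and the remaining elements come
      // from V1 in order, which is what MOVSS/MOVSD/MOVD-style moves implement.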
04469 
04470 /// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
04471 /// as permutations between 128-bit chunks or halves. As an example, in the
04472 /// shuffle below:
04473 ///   vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
04474 /// the first half comes from the second half of V1 and the second half from
04475 /// the second half of V2.
04476 static bool isVPERM2X128Mask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
04477   if (!HasFp256 || !VT.is256BitVector())
04478     return false;
04479 
04480   // The shuffle result is divided into half A and half B. In total the two
04481   // sources have 4 halves, namely: C, D, E, F. The final values of A and
04482   // B must come from C, D, E or F.
04483   unsigned HalfSize = VT.getVectorNumElements()/2;
04484   bool MatchA = false, MatchB = false;
04485 
04486   // Check if A comes from one of C, D, E, F.
04487   for (unsigned Half = 0; Half != 4; ++Half) {
04488     if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
04489       MatchA = true;
04490       break;
04491     }
04492   }
04493 
04494   // Check if B comes from one of C, D, E, F.
04495   for (unsigned Half = 0; Half != 4; ++Half) {
04496     if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
04497       MatchB = true;
04498       break;
04499     }
04500   }
04501 
04502   return MatchA && MatchB;
04503 }
04504 
04505 /// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
04506 /// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
04507 static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
04508   MVT VT = SVOp->getSimpleValueType(0);
04509 
04510   unsigned HalfSize = VT.getVectorNumElements()/2;
04511 
04512   unsigned FstHalf = 0, SndHalf = 0;
04513   for (unsigned i = 0; i < HalfSize; ++i) {
04514     if (SVOp->getMaskElt(i) > 0) {
04515       FstHalf = SVOp->getMaskElt(i)/HalfSize;
04516       break;
04517     }
04518   }
04519   for (unsigned i = HalfSize; i < HalfSize*2; ++i) {
04520     if (SVOp->getMaskElt(i) > 0) {
04521       SndHalf = SVOp->getMaskElt(i)/HalfSize;
04522       break;
04523     }
04524   }
04525 
04526   return (FstHalf | (SndHalf << 4));
04527 }
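      // Worked example (matching the doc comment on isVPERM2X128Mask above): for the
      // v8i32 mask <4, 5, 6, 7, 12, 13, 14, 15>, HalfSize is 4, so FstHalf = 4/4 = 1
      // and SndHalf = 12/4 = 3, giving an immediate of 1 | (3 << 4) = 0x31.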
04528 
04529 // Symmetric in-lane mask. Each lane has 4 elements (for imm8).
04530 static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
04531   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
04532   if (EltSize < 32)
04533     return false;
04534 
04535   unsigned NumElts = VT.getVectorNumElements();
04536   Imm8 = 0;
04537   if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
04538     for (unsigned i = 0; i != NumElts; ++i) {
04539       if (Mask[i] < 0)
04540         continue;
04541       Imm8 |= Mask[i] << (i*2);
04542     }
04543     return true;
04544   }
04545 
04546   unsigned LaneSize = 4;
04547   SmallVector<int, 4> MaskVal(LaneSize, -1);
04548 
04549   for (unsigned l = 0; l != NumElts; l += LaneSize) {
04550     for (unsigned i = 0; i != LaneSize; ++i) {
04551       if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
04552         return false;
04553       if (Mask[i+l] < 0)
04554         continue;
04555       if (MaskVal[i] < 0) {
04556         MaskVal[i] = Mask[i+l] - l;
04557         Imm8 |= MaskVal[i] << (i*2);
04558         continue;
04559       }
04560       if (Mask[i+l] != (signed)(MaskVal[i]+l))
04561         return false;
04562     }
04563   }
04564   return true;
04565 }
04566 
04567 /// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
04568 /// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
04569 /// Note that VPERMIL mask matching is different depending on whether the
04570 /// underlying element type is 32 or 64 bits. For VPERMILPS the high half of the
04571 /// mask should point to the same elements as the low half, but within the upper
04572 /// half of the source. For VPERMILPD the two lanes can be shuffled independently
04573 /// of each other, with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
04574 static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT) {
04575   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
04576   if (VT.getSizeInBits() < 256 || EltSize < 32)
04577     return false;
04578   bool symmetricMaskRequired = (EltSize == 32);
04579   unsigned NumElts = VT.getVectorNumElements();
04580 
04581   unsigned NumLanes = VT.getSizeInBits()/128;
04582   unsigned LaneSize = NumElts/NumLanes;
04583   // 2 or 4 elements in one lane
04584 
04585   SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
04586   for (unsigned l = 0; l != NumElts; l += LaneSize) {
04587     for (unsigned i = 0; i != LaneSize; ++i) {
04588       if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
04589         return false;
04590       if (symmetricMaskRequired) {
04591         if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
04592           ExpectedMaskVal[i] = Mask[i+l] - l;
04593           continue;
04594         }
04595         if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l))
04596           return false;
04597       }
04598     }
04599   }
04600   return true;
04601 }
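      // For example (illustrative only), the v8f32 mask <1, 0, 3, 2, 5, 4, 7, 6> is
      // accepted: both 128-bit lanes apply the same in-lane permutation <1, 0, 3, 2>,
      // as required for VPERMILPS, and no element crosses a lane boundary.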
04602 
04603 /// isCommutedMOVLMask - Returns true if the shuffle mask is the reverse of what
04604 /// x86 movss wants. X86 movs requires the lowest element to be the lowest
04605 /// element of vector 2 and the other elements to come from vector 1 in order.
04606 static bool isCommutedMOVLMask(ArrayRef<int> Mask, MVT VT,
04607                                bool V2IsSplat = false, bool V2IsUndef = false) {
04608   if (!VT.is128BitVector())
04609     return false;
04610 
04611   unsigned NumOps = VT.getVectorNumElements();
04612   if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
04613     return false;
04614 
04615   if (!isUndefOrEqual(Mask[0], 0))
04616     return false;
04617 
04618   for (unsigned i = 1; i != NumOps; ++i)
04619     if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
04620           (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
04621           (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
04622       return false;
04623 
04624   return true;
04625 }
04626 
04627 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
04628 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
04629 /// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
04630 static bool isMOVSHDUPMask(ArrayRef<int> Mask, MVT VT,
04631                            const X86Subtarget *Subtarget) {
04632   if (!Subtarget->hasSSE3())
04633     return false;
04634 
04635   unsigned NumElems = VT.getVectorNumElements();
04636 
04637   if ((VT.is128BitVector() && NumElems != 4) ||
04638       (VT.is256BitVector() && NumElems != 8) ||
04639       (VT.is512BitVector() && NumElems != 16))
04640     return false;
04641 
04642   // "i+1" is the value the indexed mask element must have
04643   for (unsigned i = 0; i != NumElems; i += 2)
04644     if (!isUndefOrEqual(Mask[i], i+1) ||
04645         !isUndefOrEqual(Mask[i+1], i+1))
04646       return false;
04647 
04648   return true;
04649 }
04650 
04651 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
04652 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
04653 /// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
04654 static bool isMOVSLDUPMask(ArrayRef<int> Mask, MVT VT,
04655                            const X86Subtarget *Subtarget) {
04656   if (!Subtarget->hasSSE3())
04657     return false;
04658 
04659   unsigned NumElems = VT.getVectorNumElements();
04660 
04661   if ((VT.is128BitVector() && NumElems != 4) ||
04662       (VT.is256BitVector() && NumElems != 8) ||
04663       (VT.is512BitVector() && NumElems != 16))
04664     return false;
04665 
04666   // "i" is the value the indexed mask element must have
04667   for (unsigned i = 0; i != NumElems; i += 2)
04668     if (!isUndefOrEqual(Mask[i], i) ||
04669         !isUndefOrEqual(Mask[i+1], i))
04670       return false;
04671 
04672   return true;
04673 }
04674 
04675 /// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
04676 /// specifies a shuffle of elements that is suitable for input to 256-bit
04677 /// version of MOVDDUP.
04678 static bool isMOVDDUPYMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
04679   if (!HasFp256 || !VT.is256BitVector())
04680     return false;
04681 
04682   unsigned NumElts = VT.getVectorNumElements();
04683   if (NumElts != 4)
04684     return false;
04685 
04686   for (unsigned i = 0; i != NumElts/2; ++i)
04687     if (!isUndefOrEqual(Mask[i], 0))
04688       return false;
04689   for (unsigned i = NumElts/2; i != NumElts; ++i)
04690     if (!isUndefOrEqual(Mask[i], NumElts/2))
04691       return false;
04692   return true;
04693 }
04694 
04695 /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
04696 /// specifies a shuffle of elements that is suitable for input to 128-bit
04697 /// version of MOVDDUP.
04698 static bool isMOVDDUPMask(ArrayRef<int> Mask, MVT VT) {
04699   if (!VT.is128BitVector())
04700     return false;
04701 
04702   unsigned e = VT.getVectorNumElements() / 2;
04703   for (unsigned i = 0; i != e; ++i)
04704     if (!isUndefOrEqual(Mask[i], i))
04705       return false;
04706   for (unsigned i = 0; i != e; ++i)
04707     if (!isUndefOrEqual(Mask[e+i], i))
04708       return false;
04709   return true;
04710 }
04711 
04712 /// isVEXTRACTIndex - Return true if the specified
04713 /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
04714 /// suitable for instructions that extract 128- or 256-bit vectors.
04715 static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
04716   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
04717   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
04718     return false;
04719 
04720   // The index should be aligned on a vecWidth-bit boundary.
04721   uint64_t Index =
04722     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
04723 
04724   MVT VT = N->getSimpleValueType(0);
04725   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
04726   bool Result = (Index * ElSize) % vecWidth == 0;
04727 
04728   return Result;
04729 }
04730 
04731 /// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR
04732 /// operand specifies a subvector insert that is suitable for input to
04733 /// insertion of 128 or 256-bit subvectors
04734 static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
04735   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
04736   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
04737     return false;
04738   // The index should be aligned on a vecWidth-bit boundary.
04739   uint64_t Index =
04740     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
04741 
04742   MVT VT = N->getSimpleValueType(0);
04743   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
04744   bool Result = (Index * ElSize) % vecWidth == 0;
04745 
04746   return Result;
04747 }
04748 
04749 bool X86::isVINSERT128Index(SDNode *N) {
04750   return isVINSERTIndex(N, 128);
04751 }
04752 
04753 bool X86::isVINSERT256Index(SDNode *N) {
04754   return isVINSERTIndex(N, 256);
04755 }
04756 
04757 bool X86::isVEXTRACT128Index(SDNode *N) {
04758   return isVEXTRACTIndex(N, 128);
04759 }
04760 
04761 bool X86::isVEXTRACT256Index(SDNode *N) {
04762   return isVEXTRACTIndex(N, 256);
04763 }
04764 
04765 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
04766 /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
04767 /// Handles 128-bit and 256-bit.
04768 static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
04769   MVT VT = N->getSimpleValueType(0);
04770 
04771   assert((VT.getSizeInBits() >= 128) &&
04772          "Unsupported vector type for PSHUF/SHUFP");
04773 
04774   // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
04775   // independently on 128-bit lanes.
04776   unsigned NumElts = VT.getVectorNumElements();
04777   unsigned NumLanes = VT.getSizeInBits()/128;
04778   unsigned NumLaneElts = NumElts/NumLanes;
04779 
04780   assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
04781          "Only supports 2, 4 or 8 elements per lane");
04782 
04783   unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
04784   unsigned Mask = 0;
04785   for (unsigned i = 0; i != NumElts; ++i) {
04786     int Elt = N->getMaskElt(i);
04787     if (Elt < 0) continue;
04788     Elt &= NumLaneElts - 1;
04789     unsigned ShAmt = (i << Shift) % 8;
04790     Mask |= Elt << ShAmt;
04791   }
04792 
04793   return Mask;
04794 }
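      // Worked example (illustrative): for a v4f32 mask <3, 2, 1, 0>, NumLaneElts is
      // 4, so each element contributes 2 bits and the immediate is
      // 3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1B, i.e. SHUFPS/PSHUFD with imm 0x1B.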
04795 
04796 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
04797 /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
04798 static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
04799   MVT VT = N->getSimpleValueType(0);
04800 
04801   assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
04802          "Unsupported vector type for PSHUFHW");
04803 
04804   unsigned NumElts = VT.getVectorNumElements();
04805 
04806   unsigned Mask = 0;
04807   for (unsigned l = 0; l != NumElts; l += 8) {
04808     // 8 nodes per lane, but we only care about the last 4.
04809     for (unsigned i = 0; i < 4; ++i) {
04810       int Elt = N->getMaskElt(l+i+4);
04811       if (Elt < 0) continue;
04812       Elt &= 0x3; // only 2-bits.
04813       Mask |= Elt << (i * 2);
04814     }
04815   }
04816 
04817   return Mask;
04818 }
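      // Worked example (illustrative): for the v8i16 mask <0, 1, 2, 3, 7, 6, 5, 4>
      // accepted by isPSHUFHWMask above, only elements 4-7 matter; they map to
      // 3, 2, 1, 0 within the high quadword, so the immediate is
      // 3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1B.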
04819 
04820 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
04821 /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
04822 static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
04823   MVT VT = N->getSimpleValueType(0);
04824 
04825   assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
04826          "Unsupported vector type for PSHUFHW");
04827 
04828   unsigned NumElts = VT.getVectorNumElements();
04829 
04830   unsigned Mask = 0;
04831   for (unsigned l = 0; l != NumElts; l += 8) {
04832     // 8 nodes per lane, but we only care about the first 4.
04833     for (unsigned i = 0; i < 4; ++i) {
04834       int Elt = N->getMaskElt(l+i);
04835       if (Elt < 0) continue;
04836       Elt &= 0x3; // only 2-bits
04837       Mask |= Elt << (i * 2);
04838     }
04839   }
04840 
04841   return Mask;
04842 }
04843 
04844 /// \brief Return the appropriate immediate to shuffle the specified
04845 /// VECTOR_SHUFFLE mask with the PALIGNR (if InterLane is false) or with
04846 /// VALIGN (if InterLane is true) instructions.
04847 static unsigned getShuffleAlignrImmediate(ShuffleVectorSDNode *SVOp,
04848                                            bool InterLane) {
04849   MVT VT = SVOp->getSimpleValueType(0);
04850   unsigned EltSize = InterLane ? 1 :
04851     VT.getVectorElementType().getSizeInBits() >> 3;
04852 
04853   unsigned NumElts = VT.getVectorNumElements();
04854   unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
04855   unsigned NumLaneElts = NumElts/NumLanes;
04856 
04857   int Val = 0;
04858   unsigned i;
04859   for (i = 0; i != NumElts; ++i) {
04860     Val = SVOp->getMaskElt(i);
04861     if (Val >= 0)
04862       break;
04863   }
04864   if (Val >= (int)NumElts)
04865     Val -= NumElts - NumLaneElts;
04866 
04867   assert(Val - i > 0 && "PALIGNR imm should be positive");
04868   return (Val - i) * EltSize;
04869 }
04870 
04871 /// \brief Return the appropriate immediate to shuffle the specified
04872 /// VECTOR_SHUFFLE mask with the PALIGNR instruction.
04873 static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
04874   return getShuffleAlignrImmediate(SVOp, false);
04875 }
04876 
04877 /// \brief Return the appropriate immediate to shuffle the specified
04878 /// VECTOR_SHUFFLE mask with the VALIGN instruction.
04879 static unsigned getShuffleVALIGNImmediate(ShuffleVectorSDNode *SVOp) {
04880   return getShuffleAlignrImmediate(SVOp, true);
04881 }
04882 
04883 
04884 static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
04885   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
04886   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
04887     llvm_unreachable("Illegal extract subvector for VEXTRACT");
04888 
04889   uint64_t Index =
04890     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
04891 
04892   MVT VecVT = N->getOperand(0).getSimpleValueType();
04893   MVT ElVT = VecVT.getVectorElementType();
04894 
04895   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
04896   return Index / NumElemsPerChunk;
04897 }
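      // Worked example (illustrative): extracting a 128-bit subvector from a v8f32
      // at element index 4 gives NumElemsPerChunk = 128 / 32 = 4 and therefore an
      // immediate of 4 / 4 = 1, selecting the upper half for VEXTRACTF128.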
04898 
04899 static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
04900   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
04901   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
04902     llvm_unreachable("Illegal insert subvector for VINSERT");
04903 
04904   uint64_t Index =
04905     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
04906 
04907   MVT VecVT = N->getSimpleValueType(0);
04908   MVT ElVT = VecVT.getVectorElementType();
04909 
04910   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
04911   return Index / NumElemsPerChunk;
04912 }
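
// For example (illustrative): inserting or extracting a 128-bit subvector at
// element index 4 of a v8f32 gives NumElemsPerChunk = 128 / 32 = 4, so the
// helpers above return 4 / 4 = 1, i.e. the upper 128-bit half.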
04913 
04914 /// getExtractVEXTRACT128Immediate - Return the appropriate immediate
04915 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
04916 /// and VEXTRACTI128 instructions.
04917 unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
04918   return getExtractVEXTRACTImmediate(N, 128);
04919 }
04920 
04921 /// getExtractVEXTRACT256Immediate - Return the appropriate immediate
04922 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4
04923 /// and VEXTRACTI64x4 instructions.
04924 unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
04925   return getExtractVEXTRACTImmediate(N, 256);
04926 }
04927 
04928 /// getInsertVINSERT128Immediate - Return the appropriate immediate
04929 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
04930 /// and VINSERTI128 instructions.
04931 unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
04932   return getInsertVINSERTImmediate(N, 128);
04933 }
04934 
04935 /// getInsertVINSERT256Immediate - Return the appropriate immediate
04936 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF64x4
04937 /// and VINSERTI64x4 instructions.
04938 unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
04939   return getInsertVINSERTImmediate(N, 256);
04940 }
04941 
04942 /// isZero - Returns true if V is a constant integer zero.
04943 static bool isZero(SDValue V) {
04944   ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
04945   return C && C->isNullValue();
04946 }
04947 
04948 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
04949 /// constant +0.0.
04950 bool X86::isZeroNode(SDValue Elt) {
04951   if (isZero(Elt))
04952     return true;
04953   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
04954     return CFP->getValueAPF().isPosZero();
04955   return false;
04956 }
04957 
04958 /// ShouldXformToMOVHLPS - Return true if the node should be transformed to
04959 /// match movhlps. The lower half elements should come from the upper half of
04960 /// V1 (and in order), and the upper half elements should come from the upper
04961 /// half of V2 (and in order).
04962 static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, MVT VT) {
04963   if (!VT.is128BitVector())
04964     return false;
04965   if (VT.getVectorNumElements() != 4)
04966     return false;
04967   for (unsigned i = 0, e = 2; i != e; ++i)
04968     if (!isUndefOrEqual(Mask[i], i+2))
04969       return false;
04970   for (unsigned i = 2; i != 4; ++i)
04971     if (!isUndefOrEqual(Mask[i], i+4))
04972       return false;
04973   return true;
04974 }
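
// For example (illustrative): for v4f32 the only mask shape accepted above,
// modulo undefs, is <2,3,6,7>: elements 2 and 3 of V1 followed by elements 2
// and 3 of V2 (mask indices 6 and 7 refer to V2).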
04975 
04976 /// isScalarLoadToVector - Returns true if the node is a scalar load that
04977 /// is promoted to a vector. It also returns the LoadSDNode by reference if
04978 /// required.
04979 static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = nullptr) {
04980   if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
04981     return false;
04982   N = N->getOperand(0).getNode();
04983   if (!ISD::isNON_EXTLoad(N))
04984     return false;
04985   if (LD)
04986     *LD = cast<LoadSDNode>(N);
04987   return true;
04988 }
04989 
04990 // Test whether the given value is a vector value which will be legalized
04991 // into a load.
04992 static bool WillBeConstantPoolLoad(SDNode *N) {
04993   if (N->getOpcode() != ISD::BUILD_VECTOR)
04994     return false;
04995 
04996   // Check for any non-constant elements.
04997   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
04998     switch (N->getOperand(i).getNode()->getOpcode()) {
04999     case ISD::UNDEF:
05000     case ISD::ConstantFP:
05001     case ISD::Constant:
05002       break;
05003     default:
05004       return false;
05005     }
05006 
05007   // Vectors of all-zeros and all-ones are materialized with special
05008   // instructions rather than being loaded.
05009   return !ISD::isBuildVectorAllZeros(N) &&
05010          !ISD::isBuildVectorAllOnes(N);
05011 }
05012 
05013 /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
05014 /// match movlp{s|d}. The lower half elements should come from the lower half of
05015 /// V1 (and in order), and the upper half elements should come from the upper
05016 /// half of V2 (and in order). And since V1 will become the source of the
05017 /// MOVLP, it must be either a vector load or a scalar load to vector.
05018 static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
05019                                ArrayRef<int> Mask, MVT VT) {
05020   if (!VT.is128BitVector())
05021     return false;
05022 
05023   if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
05024     return false;
05025   // If V2 is a vector load, don't do this transformation. We will try to use
05026   // a load-folding shufps instead.
05027   if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2))
05028     return false;
05029 
05030   unsigned NumElems = VT.getVectorNumElements();
05031 
05032   if (NumElems != 2 && NumElems != 4)
05033     return false;
05034   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
05035     if (!isUndefOrEqual(Mask[i], i))
05036       return false;
05037   for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
05038     if (!isUndefOrEqual(Mask[i], i+NumElems))
05039       return false;
05040   return true;
05041 }
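
// For example (illustrative): with four elements the mask accepted above is
// <0,1,6,7> (lower half of V1 in order, then the upper half of V2), and with
// two elements it is <0,3>.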
05042 
05043 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
05044 /// to a zero vector.
05045 /// FIXME: move to dag combiner / method on ShuffleVectorSDNode
05046 static bool isZeroShuffle(ShuffleVectorSDNode *N) {
05047   SDValue V1 = N->getOperand(0);
05048   SDValue V2 = N->getOperand(1);
05049   unsigned NumElems = N->getValueType(0).getVectorNumElements();
05050   for (unsigned i = 0; i != NumElems; ++i) {
05051     int Idx = N->getMaskElt(i);
05052     if (Idx >= (int)NumElems) {
05053       unsigned Opc = V2.getOpcode();
05054       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
05055         continue;
05056       if (Opc != ISD::BUILD_VECTOR ||
05057           !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
05058         return false;
05059     } else if (Idx >= 0) {
05060       unsigned Opc = V1.getOpcode();
05061       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
05062         continue;
05063       if (Opc != ISD::BUILD_VECTOR ||
05064           !X86::isZeroNode(V1.getOperand(Idx)))
05065         return false;
05066     }
05067   }
05068   return true;
05069 }
05070 
05071 /// getZeroVector - Returns a vector of specified type with all zero elements.
05072 ///
05073 static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
05074                              SelectionDAG &DAG, SDLoc dl) {
05075   assert(VT.isVector() && "Expected a vector type");
05076 
05077   // Always build SSE zero vectors as <4 x i32> bitcasted
05078   // to their dest type. This ensures they get CSE'd.
05079   SDValue Vec;
05080   if (VT.is128BitVector()) {  // SSE
05081     if (Subtarget->hasSSE2()) {  // SSE2
05082       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
05083       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
05084     } else { // SSE1
05085       SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
05086       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
05087     }
05088   } else if (VT.is256BitVector()) { // AVX
05089     if (Subtarget->hasInt256()) { // AVX2
05090       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
05091       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
05092       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
05093     } else {
05094       // 256-bit logic and arithmetic instructions in AVX are all
05095       // floating-point, no support for integer ops. Emit fp zeroed vectors.
05096       SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
05097       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
05098       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
05099     }
05100   } else if (VT.is512BitVector()) { // AVX-512
05101       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
05102       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
05103                         Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
05104       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
05105   } else if (VT.getScalarType() == MVT::i1) {
05106     assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
05107     SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
05108     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
05109     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
05110   } else
05111     llvm_unreachable("Unexpected vector type");
05112 
05113   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
05114 }
05115 
05116 /// getOnesVector - Returns a vector of specified type with all bits set.
05117 /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
05118 /// no AVX2 support, use two <4 x i32> inserted into an <8 x i32> appropriately.
05119 /// Then bitcast to their original type, ensuring they get CSE'd.
05120 static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
05121                              SDLoc dl) {
05122   assert(VT.isVector() && "Expected a vector type");
05123 
05124   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
05125   SDValue Vec;
05126   if (VT.is256BitVector()) {
05127     if (HasInt256) { // AVX2
05128       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
05129       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
05130     } else { // AVX
05131       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
05132       Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
05133     }
05134   } else if (VT.is128BitVector()) {
05135     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
05136   } else
05137     llvm_unreachable("Unexpected vector type");
05138 
05139   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
05140 }
05141 
05142 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
05143 /// that point to V2 point to its first element.
05144 static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) {
05145   for (unsigned i = 0; i != NumElems; ++i) {
05146     if (Mask[i] > (int)NumElems) {
05147       Mask[i] = NumElems;
05148     }
05149   }
05150 }
05151 
05152 /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
05153 /// operation of specified width.
05154 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
05155                        SDValue V2) {
05156   unsigned NumElems = VT.getVectorNumElements();
05157   SmallVector<int, 8> Mask;
05158   Mask.push_back(NumElems);
05159   for (unsigned i = 1; i != NumElems; ++i)
05160     Mask.push_back(i);
05161   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
05162 }
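
// For example (illustrative): for v4f32 the mask built above is <4,1,2,3>,
// i.e. element 0 of the result comes from V2 and the rest come from V1, which
// matches the movss/movsd pattern.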
05163 
05164 /// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
05165 static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
05166                           SDValue V2) {
05167   unsigned NumElems = VT.getVectorNumElements();
05168   SmallVector<int, 8> Mask;
05169   for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
05170     Mask.push_back(i);
05171     Mask.push_back(i + NumElems);
05172   }
05173   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
05174 }
05175 
05176 /// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
05177 static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
05178                           SDValue V2) {
05179   unsigned NumElems = VT.getVectorNumElements();
05180   SmallVector<int, 8> Mask;
05181   for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
05182     Mask.push_back(i + Half);
05183     Mask.push_back(i + NumElems + Half);
05184   }
05185   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
05186 }
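
// For example (illustrative): for v4i32, getUnpackl builds the mask <0,4,1,5>
// and getUnpackh builds <2,6,3,7>, interleaving the low and high halves of V1
// and V2 respectively.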
05187 
05188 // PromoteSplati8i16 - i16 and i8 vector types can't be used directly by
05189 // a generic shuffle instruction because the target has no such instructions.
05190 // Generate shuffles which repeat i16 and i8 several times until they can be
05191 // represented by v4f32 and then be manipulated by target-supported shuffles.
05192 static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
05193   MVT VT = V.getSimpleValueType();
05194   int NumElems = VT.getVectorNumElements();
05195   SDLoc dl(V);
05196 
05197   while (NumElems > 4) {
05198     if (EltNo < NumElems/2) {
05199       V = getUnpackl(DAG, dl, VT, V, V);
05200     } else {
05201       V = getUnpackh(DAG, dl, VT, V, V);
05202       EltNo -= NumElems/2;
05203     }
05204     NumElems >>= 1;
05205   }
05206   return V;
05207 }
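
// For example (illustrative): splatting element 9 of a v16i8 first unpacks the
// high half (EltNo becomes 1, NumElems 8) and then the low half (NumElems 4),
// leaving four copies of the original byte in 32-bit lane 1; the updated EltNo
// then selects that lane in getLegalSplat below.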
05208 
05209 /// getLegalSplat - Generate a legal splat with supported x86 shuffles
05210 static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
05211   MVT VT = V.getSimpleValueType();
05212   SDLoc dl(V);
05213 
05214   if (VT.is128BitVector()) {
05215     V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
05216     int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
05217     V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
05218                              &SplatMask[0]);
05219   } else if (VT.is256BitVector()) {
05220     // To use VPERMILPS to splat scalars, the second half of the indices must
05221     // refer to the higher part, which is a duplication of the lower one,
05222     // because VPERMILPS can only handle in-lane permutations.
05223     int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
05224                          EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
05225 
05226     V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
05227     V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
05228                              &SplatMask[0]);
05229   } else
05230     llvm_unreachable("Vector size not supported");
05231 
05232   return DAG.getNode(ISD::BITCAST, dl, VT, V);
05233 }
05234 
05235 /// PromoteSplat - Splat is promoted to target supported vector shuffles.
05236 static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
05237   MVT SrcVT = SV->getSimpleValueType(0);
05238   SDValue V1 = SV->getOperand(0);
05239   SDLoc dl(SV);
05240 
05241   int EltNo = SV->getSplatIndex();
05242   int NumElems = SrcVT.getVectorNumElements();
05243   bool Is256BitVec = SrcVT.is256BitVector();
05244 
05245   assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
05246          "Unknown how to promote splat for type");
05247 
05248   // Extract the 128-bit part containing the splat element and update
05249   // the splat element index when it refers to the higher register.
05250   if (Is256BitVec) {
05251     V1 = Extract128BitVector(V1, EltNo, DAG, dl);
05252     if (EltNo >= NumElems/2)
05253       EltNo -= NumElems/2;
05254   }
05255 
05256   // i16 and i8 vector types can't be used directly by a generic shuffle
05257   // instruction because the target has no such instruction. Generate shuffles
05258   // which repeat i16 and i8 several times until they fit in i32, and then can
05259   // be manipulated by target-supported shuffles.
05260   MVT EltVT = SrcVT.getVectorElementType();
05261   if (EltVT == MVT::i8 || EltVT == MVT::i16)
05262     V1 = PromoteSplati8i16(V1, DAG, EltNo);
05263 
05264   // Recreate the 256-bit vector and place the same 128-bit vector
05265   // into the low and high part. This is necessary because we want
05266   // to use VPERM* to shuffle the vectors
05267   if (Is256BitVec) {
05268     V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
05269   }
05270 
05271   return getLegalSplat(DAG, V1, EltNo);
05272 }
05273 
05274 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
05275 /// vector and a zero or undef vector.  This produces a shuffle where the low
05276 /// element of V2 is swizzled into the zero/undef vector, landing at element
05277 /// Idx.  This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
05278 static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
05279                                            bool IsZero,
05280                                            const X86Subtarget *Subtarget,
05281                                            SelectionDAG &DAG) {
05282   MVT VT = V2.getSimpleValueType();
05283   SDValue V1 = IsZero
05284     ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
05285   unsigned NumElems = VT.getVectorNumElements();
05286   SmallVector<int, 16> MaskVec;
05287   for (unsigned i = 0; i != NumElems; ++i)
05288     // If this is the insertion idx, put the low elt of V2 here.
05289     MaskVec.push_back(i == Idx ? NumElems : i);
05290   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
05291 }
05292 
05293 /// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
05294 /// target specific opcode. Returns true if the Mask could be calculated. Sets
05295 /// IsUnary to true if it only uses one source. Note that this will set IsUnary for
05296 /// shuffles which use a single input multiple times, and in those cases it will
05297 /// adjust the mask to only have indices within that single input.
05298 static bool getTargetShuffleMask(SDNode *N, MVT VT,
05299                                  SmallVectorImpl<int> &Mask, bool &IsUnary) {
05300   unsigned NumElems = VT.getVectorNumElements();
05301   SDValue ImmN;
05302 
05303   IsUnary = false;
05304   bool IsFakeUnary = false;
05305   switch(N->getOpcode()) {
05306   case X86ISD::SHUFP:
05307     ImmN = N->getOperand(N->getNumOperands()-1);
05308     DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05309     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05310     break;
05311   case X86ISD::UNPCKH:
05312     DecodeUNPCKHMask(VT, Mask);
05313     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05314     break;
05315   case X86ISD::UNPCKL:
05316     DecodeUNPCKLMask(VT, Mask);
05317     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05318     break;
05319   case X86ISD::MOVHLPS:
05320     DecodeMOVHLPSMask(NumElems, Mask);
05321     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05322     break;
05323   case X86ISD::MOVLHPS:
05324     DecodeMOVLHPSMask(NumElems, Mask);
05325     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
05326     break;
05327   case X86ISD::PALIGNR:
05328     ImmN = N->getOperand(N->getNumOperands()-1);
05329     DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05330     break;
05331   case X86ISD::PSHUFD:
05332   case X86ISD::VPERMILP:
05333     ImmN = N->getOperand(N->getNumOperands()-1);
05334     DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05335     IsUnary = true;
05336     break;
05337   case X86ISD::PSHUFHW:
05338     ImmN = N->getOperand(N->getNumOperands()-1);
05339     DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05340     IsUnary = true;
05341     break;
05342   case X86ISD::PSHUFLW:
05343     ImmN = N->getOperand(N->getNumOperands()-1);
05344     DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05345     IsUnary = true;
05346     break;
05347   case X86ISD::PSHUFB: {
05348     IsUnary = true;
05349     SDValue MaskNode = N->getOperand(1);
05350     while (MaskNode->getOpcode() == ISD::BITCAST)
05351       MaskNode = MaskNode->getOperand(0);
05352 
05353     if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
05354       // If we have a build-vector, then things are easy.
05355       EVT VT = MaskNode.getValueType();
05356       assert(VT.isVector() &&
05357              "Can't produce a non-vector with a build_vector!");
05358       if (!VT.isInteger())
05359         return false;
05360 
05361       int NumBytesPerElement = VT.getVectorElementType().getSizeInBits() / 8;
05362 
05363       SmallVector<uint64_t, 32> RawMask;
05364       for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
05365         auto *CN = dyn_cast<ConstantSDNode>(MaskNode->getOperand(i));
05366         if (!CN)
05367           return false;
05368         APInt MaskElement = CN->getAPIntValue();
05369 
05370         // We now have to decode the element which could be any integer size and
05371         // extract each byte of it.
05372         for (int j = 0; j < NumBytesPerElement; ++j) {
05373           // Note that this is x86 and so always little endian: the low byte is
05374           // the first byte of the mask.
05375           RawMask.push_back(MaskElement.getLoBits(8).getZExtValue());
05376           MaskElement = MaskElement.lshr(8);
05377         }
05378       }
05379       DecodePSHUFBMask(RawMask, Mask);
05380       break;
05381     }
05382 
05383     auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
05384     if (!MaskLoad)
05385       return false;
05386 
05387     SDValue Ptr = MaskLoad->getBasePtr();
05388     if (Ptr->getOpcode() == X86ISD::Wrapper)
05389       Ptr = Ptr->getOperand(0);
05390 
05391     auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
05392     if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
05393       return false;
05394 
05395     if (auto *C = dyn_cast<ConstantDataSequential>(MaskCP->getConstVal())) {
05396       // FIXME: Support AVX-512 here.
05397       if (!C->getType()->isVectorTy() ||
05398           (C->getNumElements() != 16 && C->getNumElements() != 32))
05399         return false;
05400 
05401       assert(C->getType()->isVectorTy() && "Expected a vector constant.");
05402       DecodePSHUFBMask(C, Mask);
05403       break;
05404     }
05405 
05406     return false;
05407   }
05408   case X86ISD::VPERMI:
05409     ImmN = N->getOperand(N->getNumOperands()-1);
05410     DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05411     IsUnary = true;
05412     break;
05413   case X86ISD::MOVSS:
05414   case X86ISD::MOVSD: {
05415     // The index 0 always comes from the first element of the second source;
05416     // this is why MOVSS and MOVSD are used in the first place. The other
05417     // elements come from the remaining positions of the first source vector.
05418     Mask.push_back(NumElems);
05419     for (unsigned i = 1; i != NumElems; ++i) {
05420       Mask.push_back(i);
05421     }
05422     break;
05423   }
05424   case X86ISD::VPERM2X128:
05425     ImmN = N->getOperand(N->getNumOperands()-1);
05426     DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
05427     if (Mask.empty()) return false;
05428     break;
05429   case X86ISD::MOVDDUP:
05430   case X86ISD::MOVLHPD:
05431   case X86ISD::MOVLPD:
05432   case X86ISD::MOVLPS:
05433   case X86ISD::MOVSHDUP:
05434   case X86ISD::MOVSLDUP:
05435     // Not yet implemented
05436     return false;
05437   default: llvm_unreachable("unknown target shuffle node");
05438   }
05439 
05440   // If we have a fake unary shuffle, the shuffle mask is spread across two
05441   // inputs that are actually the same node. Re-map the mask to always point
05442   // into the first input.
05443   if (IsFakeUnary)
05444     for (int &M : Mask)
05445       if (M >= (int)Mask.size())
05446         M -= Mask.size();
05447 
05448   return true;
05449 }
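
// For example (illustrative): an X86ISD::UNPCKL of v4i32 whose two operands
// are the same node decodes to the mask <0,4,1,5>; since the shuffle is a
// "fake unary", the remapping above turns it into <0,0,1,1> and IsUnary is set.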
05450 
05451 /// getShuffleScalarElt - Returns the scalar element that will make up the ith
05452 /// element of the result of the vector shuffle.
05453 static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
05454                                    unsigned Depth) {
05455   if (Depth == 6)
05456     return SDValue();  // Limit search depth.
05457 
05458   SDValue V = SDValue(N, 0);
05459   EVT VT = V.getValueType();
05460   unsigned Opcode = V.getOpcode();
05461 
05462   // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
05463   if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
05464     int Elt = SV->getMaskElt(Index);
05465 
05466     if (Elt < 0)
05467       return DAG.getUNDEF(VT.getVectorElementType());
05468 
05469     unsigned NumElems = VT.getVectorNumElements();
05470     SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
05471                                          : SV->getOperand(1);
05472     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
05473   }
05474 
05475   // Recurse into target specific vector shuffles to find scalars.
05476   if (isTargetShuffle(Opcode)) {
05477     MVT ShufVT = V.getSimpleValueType();
05478     unsigned NumElems = ShufVT.getVectorNumElements();
05479     SmallVector<int, 16> ShuffleMask;
05480     bool IsUnary;
05481 
05482     if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
05483       return SDValue();
05484 
05485     int Elt = ShuffleMask[Index];
05486     if (Elt < 0)
05487       return DAG.getUNDEF(ShufVT.getVectorElementType());
05488 
05489     SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
05490                                          : N->getOperand(1);
05491     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
05492                                Depth+1);
05493   }
05494 
05495   // Actual nodes that may contain scalar elements
05496   if (Opcode == ISD::BITCAST) {
05497     V = V.getOperand(0);
05498     EVT SrcVT = V.getValueType();
05499     unsigned NumElems = VT.getVectorNumElements();
05500 
05501     if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
05502       return SDValue();
05503   }
05504 
05505   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
05506     return (Index == 0) ? V.getOperand(0)
05507                         : DAG.getUNDEF(VT.getVectorElementType());
05508 
05509   if (V.getOpcode() == ISD::BUILD_VECTOR)
05510     return V.getOperand(Index);
05511 
05512   return SDValue();
05513 }
05514 
05515 /// getNumOfConsecutiveZeros - Return the number of elements of a vector
05516 /// shuffle operation which consecutively come from a zero vector. The
05517 /// search can start in two different directions, from left or right.
05518 /// We count undefs as zeros until PreferredNum is reached.
05519 static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp,
05520                                          unsigned NumElems, bool ZerosFromLeft,
05521                                          SelectionDAG &DAG,
05522                                          unsigned PreferredNum = -1U) {
05523   unsigned NumZeros = 0;
05524   for (unsigned i = 0; i != NumElems; ++i) {
05525     unsigned Index = ZerosFromLeft ? i : NumElems - i - 1;
05526     SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
05527     if (!Elt.getNode())
05528       break;
05529 
05530     if (X86::isZeroNode(Elt))
05531       ++NumZeros;
05532     else if (Elt.getOpcode() == ISD::UNDEF) // Undef as zero up to PreferredNum.
05533       NumZeros = std::min(NumZeros + 1, PreferredNum);
05534     else
05535       break;
05536   }
05537 
05538   return NumZeros;
05539 }
05540 
05541 /// isShuffleMaskConsecutive - Check if the shuffle mask indices [MaskI, MaskE)
05542 /// correspond consecutively to elements from one of the vector operands,
05543 /// starting from its index OpIdx. Also tell OpNum which source vector operand.
05544 static
05545 bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
05546                               unsigned MaskI, unsigned MaskE, unsigned OpIdx,
05547                               unsigned NumElems, unsigned &OpNum) {
05548   bool SeenV1 = false;
05549   bool SeenV2 = false;
05550 
05551   for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
05552     int Idx = SVOp->getMaskElt(i);
05553     // Ignore undef indices
05554     if (Idx < 0)
05555       continue;
05556 
05557     if (Idx < (int)NumElems)
05558       SeenV1 = true;
05559     else
05560       SeenV2 = true;
05561 
05562     // Only accept consecutive elements from the same vector
05563     if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
05564       return false;
05565   }
05566 
05567   OpNum = SeenV1 ? 0 : 1;
05568   return true;
05569 }
05570 
05571 /// isVectorShiftRight - Returns true if the shuffle can be implemented as a
05572 /// logical right shift of a vector.
05573 static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
05574                                bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
05575   unsigned NumElems =
05576     SVOp->getSimpleValueType(0).getVectorNumElements();
05577   unsigned NumZeros = getNumOfConsecutiveZeros(
05578       SVOp, NumElems, false /* check zeros from right */, DAG,
05579       SVOp->getMaskElt(0));
05580   unsigned OpSrc;
05581 
05582   if (!NumZeros)
05583     return false;
05584 
05585   // Considering the elements in the mask that are not consecutive zeros,
05586   // check if they consecutively come from only one of the source vectors.
05587   //
05588   //               V1 = {X, A, B, C}     0
05589   //                         \  \  \    /
05590   //   vector_shuffle V1, V2 <1, 2, 3, X>
05591   //
05592   if (!isShuffleMaskConsecutive(SVOp,
05593             0,                   // Mask Start Index
05594             NumElems-NumZeros,   // Mask End Index(exclusive)
05595             NumZeros,            // Where to start looking in the src vector
05596             NumElems,            // Number of elements in vector
05597             OpSrc))              // Which source operand ?
05598     return false;
05599 
05600   isLeft = false;
05601   ShAmt = NumZeros;
05602   ShVal = SVOp->getOperand(OpSrc);
05603   return true;
05604 }
05605 
05606 /// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
05607 /// logical left shift of a vector.
05608 static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
05609                               bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
05610   unsigned NumElems =
05611     SVOp->getSimpleValueType(0).getVectorNumElements();
05612   unsigned NumZeros = getNumOfConsecutiveZeros(
05613       SVOp, NumElems, true /* check zeros from left */, DAG,
05614       NumElems - SVOp->getMaskElt(NumElems - 1) - 1);
05615   unsigned OpSrc;
05616 
05617   if (!NumZeros)
05618     return false;
05619 
05620   // Considering the elements in the mask that are not consecutive zeros,
05621   // check if they consecutively come from only one of the source vectors.
05622   //
05623   //                           0    { A, B, X, X } = V2
05624   //                          / \    /  /
05625   //   vector_shuffle V1, V2 <X, X, 4, 5>
05626   //
05627   if (!isShuffleMaskConsecutive(SVOp,
05628             NumZeros,     // Mask Start Index
05629             NumElems,     // Mask End Index(exclusive)
05630             0,            // Where to start looking in the src vector
05631             NumElems,     // Number of elements in vector
05632             OpSrc))       // Which source operand ?
05633     return false;
05634 
05635   isLeft = true;
05636   ShAmt = NumZeros;
05637   ShVal = SVOp->getOperand(OpSrc);
05638   return true;
05639 }
05640 
05641 /// isVectorShift - Returns true if the shuffle can be implemented as a
05642 /// logical left or right shift of a vector.
05643 static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
05644                           bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
05645   // Although the logic below supports any bit width, there are no
05646   // shift instructions which handle more than 128-bit vectors.
05647   if (!SVOp->getSimpleValueType(0).is128BitVector())
05648     return false;
05649 
05650   if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
05651       isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
05652     return true;
05653 
05654   return false;
05655 }
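
// For example (illustrative): a v8i16 shuffle of V1 with an all-zeros vector
// using the mask <2,3,4,5,6,7,8,8> has two zeros on the right, and the
// remaining indices are consecutive elements of V1 starting at 2, so
// isVectorShiftRight reports a right shift with ShAmt == 2 (i.e. 4 bytes).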
05656 
05657 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
05658 ///
05659 static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
05660                                        unsigned NumNonZero, unsigned NumZero,
05661                                        SelectionDAG &DAG,
05662                                        const X86Subtarget* Subtarget,
05663                                        const TargetLowering &TLI) {
05664   if (NumNonZero > 8)
05665     return SDValue();
05666 
05667   SDLoc dl(Op);
05668   SDValue V;
05669   bool First = true;
05670   for (unsigned i = 0; i < 16; ++i) {
05671     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
05672     if (ThisIsNonZero && First) {
05673       if (NumZero)
05674         V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
05675       else
05676         V = DAG.getUNDEF(MVT::v8i16);
05677       First = false;
05678     }
05679 
05680     if ((i & 1) != 0) {
05681       SDValue ThisElt, LastElt;
05682       bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
05683       if (LastIsNonZero) {
05684         LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
05685                               MVT::i16, Op.getOperand(i-1));
05686       }
05687       if (ThisIsNonZero) {
05688         ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
05689         ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
05690                               ThisElt, DAG.getConstant(8, MVT::i8));
05691         if (LastIsNonZero)
05692           ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
05693       } else
05694         ThisElt = LastElt;
05695 
05696       if (ThisElt.getNode())
05697         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
05698                         DAG.getIntPtrConstant(i/2));
05699     }
05700   }
05701 
05702   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
05703 }
05704 
05705 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
05706 ///
05707 static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
05708                                      unsigned NumNonZero, unsigned NumZero,
05709                                      SelectionDAG &DAG,
05710                                      const X86Subtarget* Subtarget,
05711                                      const TargetLowering &TLI) {
05712   if (NumNonZero > 4)
05713     return SDValue();
05714 
05715   SDLoc dl(Op);
05716   SDValue V;
05717   bool First = true;
05718   for (unsigned i = 0; i < 8; ++i) {
05719     bool isNonZero = (NonZeros & (1 << i)) != 0;
05720     if (isNonZero) {
05721       if (First) {
05722         if (NumZero)
05723           V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
05724         else
05725           V = DAG.getUNDEF(MVT::v8i16);
05726         First = false;
05727       }
05728       V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
05729                       MVT::v8i16, V, Op.getOperand(i),
05730                       DAG.getIntPtrConstant(i));
05731     }
05732   }
05733 
05734   return V;
05735 }
05736 
05737 /// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
05738 static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
05739                                      unsigned NonZeros, unsigned NumNonZero,
05740                                      unsigned NumZero, SelectionDAG &DAG,
05741                                      const X86Subtarget *Subtarget,
05742                                      const TargetLowering &TLI) {
05743   // We know there's at least one non-zero element
05744   unsigned FirstNonZeroIdx = 0;
05745   SDValue FirstNonZero = Op->getOperand(FirstNonZeroIdx);
05746   while (FirstNonZero.getOpcode() == ISD::UNDEF ||
05747          X86::isZeroNode(FirstNonZero)) {
05748     ++FirstNonZeroIdx;
05749     FirstNonZero = Op->getOperand(FirstNonZeroIdx);
05750   }
05751 
05752   if (FirstNonZero.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
05753       !isa<ConstantSDNode>(FirstNonZero.getOperand(1)))
05754     return SDValue();
05755 
05756   SDValue V = FirstNonZero.getOperand(0);
05757   MVT VVT = V.getSimpleValueType();
05758   if (!Subtarget->hasSSE41() || (VVT != MVT::v4f32 && VVT != MVT::v4i32))
05759     return SDValue();
05760 
05761   unsigned FirstNonZeroDst =
05762       cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
05763   unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx;
05764   unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx;
05765   unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst;
05766 
05767   for (unsigned Idx = FirstNonZeroIdx + 1; Idx < NumElems; ++Idx) {
05768     SDValue Elem = Op.getOperand(Idx);
05769     if (Elem.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elem))
05770       continue;
05771 
05772     // TODO: What else can be here? Deal with it.
05773     if (Elem.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
05774       return SDValue();
05775 
05776     // TODO: Some optimizations are still possible here
05777     // ex: Getting one element from a vector, and the rest from another.
05778     if (Elem.getOperand(0) != V)
05779       return SDValue();
05780 
05781     unsigned Dst = cast<ConstantSDNode>(Elem.getOperand(1))->getZExtValue();
05782     if (Dst == Idx)
05783       ++CorrectIdx;
05784     else if (IncorrectIdx == -1U) {
05785       IncorrectIdx = Idx;
05786       IncorrectDst = Dst;
05787     } else
05788       // There was already one element with an incorrect index.
05789       // We can't optimize this case to an insertps.
05790       return SDValue();
05791   }
05792 
05793   if (NumNonZero == CorrectIdx || NumNonZero == CorrectIdx + 1) {
05794     SDLoc dl(Op);
05795     EVT VT = Op.getSimpleValueType();
05796     unsigned ElementMoveMask = 0;
05797     if (IncorrectIdx == -1U)
05798       ElementMoveMask = FirstNonZeroIdx << 6 | FirstNonZeroIdx << 4;
05799     else
05800       ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4;
05801 
05802     SDValue InsertpsMask =
05803         DAG.getIntPtrConstant(ElementMoveMask | (~NonZeros & 0xf));
05804     return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask);
05805   }
05806 
05807   return SDValue();
05808 }
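
// For example (illustrative): building <V[0], V[1], V[3], 0.0f> from a v4f32 V
// yields IncorrectIdx == 2, IncorrectDst == 3 and NonZeros == 0x7, so the code
// above emits INSERTPS with immediate (3 << 6) | (2 << 4) | 0x8 == 0xE8:
// source element 3, destination slot 2, zero-mask bit for element 3.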
05809 
05810 /// getVShift - Return a vector logical shift node.
05811 ///
05812 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
05813                          unsigned NumBits, SelectionDAG &DAG,
05814                          const TargetLowering &TLI, SDLoc dl) {
05815   assert(VT.is128BitVector() && "Unknown type for VShift");
05816   EVT ShVT = MVT::v2i64;
05817   unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
05818   SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
05819   return DAG.getNode(ISD::BITCAST, dl, VT,
05820                      DAG.getNode(Opc, dl, ShVT, SrcOp,
05821                              DAG.getConstant(NumBits,
05822                                   TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
05823 }
05824 
05825 static SDValue
05826 LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
05827 
05828   // Check if the scalar load can be widened into a vector load. And if
05829   // the address is "base + cst" see if the cst can be "absorbed" into
05830   // the shuffle mask.
05831   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
05832     SDValue Ptr = LD->getBasePtr();
05833     if (!ISD::isNormalLoad(LD) || LD->isVolatile())
05834       return SDValue();
05835     EVT PVT = LD->getValueType(0);
05836     if (PVT != MVT::i32 && PVT != MVT::f32)
05837       return SDValue();
05838 
05839     int FI = -1;
05840     int64_t Offset = 0;
05841     if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
05842       FI = FINode->getIndex();
05843       Offset = 0;
05844     } else if (DAG.isBaseWithConstantOffset(Ptr) &&
05845                isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
05846       FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
05847       Offset = Ptr.getConstantOperandVal(1);
05848       Ptr = Ptr.getOperand(0);
05849     } else {
05850       return SDValue();
05851     }
05852 
05853     // FIXME: 256-bit vector instructions don't require a strict alignment,
05854     // improve this code to support it better.
05855     unsigned RequiredAlign = VT.getSizeInBits()/8;
05856     SDValue Chain = LD->getChain();
05857     // Make sure the stack object alignment is at least 16 or 32.
05858     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
05859     if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
05860       if (MFI->isFixedObjectIndex(FI)) {
05861         // Can't change the alignment. FIXME: It's possible to compute
05862         // the exact stack offset and reference FI + adjust offset instead.
05863         // If someone *really* cares about this. That's the way to implement it.
05864         return SDValue();
05865       } else {
05866         MFI->setObjectAlignment(FI, RequiredAlign);
05867       }
05868     }
05869 
05870     // (Offset % 16 or 32) must be a multiple of 4. The address is then
05871     // Ptr + (Offset & ~15).
05872     if (Offset < 0)
05873       return SDValue();
05874     if ((Offset % RequiredAlign) & 3)
05875       return SDValue();
05876     int64_t StartOffset = Offset & ~(RequiredAlign-1);
05877     if (StartOffset)
05878       Ptr = DAG.getNode(ISD::ADD, SDLoc(Ptr), Ptr.getValueType(),
05879                         Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
05880 
05881     int EltNo = (Offset - StartOffset) >> 2;
05882     unsigned NumElems = VT.getVectorNumElements();
05883 
05884     EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
05885     SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
05886                              LD->getPointerInfo().getWithOffset(StartOffset),
05887                              false, false, false, 0);
05888 
05889     SmallVector<int, 8> Mask;
05890     for (unsigned i = 0; i != NumElems; ++i)
05891       Mask.push_back(EltNo);
05892 
05893     return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
05894   }
05895 
05896   return SDValue();
05897 }
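
// For example (illustrative): widening a scalar f32 load from a stack slot at
// frame-index + 8 gives RequiredAlign = 16 for v4f32, StartOffset = 8 & ~15 = 0
// and EltNo = (8 - 0) >> 2 = 2, so a v4f32 load of the whole slot is created
// and splatted with the mask <2,2,2,2>.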
05898 
05899 /// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
05900 /// vector of type 'VT', see if the elements can be replaced by a single large
05901 /// load which has the same value as a build_vector whose operands are 'Elts'.
05902 ///
05903 /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
05904 ///
05905 /// FIXME: we'd also like to handle the case where the last elements are zero
05906 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
05907 /// There's even a handy isZeroNode for that purpose.
05908 static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
05909                                         SDLoc &DL, SelectionDAG &DAG,
05910                                         bool isAfterLegalize) {
05911   EVT EltVT = VT.getVectorElementType();
05912   unsigned NumElems = Elts.size();
05913 
05914   LoadSDNode *LDBase = nullptr;
05915   unsigned LastLoadedElt = -1U;
05916 
05917   // For each element in the initializer, see if we've found a load or an undef.
05918   // If we don't find an initial load element, or later load elements are
05919   // non-consecutive, bail out.
05920   for (unsigned i = 0; i < NumElems; ++i) {
05921     SDValue Elt = Elts[i];
05922 
05923     if (!Elt.getNode() ||
05924         (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
05925       return SDValue();
05926     if (!LDBase) {
05927       if (Elt.getNode()->getOpcode() == ISD::UNDEF)
05928         return SDValue();
05929       LDBase = cast<LoadSDNode>(Elt.getNode());
05930       LastLoadedElt = i;
05931       continue;
05932     }
05933     if (Elt.getOpcode() == ISD::UNDEF)
05934       continue;
05935 
05936     LoadSDNode *LD = cast<LoadSDNode>(Elt);
05937     if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
05938       return SDValue();
05939     LastLoadedElt = i;
05940   }
05941 
05942   // If we have found an entire vector of loads and undefs, then return a large
05943   // load of the entire vector width starting at the base pointer.  If we found
05944   // consecutive loads for the low half, generate a vzext_load node.
05945   if (LastLoadedElt == NumElems - 1) {
05946 
05947     if (isAfterLegalize &&
05948         !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
05949       return SDValue();
05950 
05951     SDValue NewLd = SDValue();
05952 
05953     if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
05954       NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
05955                           LDBase->getPointerInfo(),
05956                           LDBase->isVolatile(), LDBase->isNonTemporal(),
05957                           LDBase->isInvariant(), 0);
05958     else
05959       NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
05960                           LDBase->getPointerInfo(),
05961                           LDBase->isVolatile(), LDBase->isNonTemporal(),
05962                           LDBase->isInvariant(), LDBase->getAlignment());
05963     if (LDBase->hasAnyUseOfValue(1)) {
05964       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
05965                                      SDValue(LDBase, 1),
05966                                      SDValue(NewLd.getNode(), 1));
05967       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
05968       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
05969                              SDValue(NewLd.getNode(), 1));
05970     }
05971 
05972     return NewLd;
05973   }
05974   if (NumElems == 4 && LastLoadedElt == 1 &&
05975       DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
05976     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
05977     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
05978     SDValue ResNode =
05979         DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64,
05980                                 LDBase->getPointerInfo(),
05981                                 LDBase->getAlignment(),
05982                                 false/*isVolatile*/, true/*ReadMem*/,
05983                                 false/*WriteMem*/);
05984 
05985     // Make sure the newly-created LOAD is in the same position as LDBase in
05986     // terms of dependency. We create a TokenFactor for LDBase and ResNode, and
05987     // update uses of LDBase's output chain to use the TokenFactor.
05988     if (LDBase->hasAnyUseOfValue(1)) {
05989       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
05990                              SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
05991       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
05992       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
05993                              SDValue(ResNode.getNode(), 1));
05994     }
05995 
05996     return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
05997   }
05998   return SDValue();
05999 }
06000 
06001 /// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
06002 /// to generate a splat value for the following cases:
06003 /// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
06004 /// 2. A splat shuffle which uses a scalar_to_vector node which comes from
06005 /// a scalar load, or a constant.
06006 /// The VBROADCAST node is returned when a pattern is found,
06007 /// or SDValue() otherwise.
06008 static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
06009                                     SelectionDAG &DAG) {
06010   if (!Subtarget->hasFp256())
06011     return SDValue();
06012 
06013   MVT VT = Op.getSimpleValueType();
06014   SDLoc dl(Op);
06015 
06016   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
06017          "Unsupported vector type for broadcast.");
06018 
06019   SDValue Ld;
06020   bool ConstSplatVal;
06021 
06022   switch (Op.getOpcode()) {
06023     default:
06024       // Unknown pattern found.
06025       return SDValue();
06026 
06027     case ISD::BUILD_VECTOR: {
06028       auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
06029       BitVector UndefElements;
06030       SDValue Splat = BVOp->getSplatValue(&UndefElements);
06031 
06032       // We need a splat of a single value to use broadcast, and it doesn't
06033       // make any sense if the value is only in one element of the vector.
06034       if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
06035         return SDValue();
06036 
06037       Ld = Splat;
06038       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
06039                        Ld.getOpcode() == ISD::ConstantFP);
06040 
06041       // Make sure that all of the users of a non-constant load are from the
06042       // BUILD_VECTOR node.
06043       if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
06044         return SDValue();
06045       break;
06046     }
06047 
06048     case ISD::VECTOR_SHUFFLE: {
06049       ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
06050 
06051       // Shuffles must have a splat mask where the first element is
06052       // broadcasted.
06053       if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
06054         return SDValue();
06055 
06056       SDValue Sc = Op.getOperand(0);
06057       if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
06058           Sc.getOpcode() != ISD::BUILD_VECTOR) {
06059 
06060         if (!Subtarget->hasInt256())
06061           return SDValue();
06062 
06063         // Use the register form of the broadcast instruction available on AVX2.
06064         if (VT.getSizeInBits() >= 256)
06065           Sc = Extract128BitVector(Sc, 0, DAG, dl);
06066         return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
06067       }
06068 
06069       Ld = Sc.getOperand(0);
06070       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
06071                        Ld.getOpcode() == ISD::ConstantFP);
06072 
06073       // The scalar_to_vector node and the suspected
06074       // load node must have exactly one user.
06075       // Constants may have multiple users.
06076 
06077       // AVX-512 has register version of the broadcast
06078       bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
06079         Ld.getValueType().getSizeInBits() >= 32;
06080       if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
06081           !hasRegVer))
06082         return SDValue();
06083       break;
06084     }
06085   }
06086 
06087   bool IsGE256 = (VT.getSizeInBits() >= 256);
06088 
06089   // Handle broadcasting a single constant scalar from the constant pool
06090   // into a vector. On Sandy Bridge it is still better to load a constant vector
06091   // from the constant pool than to broadcast it from a scalar.
06092   if (ConstSplatVal && Subtarget->hasInt256()) {
06093     EVT CVT = Ld.getValueType();
06094     assert(!CVT.isVector() && "Must not broadcast a vector type");
06095     unsigned ScalarSize = CVT.getSizeInBits();
06096 
06097     if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) {
06098       const Constant *C = nullptr;
06099       if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
06100         C = CI->getConstantIntValue();
06101       else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
06102         C = CF->getConstantFPValue();
06103 
06104       assert(C && "Invalid constant type");
06105 
06106       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
06107       SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
06108       unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
06109       Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
06110                        MachinePointerInfo::getConstantPool(),
06111                        false, false, false, Alignment);
06112 
06113       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
06114     }
06115   }
06116 
06117   bool IsLoad = ISD::isNormalLoad(Ld.getNode());
06118   unsigned ScalarSize = Ld.getValueType().getSizeInBits();
06119 
06120   // Handle AVX2 in-register broadcasts.
06121   if (!IsLoad && Subtarget->hasInt256() &&
06122       (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
06123     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
06124 
06125   // The scalar source must be a normal load.
06126   if (!IsLoad)
06127     return SDValue();
06128 
06129   if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))
06130     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
06131 
06132   // The integer check is needed for the 64-bit into 128-bit case, so it doesn't
06133   // match double, since there is no vbroadcastsd xmm.
06134   if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
06135     if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
06136       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
06137   }
06138 
06139   // Unsupported broadcast.
06140   return SDValue();
06141 }
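
// For example (illustrative): a v8f32 BUILD_VECTOR that splats a single normal
// f32 load is matched above with ScalarSize == 32, so it is lowered to
// X86ISD::VBROADCAST of that load (a vbroadcastss from memory on AVX).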
06142 
06143 /// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
06144 /// underlying vector and index.
06145 ///
06146 /// Modifies \p ExtractedFromVec to the real vector and returns the real
06147 /// index.
06148 static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
06149                                          SDValue ExtIdx) {
06150   int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
06151   if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
06152     return Idx;
06153 
06154   // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
06155   // lowered this:
06156   //   (extract_vector_elt (v8f32 %vreg1), Constant<6>)
06157   // to:
06158   //   (extract_vector_elt (vector_shuffle<2,u,u,u>
06159   //                           (extract_subvector (v8f32 %vreg0), Constant<4>),
06160   //                           undef)
06161   //                       Constant<0>)
06162   // In this case the vector is the extract_subvector expression and the index
06163   // is 2, as specified by the shuffle.
06164   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
06165   SDValue ShuffleVec = SVOp->getOperand(0);
06166   MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
06167   assert(ShuffleVecVT.getVectorElementType() ==
06168          ExtractedFromVec.getSimpleValueType().getVectorElementType());
06169 
06170   int ShuffleIdx = SVOp->getMaskElt(Idx);
06171   if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
06172     ExtractedFromVec = ShuffleVec;
06173     return ShuffleIdx;
06174   }
06175   return Idx;
06176 }
06177 
06178 static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
06179   MVT VT = Op.getSimpleValueType();
06180 
06181   // Skip if insert_vec_elt is not supported.
06182   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
06183   if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
06184     return SDValue();
06185 
06186   SDLoc DL(Op);
06187   unsigned NumElems = Op.getNumOperands();
06188 
06189   SDValue VecIn1;
06190   SDValue VecIn2;
06191   SmallVector<unsigned, 4> InsertIndices;
06192   SmallVector<int, 8> Mask(NumElems, -1);
06193 
06194   for (unsigned i = 0; i != NumElems; ++i) {
06195     unsigned Opc = Op.getOperand(i).getOpcode();
06196 
06197     if (Opc == ISD::UNDEF)
06198       continue;
06199 
06200     if (Opc != ISD::EXTRACT_VECTOR_ELT) {
06201       // Quit if more than 1 element needs inserting.
06202       if (InsertIndices.size() > 1)
06203         return SDValue();
06204 
06205       InsertIndices.push_back(i);
06206       continue;
06207     }
06208 
06209     SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
06210     SDValue ExtIdx = Op.getOperand(i).getOperand(1);
06211     // Quit if non-constant index.
06212     if (!isa<ConstantSDNode>(ExtIdx))
06213       return SDValue();
06214     int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
06215 
06216     // Quit if extracted from vector of different type.
06217     if (ExtractedFromVec.getValueType() != VT)
06218       return SDValue();
06219 
06220     if (!VecIn1.getNode())
06221       VecIn1 = ExtractedFromVec;
06222     else if (VecIn1 != ExtractedFromVec) {
06223       if (!VecIn2.getNode())
06224         VecIn2 = ExtractedFromVec;
06225       else if (VecIn2 != ExtractedFromVec)
06226         // Quit if more than 2 vectors to shuffle
06227         return SDValue();
06228     }
06229 
06230     if (ExtractedFromVec == VecIn1)
06231       Mask[i] = Idx;
06232     else if (ExtractedFromVec == VecIn2)
06233       Mask[i] = Idx + NumElems;
06234   }
06235 
06236   if (!VecIn1.getNode())
06237     return SDValue();
06238 
06239   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
06240   SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
06241   for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
06242     unsigned Idx = InsertIndices[i];
06243     NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
06244                      DAG.getIntPtrConstant(Idx));
06245   }
06246 
06247   return NV;
06248 }
06249 
06250 // Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
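//
// As an illustration, an all-constant mask such as
//   (v8i1 build_vector 1, 0, 1, 1, undef, 0, 0, 0)
// is collected into the immediate 0b00001101 and materialized as
//   (extract_subvector (v16i1 bitcast (i16 13)), 0),
// while a mask with a single variable operand (not in lane 0) is built by
// bitcasting the constant bits into a vector and inserting the variable
// element with INSERT_VECTOR_ELT.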
06251 SDValue
06252 X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
06253 
06254   MVT VT = Op.getSimpleValueType();
06255   assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
06256          "Unexpected type in LowerBUILD_VECTORvXi1!");
06257 
06258   SDLoc dl(Op);
06259   if (ISD::isBuildVectorAllZeros(Op.getNode())) {
06260     SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
06261     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
06262     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
06263   }
06264 
06265   if (ISD::isBuildVectorAllOnes(Op.getNode())) {
06266     SDValue Cst = DAG.getTargetConstant(1, MVT::i1);
06267     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
06268     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
06269   }
06270 
06271   bool AllConstants = true;
06272   uint64_t Immediate = 0;
06273   int NonConstIdx = -1;
06274   bool IsSplat = true;
06275   unsigned NumNonConsts = 0;
06276   unsigned NumConsts = 0;
06277   for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
06278     SDValue In = Op.getOperand(idx);
06279     if (In.getOpcode() == ISD::UNDEF)
06280       continue;
06281     if (!isa<ConstantSDNode>(In)) {
06282       AllConstants = false;
06283       NonConstIdx = idx;
06284       NumNonConsts++;
06285     }
06286     else {
06287       NumConsts++;
06288       if (cast<ConstantSDNode>(In)->getZExtValue())
06289         Immediate |= (1ULL << idx);
06290     }
06291     if (In != Op.getOperand(0))
06292       IsSplat = false;
06293   }
06294 
06295   if (AllConstants) {
06296     SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
06297       DAG.getConstant(Immediate, MVT::i16));
06298     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
06299                        DAG.getIntPtrConstant(0));
06300   }
06301 
06302   if (NumNonConsts == 1 && NonConstIdx != 0) {
06303     SDValue DstVec;
06304     if (NumConsts) {
06305       SDValue VecAsImm = DAG.getConstant(Immediate,
06306                                          MVT::getIntegerVT(VT.getSizeInBits()));
06307       DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
06308     }
06309     else 
06310       DstVec = DAG.getUNDEF(VT);
06311     return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
06312                        Op.getOperand(NonConstIdx),
06313                        DAG.getIntPtrConstant(NonConstIdx));
06314   }
06315   if (!IsSplat && (NonConstIdx != 0))
06316     llvm_unreachable("Unsupported BUILD_VECTOR operation");
06317   MVT SelectVT = (VT == MVT::v16i1) ? MVT::i16 : MVT::i8;
06318   SDValue Select;
06319   if (IsSplat)
06320     Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
06321                           DAG.getConstant(-1, SelectVT),
06322                           DAG.getConstant(0, SelectVT));
06323   else
06324     Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
06325                          DAG.getConstant((Immediate | 1), SelectVT),
06326                          DAG.getConstant(Immediate, SelectVT));
06327   return DAG.getNode(ISD::BITCAST, dl, VT, Select);
06328 }
06329 
06330 /// \brief Return true if \p N implements a horizontal binop and store the
06331 /// operands for that horizontal binop in V0 and V1.
06332 ///
06333 /// This is a helper function of PerformBUILD_VECTORCombine.
06334 /// This function checks whether the input build_vector \p N implements a
06335 /// horizontal operation. Parameter \p Opcode defines the kind of horizontal
06336 /// operation to match.
06337 /// For example, if \p Opcode is equal to ISD::ADD, then this function
06338 /// checks whether \p N implements a horizontal arithmetic add; if instead
06339 /// \p Opcode is equal to ISD::SUB, then it checks for a horizontal
06340 /// arithmetic sub.
06341 ///
06342 /// This function only analyzes the elements of \p N whose indices are
06343 /// in the range [BaseIdx, LastIdx).
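///
/// For example, given a v4f32 build_vector, \p Opcode == ISD::FADD and the
/// index range [0, 4), the operands (A and B denote arbitrary v4f32 inputs)
///   (fadd (extract_vector_elt A, 0), (extract_vector_elt A, 1)),
///   (fadd (extract_vector_elt A, 2), (extract_vector_elt A, 3)),
///   (fadd (extract_vector_elt B, 0), (extract_vector_elt B, 1)),
///   (fadd (extract_vector_elt B, 2), (extract_vector_elt B, 3))
/// are recognized as a horizontal add, with V0 set to A and V1 set to B.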
06344 static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
06345                               SelectionDAG &DAG,
06346                               unsigned BaseIdx, unsigned LastIdx,
06347                               SDValue &V0, SDValue &V1) {
06348   EVT VT = N->getValueType(0);
06349 
06350   assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
06351   assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
06352          "Invalid Vector in input!");
06353   
06354   bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
06355   bool CanFold = true;
06356   unsigned ExpectedVExtractIdx = BaseIdx;
06357   unsigned NumElts = LastIdx - BaseIdx;
06358   V0 = DAG.getUNDEF(VT);
06359   V1 = DAG.getUNDEF(VT);
06360 
06361   // Check if N implements a horizontal binop.
06362   for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
06363     SDValue Op = N->getOperand(i + BaseIdx);
06364 
06365     // Skip UNDEFs.
06366     if (Op->getOpcode() == ISD::UNDEF) {
06367       // Update the expected vector extract index.
06368       if (i * 2 == NumElts)
06369         ExpectedVExtractIdx = BaseIdx;
06370       ExpectedVExtractIdx += 2;
06371       continue;
06372     }
06373 
06374     CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
06375 
06376     if (!CanFold)
06377       break;
06378 
06379     SDValue Op0 = Op.getOperand(0);
06380     SDValue Op1 = Op.getOperand(1);
06381 
06382     // Try to match the following pattern:
06383     // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
06384     CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
06385         Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
06386         Op0.getOperand(0) == Op1.getOperand(0) &&
06387         isa<ConstantSDNode>(Op0.getOperand(1)) &&
06388         isa<ConstantSDNode>(Op1.getOperand(1)));
06389     if (!CanFold)
06390       break;
06391 
06392     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
06393     unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
06394 
06395     if (i * 2 < NumElts) {
06396       if (V0.getOpcode() == ISD::UNDEF)
06397         V0 = Op0.getOperand(0);
06398     } else {
06399       if (V1.getOpcode() == ISD::UNDEF)
06400         V1 = Op0.getOperand(0);
06401       if (i * 2 == NumElts)
06402         ExpectedVExtractIdx = BaseIdx;
06403     }
06404 
06405     SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
06406     if (I0 == ExpectedVExtractIdx)
06407       CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
06408     else if (IsCommutable && I1 == ExpectedVExtractIdx) {
06409       // Try to match the following dag sequence:
06410       // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
06411       CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
06412     } else
06413       CanFold = false;
06414 
06415     ExpectedVExtractIdx += 2;
06416   }
06417 
06418   return CanFold;
06419 }
06420 
06421 /// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
06422 /// a CONCAT_VECTORS node.
06423 ///
06424 /// This is a helper function of PerformBUILD_VECTORCombine.
06425 /// This function expects two 256-bit vectors called V0 and V1.
06426 /// First, each vector is split into two separate 128-bit vectors.
06427 /// Then, the resulting 128-bit vectors are used to implement two
06428 /// horizontal binary operations.
06429 ///
06430 /// The kind of horizontal binary operation is defined by \p X86Opcode.
06431 ///
06432 /// \p Mode specifies how the 128-bit parts of V0 and V1 are passed as input to
06433 /// the two new horizontal binops.
06434 /// When Mode is set, the first horizontal binop dag node takes as input
06435 /// the lower 128 bits of V0 and the upper 128 bits of V0. The second
06436 /// horizontal binop dag node takes as input the lower 128 bits of V1
06437 /// and the upper 128 bits of V1.
06438 ///   Example:
06439 ///     HADD V0_LO, V0_HI
06440 ///     HADD V1_LO, V1_HI
06441 ///
06442 /// Otherwise, the first horizontal binop dag node takes as input the lower
06443 /// 128 bits of V0 and the lower 128 bits of V1, and the second horizontal
06444 /// binop dag node takes as input the upper 128 bits of V0 and of V1.
06445 ///   Example:
06446 ///     HADD V0_LO, V1_LO
06447 ///     HADD V0_HI, V1_HI
06448 ///
06449 /// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
06450 /// 128 bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
06451 /// the upper 128 bits of the result.
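///
/// The two partial results are then glued back together with a
/// CONCAT_VECTORS node, so in the second mode the emitted sequence is roughly
///   (concat_vectors (HADD V0_LO, V1_LO), (HADD V0_HI, V1_HI)).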
06452 static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
06453                                      SDLoc DL, SelectionDAG &DAG,
06454                                      unsigned X86Opcode, bool Mode,
06455                                      bool isUndefLO, bool isUndefHI) {
06456   EVT VT = V0.getValueType();
06457   assert(VT.is256BitVector() && VT == V1.getValueType() &&
06458          "Invalid nodes in input!");
06459 
06460   unsigned NumElts = VT.getVectorNumElements();
06461   SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL);
06462   SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL);
06463   SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL);
06464   SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL);
06465   EVT NewVT = V0_LO.getValueType();
06466 
06467   SDValue LO = DAG.getUNDEF(NewVT);
06468   SDValue HI = DAG.getUNDEF(NewVT);
06469 
06470   if (Mode) {
06471     // Don't emit a horizontal binop if the result is expected to be UNDEF.
06472     if (!isUndefLO && V0->getOpcode() != ISD::UNDEF)
06473       LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
06474     if (!isUndefHI && V1->getOpcode() != ISD::UNDEF)
06475       HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
06476   } else {
06477     // Don't emit a horizontal binop if the result is expected to be UNDEF.
06478     if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF ||
06479                        V1_LO->getOpcode() != ISD::UNDEF))
06480       LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
06481 
06482     if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF ||
06483                        V1_HI->getOpcode() != ISD::UNDEF))
06484       HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
06485   }
06486 
06487   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
06488 }
06489 
06490 /// \brief Try to fold a build_vector that performs an 'addsub' into the
06491 /// sequence of 'vadd + vsub + blendi'.
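///
/// For example, a v4f32 build_vector of the form
///   (fsub (extract_vector_elt A, 0), (extract_vector_elt B, 0)),
///   (fadd (extract_vector_elt A, 1), (extract_vector_elt B, 1)),
///   (fsub (extract_vector_elt A, 2), (extract_vector_elt B, 2)),
///   (fadd (extract_vector_elt A, 3), (extract_vector_elt B, 3))
/// (A and B denote arbitrary v4f32 inputs) becomes
///   (vselect Mask, (fsub A, B), (fadd A, B))
/// with a constant Mask that picks the subtraction in the even lanes and the
/// addition in the odd lanes; ISel later turns this add + sub + blend into a
/// single ADDSUB instruction.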
06492 static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
06493                            const X86Subtarget *Subtarget) {
06494   SDLoc DL(BV);
06495   EVT VT = BV->getValueType(0);
06496   unsigned NumElts = VT.getVectorNumElements();
06497   SDValue InVec0 = DAG.getUNDEF(VT);
06498   SDValue InVec1 = DAG.getUNDEF(VT);
06499 
06500   assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
06501           VT == MVT::v2f64) && "build_vector with an invalid type found!");
06502 
06503   // Don't try to emit a VSELECT that cannot be lowered into a blend.
06504   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
06505   if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
06506     return SDValue();
06507 
06508   // Odd-numbered elements in the input build vector are obtained from
06509   // adding two float elements.
06510   // Even-numbered elements in the input build vector are obtained from
06511   // subtracting two float elements.
06512   unsigned ExpectedOpcode = ISD::FSUB;
06513   unsigned NextExpectedOpcode = ISD::FADD;
06514   bool AddFound = false;
06515   bool SubFound = false;
06516 
06517   for (unsigned i = 0, e = NumElts; i != e; i++) {
06518     SDValue Op = BV->getOperand(i);
06519       
06520     // Skip 'undef' values.
06521     unsigned Opcode = Op.getOpcode();
06522     if (Opcode == ISD::UNDEF) {
06523       std::swap(ExpectedOpcode, NextExpectedOpcode);
06524       continue;
06525     }
06526       
06527     // Early exit if we found an unexpected opcode.
06528     if (Opcode != ExpectedOpcode)
06529       return SDValue();
06530 
06531     SDValue Op0 = Op.getOperand(0);
06532     SDValue Op1 = Op.getOperand(1);
06533 
06534     // Try to match the following pattern:
06535     // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
06536     // Early exit if we cannot match that sequence.
06537     if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
06538         Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
06539         !isa<ConstantSDNode>(Op0.getOperand(1)) ||
06540         !isa<ConstantSDNode>(Op1.getOperand(1)) ||
06541         Op0.getOperand(1) != Op1.getOperand(1))
06542       return SDValue();
06543 
06544     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
06545     if (I0 != i)
06546       return SDValue();
06547 
06548     // We found a valid add/sub node. Update the information accordingly.
06549     if (i & 1)
06550       AddFound = true;
06551     else
06552       SubFound = true;
06553 
06554     // Update InVec0 and InVec1.
06555     if (InVec0.getOpcode() == ISD::UNDEF)
06556       InVec0 = Op0.getOperand(0);
06557     if (InVec1.getOpcode() == ISD::UNDEF)
06558       InVec1 = Op1.getOperand(0);
06559 
06560     // Make sure that the operands of each add/sub node always
06561     // come from the same pair of vectors.
06562     if (InVec0 != Op0.getOperand(0)) {
06563       if (ExpectedOpcode == ISD::FSUB)
06564         return SDValue();
06565 
06566       // FADD is commutable. Try to commute the operands
06567       // and then test again.
06568       std::swap(Op0, Op1);
06569       if (InVec0 != Op0.getOperand(0))
06570         return SDValue();
06571     }
06572 
06573     if (InVec1 != Op1.getOperand(0))
06574       return SDValue();
06575 
06576     // Update the pair of expected opcodes.
06577     std::swap(ExpectedOpcode, NextExpectedOpcode);
06578   }
06579 
06580   // Don't try to fold this build_vector into a VSELECT if it has
06581   // too many UNDEF operands.
06582   if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
06583       InVec1.getOpcode() != ISD::UNDEF) {
06584     // Emit a sequence of vector add and sub followed by a VSELECT.
06585     // The new VSELECT will be lowered into a BLENDI.
06586     // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI'
06587     // and emit a single ADDSUB instruction.
06588     SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1);
06589     SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1);
06590 
06591     // Construct the VSELECT mask.
06592     EVT MaskVT = VT.changeVectorElementTypeToInteger();
06593     EVT SVT = MaskVT.getVectorElementType();
06594     unsigned SVTBits = SVT.getSizeInBits();
06595     SmallVector<SDValue, 8> Ops;
06596 
06597     for (unsigned i = 0, e = NumElts; i != e; ++i) {
06598       APInt Value = i & 1 ? APInt::getNullValue(SVTBits) :
06599                             APInt::getAllOnesValue(SVTBits);
06600       SDValue Constant = DAG.getConstant(Value, SVT);
06601       Ops.push_back(Constant);
06602     }
06603 
06604     SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops);
06605     return DAG.getSelect(DL, VT, Mask, Sub, Add);
06606   }
06607   
06608   return SDValue();
06609 }
06610 
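/// \brief Try to combine a build_vector into a single ADDSUB or horizontal
/// add/sub node.
///
/// The combine first tries to match an 'addsub' pattern (see matchAddSub) and
/// then a horizontal add/sub pattern (see isHorizontalBinOp). Depending on the
/// available subtarget features and on how many operands are UNDEF, the result
/// is either a single X86ISD::(F)HADD/(F)HSUB node or a pair of 128-bit
/// horizontal operations recombined by ExpandHorizontalBinOp.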
06611 static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
06612                                           const X86Subtarget *Subtarget) {
06613   SDLoc DL(N);
06614   EVT VT = N->getValueType(0);
06615   unsigned NumElts = VT.getVectorNumElements();
06616   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
06617   SDValue InVec0, InVec1;
06618 
06619   // Try to match an ADDSUB.
06620   if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
06621       (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
06622     SDValue Value = matchAddSub(BV, DAG, Subtarget);
06623     if (Value.getNode())
06624       return Value;
06625   }
06626 
06627   // Try to match horizontal ADD/SUB.
06628   unsigned NumUndefsLO = 0;
06629   unsigned NumUndefsHI = 0;
06630   unsigned Half = NumElts/2;
06631 
06632   // Count the number of UNDEF operands in the input build_vector.
06633   for (unsigned i = 0, e = Half; i != e; ++i)
06634     if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
06635       NumUndefsLO++;
06636 
06637   for (unsigned i = Half, e = NumElts; i != e; ++i)
06638     if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
06639       NumUndefsHI++;
06640 
06641   // Early exit if this is either a build_vector of all UNDEFs or all the
06642   // operands but one are UNDEF.
06643   if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
06644     return SDValue();
06645 
06646   if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
06647     // Try to match an SSE3 float HADD/HSUB.
06648     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
06649       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
06650     
06651     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
06652       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
06653   } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
06654     // Try to match an SSSE3 integer HADD/HSUB.
06655     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
06656       return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
06657     
06658     if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
06659       return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
06660   }
06661   
06662   if (!Subtarget->hasAVX())
06663     return SDValue();
06664 
06665   if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
06666     // Try to match an AVX horizontal add/sub of packed single/double
06667     // precision floating point values from 256-bit vectors.
06668     SDValue InVec2, InVec3;
06669     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
06670         isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
06671         ((InVec0.getOpcode() == ISD::UNDEF ||
06672           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06673         ((InVec1.getOpcode() == ISD::UNDEF ||
06674           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06675       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
06676 
06677     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
06678         isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
06679         ((InVec0.getOpcode() == ISD::UNDEF ||
06680           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06681         ((InVec1.getOpcode() == ISD::UNDEF ||
06682           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06683       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
06684   } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
06685     // Try to match an AVX2 horizontal add/sub of signed integers.
06686     SDValue InVec2, InVec3;
06687     unsigned X86Opcode;
06688     bool CanFold = true;
06689 
06690     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
06691         isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
06692         ((InVec0.getOpcode() == ISD::UNDEF ||
06693           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06694         ((InVec1.getOpcode() == ISD::UNDEF ||
06695           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06696       X86Opcode = X86ISD::HADD;
06697     else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
06698         isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
06699         ((InVec0.getOpcode() == ISD::UNDEF ||
06700           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
06701         ((InVec1.getOpcode() == ISD::UNDEF ||
06702           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
06703       X86Opcode = X86ISD::HSUB;
06704     else
06705       CanFold = false;
06706 
06707     if (CanFold) {
06708       // Fold this build_vector into a single horizontal add/sub.
06709       // Do this only if the target has AVX2.
06710       if (Subtarget->hasAVX2())
06711         return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
06712  
06713       // Do not try to expand this build_vector into a pair of horizontal
06714       // add/sub if we can emit a pair of scalar add/sub.
06715       if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
06716         return SDValue();
06717 
06718       // Convert this build_vector into a pair of horizontal binop followed by
06719       // a concat vector.
06720       bool isUndefLO = NumUndefsLO == Half;
06721       bool isUndefHI = NumUndefsHI == Half;
06722       return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
06723                                    isUndefLO, isUndefHI);
06724     }
06725   }
06726 
06727   if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
06728        VT == MVT::v16i16) && Subtarget->hasAVX()) {
06729     unsigned X86Opcode;
06730     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
06731       X86Opcode = X86ISD::HADD;
06732     else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
06733       X86Opcode = X86ISD::HSUB;
06734     else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
06735       X86Opcode = X86ISD::FHADD;
06736     else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
06737       X86Opcode = X86ISD::FHSUB;
06738     else
06739       return SDValue();
06740 
06741     // Don't try to expand this build_vector into a pair of horizontal add/sub
06742     // if we can simply emit a pair of scalar add/sub.
06743     if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
06744       return SDValue();
06745 
06746     // Convert this build_vector into two horizontal add/sub followed by
06747     // a concat vector.
06748     bool isUndefLO = NumUndefsLO == Half;
06749     bool isUndefHI = NumUndefsHI == Half;
06750     return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
06751                                  isUndefLO, isUndefHI);
06752   }
06753 
06754   return SDValue();
06755 }
06756 
06757 SDValue
06758 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
06759   SDLoc dl(Op);
06760 
06761   MVT VT = Op.getSimpleValueType();
06762   MVT ExtVT = VT.getVectorElementType();
06763   unsigned NumElems = Op.getNumOperands();
06764 
06765   // Predicate (vXi1) vectors get their own lowering on AVX-512.
06766   if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512())
06767     return LowerBUILD_VECTORvXi1(Op, DAG);
06768 
06769   // Vectors containing all zeros can be matched by pxor and xorps later
06770   if (ISD::isBuildVectorAllZeros(Op.getNode())) {
06771     // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
06772     // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
06773     if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
06774       return Op;
06775 
06776     return getZeroVector(VT, Subtarget, DAG, dl);
06777   }
06778 
06779   // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
06780   // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
06781   // vpcmpeqd on 256-bit vectors.
06782   if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
06783     if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
06784       return Op;
06785 
06786     if (!VT.is512BitVector())
06787       return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
06788   }
06789 
06790   SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
06791   if (Broadcast.getNode())
06792     return Broadcast;
06793 
06794   unsigned EVTBits = ExtVT.getSizeInBits();
06795 
06796   unsigned NumZero  = 0;
06797   unsigned NumNonZero = 0;
06798   unsigned NonZeros = 0;
06799   bool IsAllConstants = true;
06800   SmallSet<SDValue, 8> Values;
06801   for (unsigned i = 0; i < NumElems; ++i) {
06802     SDValue Elt = Op.getOperand(i);
06803     if (Elt.getOpcode() == ISD::UNDEF)
06804       continue;
06805     Values.insert(Elt);
06806     if (Elt.getOpcode() != ISD::Constant &&
06807         Elt.getOpcode() != ISD::ConstantFP)
06808       IsAllConstants = false;
06809     if (X86::isZeroNode(Elt))
06810       NumZero++;
06811     else {
06812       NonZeros |= (1 << i);
06813       NumNonZero++;
06814     }
06815   }
06816 
06817   // All undef vector. Return an UNDEF.  All zero vectors were handled above.
06818   if (NumNonZero == 0)
06819     return DAG.getUNDEF(VT);
06820 
06821   // Special case for single non-zero, non-undef, element.
06822   if (NumNonZero == 1) {
06823     unsigned Idx = countTrailingZeros(NonZeros);
06824     SDValue Item = Op.getOperand(Idx);
06825 
06826     // If this is an insertion of an i64 value on x86-32, and if the top bits of
06827     // the value are obviously zero, truncate the value to i32 and do the
06828     // insertion that way.  Only do this if the value is non-constant or if the
06829     // value is a constant being inserted into element 0.  It is cheaper to do
06830     // a constant pool load than it is to do a movd + shuffle.
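    // Concretely, a (v2i64 build_vector X, 0) on x86-32 where the upper 32
    // bits of X are known to be zero becomes roughly:
    //   (v2i64 bitcast (shuffle (v4i32 scalar_to_vector (trunc X)), zero))
    // i.e. a movd of the truncated value followed by a zero-extending shuffle.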
06831     if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
06832         (!IsAllConstants || Idx == 0)) {
06833       if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
06834         // Handle SSE only.
06835         assert(VT == MVT::v2i64 && "Expected an SSE value type!");
06836         EVT VecVT = MVT::v4i32;
06837         unsigned VecElts = 4;
06838 
06839         // Truncate the value (which may itself be a constant) to i32, and
06840         // convert it to a vector with movd (S2V+shuffle to zero extend).
06841         Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
06842         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
06843         Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
06844 
06845         // Now we have our 32-bit value zero extended in the low element of
06846         // a vector.  If Idx != 0, swizzle it into place.
06847         if (Idx != 0) {
06848           SmallVector<int, 4> Mask;
06849           Mask.push_back(Idx);
06850           for (unsigned i = 1; i != VecElts; ++i)
06851             Mask.push_back(i);
06852           Item = DAG.getVectorShuffle(VecVT, dl, Item, DAG.getUNDEF(VecVT),
06853                                       &Mask[0]);
06854         }
06855         return DAG.getNode(ISD::BITCAST, dl, VT, Item);
06856       }
06857     }
06858 
06859     // If we have a constant or non-constant insertion into the low element of
06860     // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
06861     // the rest of the elements.  This will be matched as movd/movq/movss/movsd
06862     // depending on what the source datatype is.
06863     if (Idx == 0) {
06864       if (NumZero == 0)
06865         return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
06866 
06867       if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
06868           (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
06869         if (VT.is256BitVector() || VT.is512BitVector()) {
06870           SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
06871           return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
06872                              Item, DAG.getIntPtrConstant(0));
06873         }
06874         assert(VT.is128BitVector() && "Expected an SSE value type!");
06875         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
06876         // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
06877         return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
06878       }
06879 
06880       if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
06881         Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
06882         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
06883         if (VT.is256BitVector()) {
06884           SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
06885           Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
06886         } else {
06887           assert(VT.is128BitVector() && "Expected an SSE value type!");
06888           Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
06889         }
06890         return DAG.getNode(ISD::BITCAST, dl, VT, Item);
06891       }
06892     }
06893 
06894     // Is it a vector logical left shift?
06895     if (NumElems == 2 && Idx == 1 &&
06896         X86::isZeroNode(Op.getOperand(0)) &&
06897         !X86::isZeroNode(Op.getOperand(1))) {
06898       unsigned NumBits = VT.getSizeInBits();
06899       return getVShift(true, VT,
06900                        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
06901                                    VT, Op.getOperand(1)),
06902                        NumBits/2, DAG, *this, dl);
06903     }
06904 
06905     if (IsAllConstants) // Otherwise, it's better to do a constpool load.
06906       return SDValue();
06907 
06908     // Otherwise, if this is a vector with i32 or f32 elements, and the element
06909     // is a non-constant being inserted into an element other than the low one,
06910     // we can't use a constant pool load.  Instead, use SCALAR_TO_VECTOR (aka
06911     // movd/movss) to move this into the low element, then shuffle it into
06912     // place.
06913     if (EVTBits == 32) {
06914       Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
06915 
06916       // Turn it into a shuffle of zero and zero-extended scalar to vector.
06917       Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
06918       SmallVector<int, 8> MaskVec;
06919       for (unsigned i = 0; i != NumElems; ++i)
06920         MaskVec.push_back(i == Idx ? 0 : 1);
06921       return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
06922     }
06923   }
06924 
06925   // Splat is obviously ok. Let legalizer expand it to a shuffle.
06926   if (Values.size() == 1) {
06927     if (EVTBits == 32) {
06928       // Instead of a shuffle like this:
06929       // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
06930       // Check if it's possible to issue this instead.
06931       // shuffle (vload ptr), undef, <1, 1, 1, 1>
06932       unsigned Idx = countTrailingZeros(NonZeros);
06933       SDValue Item = Op.getOperand(Idx);
06934       if (Op.getNode()->isOnlyUserOf(Item.getNode()))
06935         return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
06936     }
06937     return SDValue();
06938   }
06939 
06940   // A vector full of immediates; various special cases are already
06941   // handled, so this is best done with a single constant-pool load.
06942   if (IsAllConstants)
06943     return SDValue();
06944 
06945   // For AVX-length vectors, build the individual 128-bit pieces and use
06946   // shuffles to put them in place.
06947   if (VT.is256BitVector() || VT.is512BitVector()) {
06948     SmallVector<SDValue, 64> V;
06949     for (unsigned i = 0; i != NumElems; ++i)
06950       V.push_back(Op.getOperand(i));
06951 
06952     EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
06953 
06954     // Build both the lower and upper subvector.
06955     SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
06956                                 makeArrayRef(&V[0], NumElems/2));
06957     SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
06958                                 makeArrayRef(&V[NumElems / 2], NumElems/2));
06959 
06960     // Recreate the wider vector with the lower and upper part.
06961     if (VT.is256BitVector())
06962       return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
06963     return Concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
06964   }
06965 
06966   // Let legalizer expand 2-wide build_vectors.
06967   if (EVTBits == 64) {
06968     if (NumNonZero == 1) {
06969       // One half is zero or undef.
06970       unsigned Idx = countTrailingZeros(NonZeros);
06971       SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
06972                                  Op.getOperand(Idx));
06973       return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
06974     }
06975     return SDValue();
06976   }
06977 
06978   // If element VT is < 32 bits, convert it to inserts into a zero vector.
06979   if (EVTBits == 8 && NumElems == 16) {
06980     SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
06981                                       Subtarget, *this);
06982     if (V.getNode()) return V;
06983   }
06984 
06985   if (EVTBits == 16 && NumElems == 8) {
06986     SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
06987                                       Subtarget, *this);
06988     if (V.getNode()) return V;
06989   }
06990 
06991   // If the element VT is 32 bits and there are 4 elements, try to generate an INSERTPS.
06992   if (EVTBits == 32 && NumElems == 4) {
06993     SDValue V = LowerBuildVectorv4x32(Op, NumElems, NonZeros, NumNonZero,
06994                                       NumZero, DAG, Subtarget, *this);
06995     if (V.getNode())
06996       return V;
06997   }
06998 
06999   // If the element VT is 32 bits, turn it into a number of shuffles.
07000   SmallVector<SDValue, 8> V(NumElems);
07001   if (NumElems == 4 && NumZero > 0) {
07002     for (unsigned i = 0; i < 4; ++i) {
07003       bool isZero = !(NonZeros & (1 << i));
07004       if (isZero)
07005         V[i] = getZeroVector(VT, Subtarget, DAG, dl);
07006       else
07007         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
07008     }
07009 
07010     for (unsigned i = 0; i < 2; ++i) {
07011       switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
07012         default: break;
07013         case 0:
07014           V[i] = V[i*2];  // Must be a zero vector.
07015           break;
07016         case 1:
07017           V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
07018           break;
07019         case 2:
07020           V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
07021           break;
07022         case 3:
07023           V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
07024           break;
07025       }
07026     }
07027 
07028     bool Reverse1 = (NonZeros & 0x3) == 2;
07029     bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
07030     int MaskVec[] = {
07031       Reverse1 ? 1 : 0,
07032       Reverse1 ? 0 : 1,
07033       static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
07034       static_cast<int>(Reverse2 ? NumElems   : NumElems+1)
07035     };
07036     return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
07037   }
07038 
07039   if (Values.size() > 1 && VT.is128BitVector()) {
07040     // Check for a build vector of consecutive loads.
07041     for (unsigned i = 0; i < NumElems; ++i)
07042       V[i] = Op.getOperand(i);
07043 
07044     // Check for elements which are consecutive loads.
07045     SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
07046     if (LD.getNode())
07047       return LD;
07048 
07049     // Check for a build vector from mostly shuffle plus few inserting.
07050     SDValue Sh = buildFromShuffleMostly(Op, DAG);
07051     if (Sh.getNode())
07052       return Sh;
07053 
07054     // For SSE 4.1, use insertps to put the high elements into the low element.
07055     if (getSubtarget()->hasSSE41()) {
07056       SDValue Result;
07057       if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
07058         Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
07059       else
07060         Result = DAG.getUNDEF(VT);
07061 
07062       for (unsigned i = 1; i < NumElems; ++i) {
07063         if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
07064         Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
07065                              Op.getOperand(i), DAG.getIntPtrConstant(i));
07066       }
07067       return Result;
07068     }
07069 
07070     // Otherwise, expand into a number of unpckl*, start by extending each of
07071     // our (non-undef) elements to the full vector width with the element in the
07072     // bottom slot of the vector (which generates no code for SSE).
07073     for (unsigned i = 0; i < NumElems; ++i) {
07074       if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
07075         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
07076       else
07077         V[i] = DAG.getUNDEF(VT);
07078     }
07079 
07080     // Next, we iteratively mix elements, e.g. for v4f32:
07081     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
07082     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
07083     //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
07084     unsigned EltStride = NumElems >> 1;
07085     while (EltStride != 0) {
07086       for (unsigned i = 0; i < EltStride; ++i) {
07087         // If V[i+EltStride] is undef and this is the first round of mixing,
07088         // then it is safe to just drop this shuffle: V[i] is already in the
07089         // right place, the one element (since it's the first round) being
07090         // inserted as undef can be dropped.  This isn't safe for successive
07091         // rounds because they will permute elements within both vectors.
07092         if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
07093             EltStride == NumElems/2)
07094           continue;
07095 
07096         V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
07097       }
07098       EltStride >>= 1;
07099     }
07100     return V[0];
07101   }
07102   return SDValue();
07103 }
07104 
07105 // LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
07106 // to create 256-bit vectors from two other 128-bit ones.
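//
// For 512-bit results the same idea is applied twice: with four operands, the
// 128-bit inputs are first concatenated pairwise into two 256-bit halves,
// which are then concatenated into the final 512-bit vector.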
07107 static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
07108   SDLoc dl(Op);
07109   MVT ResVT = Op.getSimpleValueType();
07110 
07111   assert((ResVT.is256BitVector() ||
07112           ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
07113 
07114   SDValue V1 = Op.getOperand(0);
07115   SDValue V2 = Op.getOperand(1);
07116   unsigned NumElems = ResVT.getVectorNumElements();
07117   if (ResVT.is256BitVector())
07118     return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
07119 
07120   if (Op.getNumOperands() == 4) {
07121     MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
07122                                 ResVT.getVectorNumElements()/2);
07123     SDValue V3 = Op.getOperand(2);
07124     SDValue V4 = Op.getOperand(3);
07125     return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl),
07126       Concat128BitVectors(V3, V4, HalfVT, NumElems/2, DAG, dl), ResVT, NumElems, DAG, dl);
07127   }
07128   return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
07129 }
07130 
07131 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
07132   MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType();
07133   assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
07134          (VT.is512BitVector() && (Op.getNumOperands() == 2 ||
07135           Op.getNumOperands() == 4)));
07136 
07137   // AVX can use the vinsertf128 instruction to create 256-bit vectors
07138   // from two other 128-bit ones.
07139 
07140   // A 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors.
07141   return LowerAVXCONCAT_VECTORS(Op, DAG);
07142 }
07143 
07144 
07145 //===---------------------------------------------------------------------