X86ISelLowering.cpp
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "X86IntrinsicsInfo.h"
#include <bitset>
#include <numeric>
#include <cctype>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ExperimentalVectorWideningLegalization(
    "x86-experimental-vector-widening-legalization", cl::init(false),
    cl::desc("Enable an experimental vector type legalization through widening "
             "rather than promotion."),
    cl::Hidden);

// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
                       SDValue V2);

X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                                     const X86Subtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  TD = getDataLayout();

  // Set up the TargetLowering object.
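  // Scalar integer types iterated over by the per-type setup loops below
  // (divide/remainder/multiply-high expansion and the atomics setup).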
  static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };

  // X86 is weird. It always uses i8 for shift amounts and setcc results.
  setBooleanContents(ZeroOrOneBooleanContent);
  // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // For 64-bit, since we have so many registers, use the ILP scheduler.
  // For 32-bit, use the register pressure specific scheduling.
  // For Atom, always use ILP scheduling.
  if (Subtarget->isAtom())
    setSchedulingPreference(Sched::ILP);
  else if (Subtarget->is64Bit())
    setSchedulingPreference(Sched::ILP);
  else
    setSchedulingPreference(Sched::RegPressure);
  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());

  // Bypass expensive divides on Atom when compiling with O2.
  if (TM.getOptLevel() >= CodeGenOpt::Default) {
    if (Subtarget->hasSlowDivide32())
      addBypassSlowDiv(32, 8);
    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
      addBypassSlowDiv(64, 16);
  }

  if (Subtarget->isTargetKnownWindowsMSVC()) {
    // Setup Windows compiler runtime calls.
    setLibcallName(RTLIB::SDIV_I64, "_alldiv");
    setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
    setLibcallName(RTLIB::SREM_I64, "_allrem");
    setLibcallName(RTLIB::UREM_I64, "_aullrem");
    setLibcallName(RTLIB::MUL_I64, "_allmul");
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);

    // The _ftol2 runtime function has an unusual calling conv, which
    // is modeled by a special pseudo-instruction.
    setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
    setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
    setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
    setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
  }

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetWindowsGNU()) {
    // MS runtime is weird: it exports _setjmp, but plain longjmp (no underscore)!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, &X86::GR8RegClass);
  addRegisterClass(MVT::i16, &X86::GR16RegClass);
  addRegisterClass(MVT::i32, &X86::GR32RegClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, &X86::GR64RegClass);

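  // Sign-extending loads from i1 memory are promoted, i.e. performed as loads
  // of a wider integer type (at least i8).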
  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i16, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SETOEQ and SETUNE require checking two conditions.
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
  } else if (!Subtarget->useSoftFloat()) {
    // We have an algorithm for SSE2->double, and we turn this into a
    // 64-bit FILD followed by conditional FADD for other targets.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
    // We have an algorithm for SSE2, and we turn this into a 64-bit
    // FILD for other targets.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);

  if (!Subtarget->useSoftFloat()) {
    // SSE has no i16 to fp conversion, only i32
    if (X86ScalarSSEf32) {
      setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
      // f32 and f64 cases are Legal, f80 case is not
      setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
    } else {
      setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
      setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
    }
  } else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Promote);
  }

  // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else if (!Subtarget->useSoftFloat()) {
    // Since AVX is a superset of SSE3, only check for SSE here.
    if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64; without
      // SSE3, we're stuck with a fistpll.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
  }

  if (isTargetFTOL()) {
    // Use the _ftol2 runtime function, which has a pseudo-instruction
    // to handle its weird calling convention.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Custom);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
    setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
      // Without SSE, i64->f64 goes through memory.
      setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
    }
  }

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
  for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
    MVT VT = IntVTs[i];
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Add/Sub overflow ops with an MVT::Glue result are lowered to EFLAGS
    // dependencies.
    setOperationAction(ISD::ADDC, VT, Custom);
    setOperationAction(ISD::ADDE, VT, Custom);
    setOperationAction(ISD::SUBC, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Custom);
  }

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::f32,   Expand);
  setOperationAction(ISD::BR_CC            , MVT::f64,   Expand);
  setOperationAction(ISD::BR_CC            , MVT::f80,   Expand);
  setOperationAction(ISD::BR_CC            , MVT::i8,    Expand);
  setOperationAction(ISD::BR_CC            , MVT::i16,   Expand);
  setOperationAction(ISD::BR_CC            , MVT::i32,   Expand);
  setOperationAction(ISD::BR_CC            , MVT::i64,   Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::f32,   Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::f64,   Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::f80,   Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::i8,    Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::i16,   Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::i32,   Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::i64,   Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::FREM             , MVT::f80  , Expand);
  setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);

  // Promote the i8 variants and force them on up to i32 which has a shorter
  // encoding.
  setOperationAction(ISD::CTTZ             , MVT::i8   , Promote);
  AddPromotedToType (ISD::CTTZ             , MVT::i8   , MVT::i32);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , Promote);
  AddPromotedToType (ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , MVT::i32);
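  // With BMI's TZCNT, CTTZ is already legal for the wider types and
  // CTTZ_ZERO_UNDEF can simply expand to it. Without BMI, custom-lower CTTZ
  // around BSF, which needs a fixup for a zero input.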
  if (Subtarget->hasBMI()) {
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  } else {
    setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
    setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
  }

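  // Likewise for CTLZ: with LZCNT it is legal and CTLZ_ZERO_UNDEF simply
  // expands to it; without LZCNT, both are custom-lowered around BSR, fixing
  // up the bit-index result and the zero-input case.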
  if (Subtarget->hasLZCNT()) {
    // When promoting the i8 variants, force them to i32 for a shorter
    // encoding.
    setOperationAction(ISD::CTLZ           , MVT::i8   , Promote);
    AddPromotedToType (ISD::CTLZ           , MVT::i8   , MVT::i32);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Promote);
    AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
  } else {
    setOperationAction(ISD::CTLZ           , MVT::i8   , Custom);
    setOperationAction(ISD::CTLZ           , MVT::i16  , Custom);
    setOperationAction(ISD::CTLZ           , MVT::i32  , Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Custom);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::CTLZ         , MVT::i64  , Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
    }
  }

  // Special handling for half-precision floating point conversions.
  // If we don't have F16C support, then lower half float conversions
  // into library calls.
  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
  }

  // There's never any support for operations beyond MVT::f32.
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f80, MVT::f16, Expand);

  if (Subtarget->hasPOPCNT()) {
    setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
  } else {
    setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
    setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
    setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);

  if (!Subtarget->hasMOVBE())
    setOperationAction(ISD::BSWAP          , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i8   , Custom);
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f80  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f80  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
  }
  setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP as supported here are NOT intended to
  // implement SjLj exception handling, but rather a lightweight setjmp/longjmp
  // replacement to support continuations, user-level threading, and so on.
  // As a result, no other SjLj exception interfaces are implemented; please
  // don't build your own exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32  , Custom);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  setOperationAction(ISD::BlockAddress    , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
    setOperationAction(ISD::BlockAddress  , MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SHL_PARTS     , MVT::i64  , Custom);
    setOperationAction(ISD::SRA_PARTS     , MVT::i64  , Custom);
    setOperationAction(ISD::SRL_PARTS     , MVT::i64  , Custom);
  }

  if (Subtarget->hasSSE1())
    setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);

  setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);

  // Expand certain atomics
  for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
    MVT VT = IntVTs[i];
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
  }

  if (Subtarget->hasCmpxchg16b()) {
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
  }

  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  if (Subtarget->is64Bit()) {
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);

  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
    // TargetInfo::X86_64ABIBuiltinVaList
    setOperationAction(ISD::VAARG           , MVT::Other, Custom);
    setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
  } else {
    // TargetInfo::CharPtrBuiltinVaList
    setOperationAction(ISD::VAARG           , MVT::Other, Expand);
    setOperationAction(ISD::VACOPY          , MVT::Other, Expand);
  }

  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);

  // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
  setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
  setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);

  if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    addRegisterClass(MVT::f64, &X86::FR64RegClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // Lower this to FGETSIGNx86 plus an AND.
    setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
    setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN   , MVT::f64, Expand);
    setOperationAction(ISD::FCOS   , MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSIN   , MVT::f32, Expand);
    setOperationAction(ISD::FCOS   , MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0)); // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
  } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN   , MVT::f32, Expand);
    setOperationAction(ISD::FCOS   , MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN   , MVT::f64, Expand);
      setOperationAction(ISD::FCOS   , MVT::f64, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    }
  } else if (!Subtarget->useSoftFloat()) {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);
    addRegisterClass(MVT::f32, &X86::RFP32RegClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN   , MVT::f64, Expand);
      setOperationAction(ISD::FSIN   , MVT::f32, Expand);
      setOperationAction(ISD::FCOS   , MVT::f64, Expand);
      setOperationAction(ISD::FCOS   , MVT::f32, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    }
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // We don't support FMA.
  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  // Long double always uses X87.
  if (!Subtarget->useSoftFloat()) {
    addRegisterClass(MVT::f80, &X86::RFP80RegClass);
    setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
    {
      APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
      addLegalFPImmediate(TmpFlt);  // FLD0
      TmpFlt.changeSign();
      addLegalFPImmediate(TmpFlt);  // FLD0/FCHS

      bool ignored;
      APFloat TmpFlt2(+1.0);
      TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                      &ignored);
      addLegalFPImmediate(TmpFlt2);  // FLD1
      TmpFlt2.changeSign();
      addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
    }

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN   , MVT::f80, Expand);
      setOperationAction(ISD::FCOS   , MVT::f80, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
    }

    setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
    setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
    setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
    setOperationAction(ISD::FRINT,  MVT::f80, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
    setOperationAction(ISD::FMA, MVT::f80, Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
  setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
  setOperationAction(ISD::FPOW             , MVT::f80  , Expand);

  setOperationAction(ISD::FLOG, MVT::f80, Expand);
  setOperationAction(ISD::FLOG2, MVT::f80, Expand);
  setOperationAction(ISD::FLOG10, MVT::f80, Expand);
  setOperationAction(ISD::FEXP, MVT::f80, Expand);
  setOperationAction(ISD::FEXP2, MVT::f80, Expand);
  setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
  setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);

  // First set operation action for all vector types to either promote
  // (for widening) or expand (for scalarization). Then we will selectively
  // turn on ones that can be effectively codegen'd.
  for (MVT VT : MVT::vector_valuetypes()) {
    setOperationAction(ISD::ADD , VT, Expand);
    setOperationAction(ISD::SUB , VT, Expand);
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
    setOperationAction(ISD::MUL , VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::LOAD, VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
    setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSINCOS, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FSINCOS, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
    setOperationAction(ISD::FMA,  VT, Expand);
    setOperationAction(ISD::FPOWI, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FCEIL, VT, Expand);
    setOperationAction(ISD::FTRUNC, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FNEARBYINT, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::SHL, VT, Expand);
    setOperationAction(ISD::SRA, VT, Expand);
    setOperationAction(ISD::SRL, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::FLOG, VT, Expand);
    setOperationAction(ISD::FLOG2, VT, Expand);
    setOperationAction(ISD::FLOG10, VT, Expand);
    setOperationAction(ISD::FEXP, VT, Expand);
    setOperationAction(ISD::FEXP2, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
    setOperationAction(ISD::TRUNCATE, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
    setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
    setOperationAction(ISD::ANY_EXTEND, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(InnerVT, VT, Expand);

      setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);

      // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
      // types, we have to deal with them whether we ask for Expansion or not.
      // Setting Expand causes its own optimisation problems though, so leave
      // them legal.
      if (VT.getVectorElementType() == MVT::i1)
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);

      // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
      // split/scalarized right now.
      if (VT.getVectorElementType() == MVT::f16)
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
    }
  }

  // FIXME: In order to prevent SSE instructions being expanded to MMX ones
  // with -msoft-float, disable use of MMX as well.
  if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
    addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
    // No operations on x86mmx are supported; everything uses intrinsics.
  }

  // MMX-sized vectors (other than x86mmx) are expected to be expanded
  // into smaller operations.
  for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
    setOperationAction(ISD::MULHS,              MMXTy,      Expand);
    setOperationAction(ISD::AND,                MMXTy,      Expand);
    setOperationAction(ISD::OR,                 MMXTy,      Expand);
    setOperationAction(ISD::XOR,                MMXTy,      Expand);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MMXTy,      Expand);
    setOperationAction(ISD::SELECT,             MMXTy,      Expand);
    setOperationAction(ISD::BITCAST,            MMXTy,      Expand);
  }
  setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v1i64, Expand);

  if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, &X86::VR128RegClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
    setOperationAction(ISD::FABS,               MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::VSELECT,            MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
    setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Custom);
  }

  if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, &X86::VR128RegClass);

    // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
    // registers cannot be used even for integer operations.
    addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
    addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
    addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
    addRegisterClass(MVT::v2i64, &X86::VR128RegClass);

    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,                MVT::v16i8, Custom);
    setOperationAction(ISD::MUL,                MVT::v4i32, Custom);
    setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
    setOperationAction(ISD::UMUL_LOHI,          MVT::v4i32, Custom);
    setOperationAction(ISD::SMUL_LOHI,          MVT::v4i32, Custom);
    setOperationAction(ISD::MULHU,              MVT::v8i16, Legal);
    setOperationAction(ISD::MULHS,              MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
    setOperationAction(ISD::FABS,               MVT::v2f64, Custom);

    setOperationAction(ISD::SETCC,              MVT::v2i64, Custom);
    setOperationAction(ISD::SETCC,              MVT::v16i8, Custom);
    setOperationAction(ISD::SETCC,              MVT::v8i16, Custom);
    setOperationAction(ISD::SETCC,              MVT::v4i32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    setOperationAction(ISD::CTPOP,              MVT::v16i8, Custom);
    setOperationAction(ISD::CTPOP,              MVT::v8i16, Custom);
    setOperationAction(ISD::CTPOP,              MVT::v4i32, Custom);
    setOperationAction(ISD::CTPOP,              MVT::v2i64, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
      MVT VT = (MVT::SimpleValueType)i;
      // Do not attempt to custom lower non-power-of-2 vectors
      if (!isPowerOf2_32(VT.getVectorNumElements()))
        continue;
      // Do not attempt to custom lower non-128-bit vectors
      if (!VT.is128BitVector())
        continue;
      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
      setOperationAction(ISD::VSELECT,            VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    // We support custom legalizing of sext and anyext loads for specific
    // memory vector types which we can load as a scalar (or sequence of
    // scalars) and extend in-register to a legal 128-bit vector type. For sext
    // loads these must work with a single scalar load.
    for (MVT VT : MVT::integer_vector_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
    }

    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::VSELECT,            MVT::v2f64, Custom);
    setOperationAction(ISD::VSELECT,            MVT::v2i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
      MVT VT = (MVT::SimpleValueType)i;

      // Do not attempt to promote non-128-bit vectors
      if (!VT.is128BitVector())
        continue;

      setOperationAction(ISD::AND,    VT, Promote);
      AddPromotedToType (ISD::AND,    VT, MVT::v2i64);
      setOperationAction(ISD::OR,     VT, Promote);
      AddPromotedToType (ISD::OR,     VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    VT, Promote);
      AddPromotedToType (ISD::XOR,    VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   VT, Promote);
      AddPromotedToType (ISD::LOAD,   VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);

    setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);

    setOperationAction(ISD::SINT_TO_FP,         MVT::v2i32, Custom);

    setOperationAction(ISD::UINT_TO_FP,         MVT::v4i8,  Custom);
    setOperationAction(ISD::UINT_TO_FP,         MVT::v4i16, Custom);
    // As there is no 64-bit GPR available, we need to build a special custom
    // sequence to convert from v2i32 to v2f32.
    if (!Subtarget->is64Bit())
      setOperationAction(ISD::UINT_TO_FP,       MVT::v2f32, Custom);

    setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
    setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);

    setOperationAction(ISD::BITCAST,            MVT::v2i32, Custom);
    setOperationAction(ISD::BITCAST,            MVT::v4i16, Custom);
    setOperationAction(ISD::BITCAST,            MVT::v8i8,  Custom);
  }

  if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
    for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
      setOperationAction(ISD::FFLOOR,           RoundedTy,  Legal);
      setOperationAction(ISD::FCEIL,            RoundedTy,  Legal);
      setOperationAction(ISD::FTRUNC,           RoundedTy,  Legal);
      setOperationAction(ISD::FRINT,            RoundedTy,  Legal);
      setOperationAction(ISD::FNEARBYINT,       RoundedTy,  Legal);
    }

    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL,                MVT::v4i32, Legal);

    // We directly match byte blends in the backend as they match the VSELECT
    // condition form.
    setOperationAction(ISD::VSELECT,            MVT::v16i8, Legal);

    // SSE41 brings specific instructions for doing vector sign extend even in
    // cases where we don't have SRA.
    for (MVT VT : MVT::integer_vector_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
    }

    // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);

    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);

    // i8 and i16 vectors are custom because the source register and source
    // memory operand types are not the same width.  f32 vectors are
    // custom since the immediate controlling the insert encodes additional
    // information.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

    // FIXME: these should be Legal, but that's only for the case where
    // the index is constant.  For now custom expand to deal with that.
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }
  }

  if (Subtarget->hasSSE2()) {
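    // In-register vector sign extensions are custom lowered (SSE2 alone has
    // no single instruction for them; SSE4.1's PMOVSX helps when available).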
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);

    setOperationAction(ISD::SRL,               MVT::v8i16, Custom);
    setOperationAction(ISD::SRL,               MVT::v16i8, Custom);

    setOperationAction(ISD::SHL,               MVT::v8i16, Custom);
    setOperationAction(ISD::SHL,               MVT::v16i8, Custom);

    setOperationAction(ISD::SRA,               MVT::v8i16, Custom);
    setOperationAction(ISD::SRA,               MVT::v16i8, Custom);

    // In the customized shift lowering, the legal cases in AVX2 will be
    // recognized.
    setOperationAction(ISD::SRL,               MVT::v2i64, Custom);
    setOperationAction(ISD::SRL,               MVT::v4i32, Custom);

    setOperationAction(ISD::SHL,               MVT::v2i64, Custom);
    setOperationAction(ISD::SHL,               MVT::v4i32, Custom);

    setOperationAction(ISD::SRA,               MVT::v4i32, Custom);
  }

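  // AVX (hasFp256) provides the 256-bit register classes and floating-point
  // operations; most 256-bit integer operations additionally require AVX2
  // (hasInt256), handled further below.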
  if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
    addRegisterClass(MVT::v32i8,  &X86::VR256RegClass);
    addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
    addRegisterClass(MVT::v8i32,  &X86::VR256RegClass);
    addRegisterClass(MVT::v8f32,  &X86::VR256RegClass);
    addRegisterClass(MVT::v4i64,  &X86::VR256RegClass);
    addRegisterClass(MVT::v4f64,  &X86::VR256RegClass);

    setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);

    setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
    setOperationAction(ISD::FFLOOR,             MVT::v8f32, Legal);
    setOperationAction(ISD::FCEIL,              MVT::v8f32, Legal);
    setOperationAction(ISD::FTRUNC,             MVT::v8f32, Legal);
    setOperationAction(ISD::FRINT,              MVT::v8f32, Legal);
    setOperationAction(ISD::FNEARBYINT,         MVT::v8f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
    setOperationAction(ISD::FABS,               MVT::v8f32, Custom);

    setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f64, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
    setOperationAction(ISD::FFLOOR,             MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL,              MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC,             MVT::v4f64, Legal);
    setOperationAction(ISD::FRINT,              MVT::v4f64, Legal);
    setOperationAction(ISD::FNEARBYINT,         MVT::v4f64, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
    setOperationAction(ISD::FABS,               MVT::v4f64, Custom);

    // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
    // even though v8i16 is a legal type.
    setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Promote);
    setOperationAction(ISD::FP_TO_UINT,         MVT::v8i16, Promote);
    setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);

    setOperationAction(ISD::SINT_TO_FP,         MVT::v8i16, Promote);
    setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
    setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);

    setOperationAction(ISD::UINT_TO_FP,         MVT::v8i8,  Custom);
    setOperationAction(ISD::UINT_TO_FP,         MVT::v8i16, Custom);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);

    setOperationAction(ISD::SRL,               MVT::v16i16, Custom);
    setOperationAction(ISD::SRL,               MVT::v32i8, Custom);

    setOperationAction(ISD::SHL,               MVT::v16i16, Custom);
    setOperationAction(ISD::SHL,               MVT::v32i8, Custom);

    setOperationAction(ISD::SRA,               MVT::v16i16, Custom);
    setOperationAction(ISD::SRA,               MVT::v32i8, Custom);

    setOperationAction(ISD::SETCC,             MVT::v32i8, Custom);
    setOperationAction(ISD::SETCC,             MVT::v16i16, Custom);
    setOperationAction(ISD::SETCC,             MVT::v8i32, Custom);
    setOperationAction(ISD::SETCC,             MVT::v4i64, Custom);

    setOperationAction(ISD::SELECT,            MVT::v4f64, Custom);
    setOperationAction(ISD::SELECT,            MVT::v4i64, Custom);
    setOperationAction(ISD::SELECT,            MVT::v8f32, Custom);

    setOperationAction(ISD::SIGN_EXTEND,       MVT::v4i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i16, Custom);
    setOperationAction(ISD::ZERO_EXTEND,       MVT::v4i64, Custom);
    setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i16, Custom);
    setOperationAction(ISD::ANY_EXTEND,        MVT::v4i64, Custom);
    setOperationAction(ISD::ANY_EXTEND,        MVT::v8i32, Custom);
    setOperationAction(ISD::ANY_EXTEND,        MVT::v16i16, Custom);
    setOperationAction(ISD::TRUNCATE,          MVT::v16i8, Custom);
    setOperationAction(ISD::TRUNCATE,          MVT::v8i16, Custom);
    setOperationAction(ISD::TRUNCATE,          MVT::v4i32, Custom);

    setOperationAction(ISD::CTPOP,             MVT::v32i8, Custom);
    setOperationAction(ISD::CTPOP,             MVT::v16i16, Custom);
    setOperationAction(ISD::CTPOP,             MVT::v8i32, Custom);
    setOperationAction(ISD::CTPOP,             MVT::v4i64, Custom);

01114     if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
01115       setOperationAction(ISD::FMA,             MVT::v8f32, Legal);
01116       setOperationAction(ISD::FMA,             MVT::v4f64, Legal);
01117       setOperationAction(ISD::FMA,             MVT::v4f32, Legal);
01118       setOperationAction(ISD::FMA,             MVT::v2f64, Legal);
01119       setOperationAction(ISD::FMA,             MVT::f32, Legal);
01120       setOperationAction(ISD::FMA,             MVT::f64, Legal);
01121     }
01122 
01123     if (Subtarget->hasInt256()) {
01124       setOperationAction(ISD::ADD,             MVT::v4i64, Legal);
01125       setOperationAction(ISD::ADD,             MVT::v8i32, Legal);
01126       setOperationAction(ISD::ADD,             MVT::v16i16, Legal);
01127       setOperationAction(ISD::ADD,             MVT::v32i8, Legal);
01128 
01129       setOperationAction(ISD::SUB,             MVT::v4i64, Legal);
01130       setOperationAction(ISD::SUB,             MVT::v8i32, Legal);
01131       setOperationAction(ISD::SUB,             MVT::v16i16, Legal);
01132       setOperationAction(ISD::SUB,             MVT::v32i8, Legal);
01133 
01134       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01135       setOperationAction(ISD::MUL,             MVT::v8i32, Legal);
01136       setOperationAction(ISD::MUL,             MVT::v16i16, Legal);
01137       setOperationAction(ISD::MUL,             MVT::v32i8, Custom);
01138 
01139       setOperationAction(ISD::UMUL_LOHI,       MVT::v8i32, Custom);
01140       setOperationAction(ISD::SMUL_LOHI,       MVT::v8i32, Custom);
01141       setOperationAction(ISD::MULHU,           MVT::v16i16, Legal);
01142       setOperationAction(ISD::MULHS,           MVT::v16i16, Legal);
01143 
01144       // The custom lowering of UINT_TO_FP for v8i32 becomes interesting
01145       // when we have a 256-bit-wide blend with immediate.
01146       setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
01147 
01148       // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
01149       setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
01150       setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
01151       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
01152       setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
01153       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
01154       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
01155 
01156       setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
01157       setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
01158       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
01159       setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
01160       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
01161       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
01162     } else {
01163       setOperationAction(ISD::ADD,             MVT::v4i64, Custom);
01164       setOperationAction(ISD::ADD,             MVT::v8i32, Custom);
01165       setOperationAction(ISD::ADD,             MVT::v16i16, Custom);
01166       setOperationAction(ISD::ADD,             MVT::v32i8, Custom);
01167 
01168       setOperationAction(ISD::SUB,             MVT::v4i64, Custom);
01169       setOperationAction(ISD::SUB,             MVT::v8i32, Custom);
01170       setOperationAction(ISD::SUB,             MVT::v16i16, Custom);
01171       setOperationAction(ISD::SUB,             MVT::v32i8, Custom);
01172 
01173       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01174       setOperationAction(ISD::MUL,             MVT::v8i32, Custom);
01175       setOperationAction(ISD::MUL,             MVT::v16i16, Custom);
01176       setOperationAction(ISD::MUL,             MVT::v32i8, Custom);
01177     }
01178 
01179     // In the customized shift lowering, the legal cases in AVX2 will be
01180     // recognized.
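    // (The AVX2 variable-shift instructions VPSLLVD/Q, VPSRLVD/Q and VPSRAVD
    // cover the per-element cases; there is no 256-bit variable arithmetic
    // right shift for i64 elements, which is why v4i64 SRA is not set here.)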
01181     setOperationAction(ISD::SRL,               MVT::v4i64, Custom);
01182     setOperationAction(ISD::SRL,               MVT::v8i32, Custom);
01183 
01184     setOperationAction(ISD::SHL,               MVT::v4i64, Custom);
01185     setOperationAction(ISD::SHL,               MVT::v8i32, Custom);
01186 
01187     setOperationAction(ISD::SRA,               MVT::v8i32, Custom);
01188 
01189     // Custom lower several nodes for 256-bit types.
01190     for (MVT VT : MVT::vector_valuetypes()) {
01191       if (VT.getScalarSizeInBits() >= 32) {
01192         setOperationAction(ISD::MLOAD,  VT, Legal);
01193         setOperationAction(ISD::MSTORE, VT, Legal);
01194       }
01195       // Extract subvector is special because the value type
01196       // (result) is 128-bit but the source is 256-bit wide.
01197       if (VT.is128BitVector()) {
01198         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01199       }
01200       // Do not attempt to custom lower other non-256-bit vectors
01201       if (!VT.is256BitVector())
01202         continue;
01203 
01204       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
01205       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
01206       setOperationAction(ISD::VSELECT,            VT, Custom);
01207       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
01208       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
01209       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
01210       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Custom);
01211       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
01212     }
01213 
01214     if (Subtarget->hasInt256())
01215       setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
01216 
01217 
01218     // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
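    // AddPromotedToType makes this promotion a simple retyping: e.g. an AND on
    // v32i8 is, in effect, bitcast to v4i64, performed there, and bitcast back,
    // since the bitwise result is the same regardless of element width.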
01219     for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
01220       MVT VT = (MVT::SimpleValueType)i;
01221 
01222       // Do not attempt to promote non-256-bit vectors
01223       if (!VT.is256BitVector())
01224         continue;
01225 
01226       setOperationAction(ISD::AND,    VT, Promote);
01227       AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
01228       setOperationAction(ISD::OR,     VT, Promote);
01229       AddPromotedToType (ISD::OR,     VT, MVT::v4i64);
01230       setOperationAction(ISD::XOR,    VT, Promote);
01231       AddPromotedToType (ISD::XOR,    VT, MVT::v4i64);
01232       setOperationAction(ISD::LOAD,   VT, Promote);
01233       AddPromotedToType (ISD::LOAD,   VT, MVT::v4i64);
01234       setOperationAction(ISD::SELECT, VT, Promote);
01235       AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
01236     }
01237   }
01238 
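  // AVX-512 introduces dedicated mask registers; the VK* register classes
  // below hold i1 mask vectors (k0-k7) rather than data in the ZMM registers.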
01239   if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
01240     addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
01241     addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
01242     addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
01243     addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
01244 
01245     addRegisterClass(MVT::i1,     &X86::VK1RegClass);
01246     addRegisterClass(MVT::v8i1,   &X86::VK8RegClass);
01247     addRegisterClass(MVT::v16i1,  &X86::VK16RegClass);
01248 
01249     for (MVT VT : MVT::fp_vector_valuetypes())
01250       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
01251 
01252     setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
01253     setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
01254     setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
01255     setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
01256     setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
01257     setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
01258     setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64,  MVT::v8i8,  Legal);
01259     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64,  MVT::v8i8,  Legal);
01260     setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64,  MVT::v8i16,  Legal);
01261     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64,  MVT::v8i16,  Legal);
01262     setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64,  MVT::v8i32,  Legal);
01263     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64,  MVT::v8i32,  Legal);
01264 
01265     setOperationAction(ISD::BR_CC,              MVT::i1,    Expand);
01266     setOperationAction(ISD::SETCC,              MVT::i1,    Custom);
01267     setOperationAction(ISD::XOR,                MVT::i1,    Legal);
01268     setOperationAction(ISD::OR,                 MVT::i1,    Legal);
01269     setOperationAction(ISD::AND,                MVT::i1,    Legal);
01270     setOperationAction(ISD::SUB,                MVT::i1,    Custom);
01271     setOperationAction(ISD::ADD,                MVT::i1,    Custom);
01272     setOperationAction(ISD::MUL,                MVT::i1,    Custom);
01273     setOperationAction(ISD::LOAD,               MVT::v16f32, Legal);
01274     setOperationAction(ISD::LOAD,               MVT::v8f64, Legal);
01275     setOperationAction(ISD::LOAD,               MVT::v8i64, Legal);
01276     setOperationAction(ISD::LOAD,               MVT::v16i32, Legal);
01277     setOperationAction(ISD::LOAD,               MVT::v16i1, Legal);
01278 
01279     setOperationAction(ISD::FADD,               MVT::v16f32, Legal);
01280     setOperationAction(ISD::FSUB,               MVT::v16f32, Legal);
01281     setOperationAction(ISD::FMUL,               MVT::v16f32, Legal);
01282     setOperationAction(ISD::FDIV,               MVT::v16f32, Legal);
01283     setOperationAction(ISD::FSQRT,              MVT::v16f32, Legal);
01284     setOperationAction(ISD::FNEG,               MVT::v16f32, Custom);
01285 
01286     setOperationAction(ISD::FADD,               MVT::v8f64, Legal);
01287     setOperationAction(ISD::FSUB,               MVT::v8f64, Legal);
01288     setOperationAction(ISD::FMUL,               MVT::v8f64, Legal);
01289     setOperationAction(ISD::FDIV,               MVT::v8f64, Legal);
01290     setOperationAction(ISD::FSQRT,              MVT::v8f64, Legal);
01291     setOperationAction(ISD::FNEG,               MVT::v8f64, Custom);
01292     setOperationAction(ISD::FMA,                MVT::v8f64, Legal);
01293     setOperationAction(ISD::FMA,                MVT::v16f32, Legal);
01294 
01295     setOperationAction(ISD::FP_TO_SINT,         MVT::i32, Legal);
01296     setOperationAction(ISD::FP_TO_UINT,         MVT::i32, Legal);
01297     setOperationAction(ISD::SINT_TO_FP,         MVT::i32, Legal);
01298     setOperationAction(ISD::UINT_TO_FP,         MVT::i32, Legal);
01299     if (Subtarget->is64Bit()) {
01300       setOperationAction(ISD::FP_TO_UINT,       MVT::i64, Legal);
01301       setOperationAction(ISD::FP_TO_SINT,       MVT::i64, Legal);
01302       setOperationAction(ISD::SINT_TO_FP,       MVT::i64, Legal);
01303       setOperationAction(ISD::UINT_TO_FP,       MVT::i64, Legal);
01304     }
01305     setOperationAction(ISD::FP_TO_SINT,         MVT::v16i32, Legal);
01306     setOperationAction(ISD::FP_TO_UINT,         MVT::v16i32, Legal);
01307     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i32, Legal);
01308     setOperationAction(ISD::FP_TO_UINT,         MVT::v4i32, Legal);
01309     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i32, Legal);
01310     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i1,   Custom);
01311     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i1,  Custom);
01312     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i8,  Promote);
01313     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i16, Promote);
01314     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i32, Legal);
01315     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i32, Legal);
01316     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Legal);
01317     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i8, Custom);
01318     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i16, Custom);
01319     setOperationAction(ISD::FP_ROUND,           MVT::v8f32, Legal);
01320     setOperationAction(ISD::FP_EXTEND,          MVT::v8f32, Legal);
01321 
01322     setOperationAction(ISD::TRUNCATE,           MVT::i1, Custom);
01323     setOperationAction(ISD::TRUNCATE,           MVT::v16i8, Custom);
01324     setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
01325     if (Subtarget->hasDQI()) {
01326       setOperationAction(ISD::TRUNCATE,           MVT::v2i1, Custom);
01327       setOperationAction(ISD::TRUNCATE,           MVT::v4i1, Custom);
01328     }
01329     setOperationAction(ISD::TRUNCATE,           MVT::v8i1, Custom);
01330     setOperationAction(ISD::TRUNCATE,           MVT::v16i1, Custom);
01331     setOperationAction(ISD::TRUNCATE,           MVT::v16i16, Custom);
01332     setOperationAction(ISD::ZERO_EXTEND,        MVT::v16i32, Custom);
01333     setOperationAction(ISD::ZERO_EXTEND,        MVT::v8i64, Custom);
01334     setOperationAction(ISD::ANY_EXTEND,         MVT::v16i32, Custom);
01335     setOperationAction(ISD::ANY_EXTEND,         MVT::v8i64, Custom);
01336     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i32, Custom);
01337     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i64, Custom);
01338     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i8, Custom);
01339     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i16, Custom);
01340     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i16, Custom);
01341     if (Subtarget->hasDQI()) {
01342       setOperationAction(ISD::SIGN_EXTEND,        MVT::v4i32, Custom);
01343       setOperationAction(ISD::SIGN_EXTEND,        MVT::v2i64, Custom);
01344     }
01345     setOperationAction(ISD::FFLOOR,             MVT::v16f32, Legal);
01346     setOperationAction(ISD::FFLOOR,             MVT::v8f64, Legal);
01347     setOperationAction(ISD::FCEIL,              MVT::v16f32, Legal);
01348     setOperationAction(ISD::FCEIL,              MVT::v8f64, Legal);
01349     setOperationAction(ISD::FTRUNC,             MVT::v16f32, Legal);
01350     setOperationAction(ISD::FTRUNC,             MVT::v8f64, Legal);
01351     setOperationAction(ISD::FRINT,              MVT::v16f32, Legal);
01352     setOperationAction(ISD::FRINT,              MVT::v8f64, Legal);
01353     setOperationAction(ISD::FNEARBYINT,         MVT::v16f32, Legal);
01354     setOperationAction(ISD::FNEARBYINT,         MVT::v8f64, Legal);
01355 
01356     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8f64,  Custom);
01357     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i64,  Custom);
01358     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16f32,  Custom);
01359     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i32,  Custom);
01360     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i1, Legal);
01361 
01362     setOperationAction(ISD::SETCC,              MVT::v16i1, Custom);
01363     setOperationAction(ISD::SETCC,              MVT::v8i1, Custom);
01364 
01365     setOperationAction(ISD::MUL,              MVT::v8i64, Custom);
01366 
01367     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1,  Custom);
01368     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
01369     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i1, Custom);
01370     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i1, Custom);
01371     setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i1, Custom);
01372     setOperationAction(ISD::BUILD_VECTOR,       MVT::v16i1, Custom);
01373     setOperationAction(ISD::SELECT,             MVT::v8f64, Custom);
01374     setOperationAction(ISD::SELECT,             MVT::v8i64, Custom);
01375     setOperationAction(ISD::SELECT,             MVT::v16f32, Custom);
01376     setOperationAction(ISD::SELECT,             MVT::v16i1, Custom);
01377     setOperationAction(ISD::SELECT,             MVT::v8i1,  Custom);
01378 
01379     setOperationAction(ISD::ADD,                MVT::v8i64, Legal);
01380     setOperationAction(ISD::ADD,                MVT::v16i32, Legal);
01381 
01382     setOperationAction(ISD::SUB,                MVT::v8i64, Legal);
01383     setOperationAction(ISD::SUB,                MVT::v16i32, Legal);
01384 
01385     setOperationAction(ISD::MUL,                MVT::v16i32, Legal);
01386 
01387     setOperationAction(ISD::SRL,                MVT::v8i64, Custom);
01388     setOperationAction(ISD::SRL,                MVT::v16i32, Custom);
01389 
01390     setOperationAction(ISD::SHL,                MVT::v8i64, Custom);
01391     setOperationAction(ISD::SHL,                MVT::v16i32, Custom);
01392 
01393     setOperationAction(ISD::SRA,                MVT::v8i64, Custom);
01394     setOperationAction(ISD::SRA,                MVT::v16i32, Custom);
01395 
01396     setOperationAction(ISD::AND,                MVT::v8i64, Legal);
01397     setOperationAction(ISD::OR,                 MVT::v8i64, Legal);
01398     setOperationAction(ISD::XOR,                MVT::v8i64, Legal);
01399     setOperationAction(ISD::AND,                MVT::v16i32, Legal);
01400     setOperationAction(ISD::OR,                 MVT::v16i32, Legal);
01401     setOperationAction(ISD::XOR,                MVT::v16i32, Legal);
01402 
01403     if (Subtarget->hasCDI()) {
01404       setOperationAction(ISD::CTLZ,             MVT::v8i64, Legal);
01405       setOperationAction(ISD::CTLZ,             MVT::v16i32, Legal);
01406     }
01407     if (Subtarget->hasDQI()) {
01408       setOperationAction(ISD::MUL,             MVT::v2i64, Legal);
01409       setOperationAction(ISD::MUL,             MVT::v4i64, Legal);
01410       setOperationAction(ISD::MUL,             MVT::v8i64, Legal);
01411     }
01412     // Custom lower several nodes.
01413     for (MVT VT : MVT::vector_valuetypes()) {
01414       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
01415       if (EltSize == 1) {
01416         setOperationAction(ISD::AND, VT, Legal);
01417         setOperationAction(ISD::OR,  VT, Legal);
01418         setOperationAction(ISD::XOR,  VT, Legal);
01419       }
01420       if (EltSize >= 32 && VT.getSizeInBits() <= 512) {
01421         setOperationAction(ISD::MGATHER,  VT, Custom);
01422         setOperationAction(ISD::MSCATTER, VT, Custom);
01423       }
01424       // Extract subvector is special because the value type
01425       // (result) is 256/128-bit but the source is 512-bit wide.
01426       if (VT.is128BitVector() || VT.is256BitVector()) {
01427         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01428       }
01429       if (VT.getVectorElementType() == MVT::i1)
01430         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
01431 
01432       // Do not attempt to custom lower other non-512-bit vectors
01433       if (!VT.is512BitVector())
01434         continue;
01435 
01436       if (EltSize >= 32) {
01437         setOperationAction(ISD::VECTOR_SHUFFLE,      VT, Custom);
01438         setOperationAction(ISD::INSERT_VECTOR_ELT,   VT, Custom);
01439         setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01440         setOperationAction(ISD::VSELECT,             VT, Legal);
01441         setOperationAction(ISD::EXTRACT_VECTOR_ELT,  VT, Custom);
01442         setOperationAction(ISD::SCALAR_TO_VECTOR,    VT, Custom);
01443         setOperationAction(ISD::INSERT_SUBVECTOR,    VT, Custom);
01444         setOperationAction(ISD::MLOAD,               VT, Legal);
01445         setOperationAction(ISD::MSTORE,              VT, Legal);
01446       }
01447     }
01448     for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
01449       MVT VT = (MVT::SimpleValueType)i;
01450 
01451       // Do not attempt to promote non-512-bit vectors.
01452       if (!VT.is512BitVector())
01453         continue;
01454 
01455       setOperationAction(ISD::SELECT, VT, Promote);
01456       AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
01457     }
01458   } // has AVX-512
01459 
01460   if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
01461     addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
01462     addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
01463 
01464     addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
01465     addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
01466 
01467     setOperationAction(ISD::LOAD,               MVT::v32i16, Legal);
01468     setOperationAction(ISD::LOAD,               MVT::v64i8, Legal);
01469     setOperationAction(ISD::SETCC,              MVT::v32i1, Custom);
01470     setOperationAction(ISD::SETCC,              MVT::v64i1, Custom);
01471     setOperationAction(ISD::ADD,                MVT::v32i16, Legal);
01472     setOperationAction(ISD::ADD,                MVT::v64i8, Legal);
01473     setOperationAction(ISD::SUB,                MVT::v32i16, Legal);
01474     setOperationAction(ISD::SUB,                MVT::v64i8, Legal);
01475     setOperationAction(ISD::MUL,                MVT::v32i16, Legal);
01476     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i1, Custom);
01477     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i1, Custom);
01478     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v32i1, Custom);
01479     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v64i1, Custom);
01480     setOperationAction(ISD::SELECT,             MVT::v32i1, Custom);
01481     setOperationAction(ISD::SELECT,             MVT::v64i1, Custom);
01482     setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i8, Custom);
01483     setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i8, Custom);
01484     setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i16, Custom);
01485     setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i16, Custom);
01486     setOperationAction(ISD::SIGN_EXTEND,        MVT::v64i8, Custom);
01487     setOperationAction(ISD::ZERO_EXTEND,        MVT::v64i8, Custom);
01488     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v32i1, Custom);
01489     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v64i1, Custom);
01490     setOperationAction(ISD::VSELECT,            MVT::v32i16, Legal);
01491     setOperationAction(ISD::VSELECT,            MVT::v64i8, Legal);
01492     setOperationAction(ISD::TRUNCATE,           MVT::v32i1, Custom);
01493     setOperationAction(ISD::TRUNCATE,           MVT::v64i1, Custom);
01494 
01495     for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
01496       const MVT VT = (MVT::SimpleValueType)i;
01497 
01498       const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
01499 
01500       // Do not attempt to promote non-512-bit vectors.
01501       if (!VT.is512BitVector())
01502         continue;
01503 
01504       if (EltSize < 32) {
01505         setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01506         setOperationAction(ISD::VSELECT,             VT, Legal);
01507       }
01508     }
01509   }
01510 
01511   if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
01512     addRegisterClass(MVT::v4i1,   &X86::VK4RegClass);
01513     addRegisterClass(MVT::v2i1,   &X86::VK2RegClass);
01514 
01515     setOperationAction(ISD::SETCC,              MVT::v4i1, Custom);
01516     setOperationAction(ISD::SETCC,              MVT::v2i1, Custom);
01517     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v4i1, Custom);
01518     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i1, Custom);
01519     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v8i1, Custom);
01520     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v4i1, Custom);
01521     setOperationAction(ISD::SELECT,             MVT::v4i1, Custom);
01522     setOperationAction(ISD::SELECT,             MVT::v2i1, Custom);
01523     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i1, Custom);
01524     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i1, Custom);
01525 
01526     setOperationAction(ISD::AND,                MVT::v8i32, Legal);
01527     setOperationAction(ISD::OR,                 MVT::v8i32, Legal);
01528     setOperationAction(ISD::XOR,                MVT::v8i32, Legal);
01529     setOperationAction(ISD::AND,                MVT::v4i32, Legal);
01530     setOperationAction(ISD::OR,                 MVT::v4i32, Legal);
01531     setOperationAction(ISD::XOR,                MVT::v4i32, Legal);
01532     setOperationAction(ISD::SRA,                MVT::v2i64, Custom);
01533     setOperationAction(ISD::SRA,                MVT::v4i64, Custom);
01534   }
01535 
01536   // We want to custom lower some of our intrinsics.
01537   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
01538   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
01539   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
01540   if (!Subtarget->is64Bit())
01541     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
01542 
01543   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
01544   // handle type legalization for these operations here.
01545   //
01546   // FIXME: We really should do custom legalization for addition and
01547   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
01548   // than generic legalization for 64-bit multiplication-with-overflow, though.
01549   for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
01550     // Add/Sub/Mul with overflow operations are custom lowered.
01551     MVT VT = IntVTs[i];
01552     setOperationAction(ISD::SADDO, VT, Custom);
01553     setOperationAction(ISD::UADDO, VT, Custom);
01554     setOperationAction(ISD::SSUBO, VT, Custom);
01555     setOperationAction(ISD::USUBO, VT, Custom);
01556     setOperationAction(ISD::SMULO, VT, Custom);
01557     setOperationAction(ISD::UMULO, VT, Custom);
01558   }
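  // Roughly, the custom lowering turns each *.with.overflow intrinsic into an
  // X86 arithmetic node that also produces EFLAGS, so the overflow bit can be
  // read with a SETCC/JCC instead of being recomputed.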
01559 
01560 
01561   if (!Subtarget->is64Bit()) {
01562     // These libcalls are not available in 32-bit.
01563     setLibcallName(RTLIB::SHL_I128, nullptr);
01564     setLibcallName(RTLIB::SRL_I128, nullptr);
01565     setLibcallName(RTLIB::SRA_I128, nullptr);
01566   }
01567 
01568   // Combine sin / cos into one node or libcall if possible.
01569   if (Subtarget->hasSinCos()) {
01570     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
01571     setLibcallName(RTLIB::SINCOS_F64, "sincos");
01572     if (Subtarget->isTargetDarwin()) {
01573       // For MacOSX, we don't want the normal expansion of a libcall to sincos.
01574       // We want to issue a libcall to __sincos_stret to avoid memory traffic.
01575       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
01576       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
01577     }
01578   }
01579 
01580   if (Subtarget->isTargetWin64()) {
01581     setOperationAction(ISD::SDIV, MVT::i128, Custom);
01582     setOperationAction(ISD::UDIV, MVT::i128, Custom);
01583     setOperationAction(ISD::SREM, MVT::i128, Custom);
01584     setOperationAction(ISD::UREM, MVT::i128, Custom);
01585     setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
01586     setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
01587   }
01588 
01589   // We have target-specific dag combine patterns for the following nodes:
01590   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
01591   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
01592   setTargetDAGCombine(ISD::BITCAST);
01593   setTargetDAGCombine(ISD::VSELECT);
01594   setTargetDAGCombine(ISD::SELECT);
01595   setTargetDAGCombine(ISD::SHL);
01596   setTargetDAGCombine(ISD::SRA);
01597   setTargetDAGCombine(ISD::SRL);
01598   setTargetDAGCombine(ISD::OR);
01599   setTargetDAGCombine(ISD::AND);
01600   setTargetDAGCombine(ISD::ADD);
01601   setTargetDAGCombine(ISD::FADD);
01602   setTargetDAGCombine(ISD::FSUB);
01603   setTargetDAGCombine(ISD::FMA);
01604   setTargetDAGCombine(ISD::SUB);
01605   setTargetDAGCombine(ISD::LOAD);
01606   setTargetDAGCombine(ISD::MLOAD);
01607   setTargetDAGCombine(ISD::STORE);
01608   setTargetDAGCombine(ISD::MSTORE);
01609   setTargetDAGCombine(ISD::ZERO_EXTEND);
01610   setTargetDAGCombine(ISD::ANY_EXTEND);
01611   setTargetDAGCombine(ISD::SIGN_EXTEND);
01612   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
01613   setTargetDAGCombine(ISD::SINT_TO_FP);
01614   setTargetDAGCombine(ISD::SETCC);
01615   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
01616   setTargetDAGCombine(ISD::BUILD_VECTOR);
01617   setTargetDAGCombine(ISD::MUL);
01618   setTargetDAGCombine(ISD::XOR);
01619 
01620   computeRegisterProperties(Subtarget->getRegisterInfo());
01621 
01622   // On Darwin, -Os means optimize for size without hurting performance,
01623   // so do not reduce the limit.
01624   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
01625   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
01626   MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
01627   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
01628   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
01629   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
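  // These Max* limits cap how many stores the backend will emit when expanding
  // @llvm.memset/memcpy/memmove inline; above the limit the library routine is
  // called instead.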
01630   setPrefLoopAlignment(4); // 2^4 bytes.
01631 
01632   // Predictable cmovs don't hurt on Atom because it's in-order.
01633   PredictableSelectIsExpensive = !Subtarget->isAtom();
01634   EnableExtLdPromotion = true;
01635   setPrefFunctionAlignment(4); // 2^4 bytes.
01636 
01637   verifyIntrinsicTables();
01638 }
01639 
01640 // This has so far only been implemented for 64-bit MachO.
01641 bool X86TargetLowering::useLoadStackGuardNode() const {
01642   return Subtarget->isTargetMachO() && Subtarget->is64Bit();
01643 }
01644 
01645 TargetLoweringBase::LegalizeTypeAction
01646 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
01647   if (ExperimentalVectorWideningLegalization &&
01648       VT.getVectorNumElements() != 1 &&
01649       VT.getVectorElementType().getSimpleVT() != MVT::i1)
01650     return TypeWidenVector;
01651 
01652   return TargetLoweringBase::getPreferredVectorAction(VT);
01653 }
01654 
01655 EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
01656   if (!VT.isVector())
01657     return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
01658 
01659   const unsigned NumElts = VT.getVectorNumElements();
01660   const EVT EltVT = VT.getVectorElementType();
01661   if (VT.is512BitVector()) {
01662     if (Subtarget->hasAVX512())
01663       if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
01664           EltVT == MVT::f32 || EltVT == MVT::f64)
01665         switch(NumElts) {
01666         case  8: return MVT::v8i1;
01667         case 16: return MVT::v16i1;
01668       }
01669     if (Subtarget->hasBWI())
01670       if (EltVT == MVT::i8 || EltVT == MVT::i16)
01671         switch(NumElts) {
01672         case 32: return MVT::v32i1;
01673         case 64: return MVT::v64i1;
01674       }
01675   }
01676 
01677   if (VT.is256BitVector() || VT.is128BitVector()) {
01678     if (Subtarget->hasVLX())
01679       if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
01680           EltVT == MVT::f32 || EltVT == MVT::f64)
01681         switch(NumElts) {
01682         case 2: return MVT::v2i1;
01683         case 4: return MVT::v4i1;
01684         case 8: return MVT::v8i1;
01685       }
01686     if (Subtarget->hasBWI() && Subtarget->hasVLX())
01687       if (EltVT == MVT::i8 || EltVT == MVT::i16)
01688         switch(NumElts) {
01689         case  8: return MVT::v8i1;
01690         case 16: return MVT::v16i1;
01691         case 32: return MVT::v32i1;
01692       }
01693   }
01694 
01695   return VT.changeVectorElementTypeToInteger();
01696 }
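// For example, a v16f32 compare yields v16i1 with AVX-512, and a v8i32 compare
// yields v8i1 only when VLX is available; otherwise the result stays a full
// integer vector such as v8i32.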
01697 
01698 /// Helper for getByValTypeAlignment to determine
01699 /// the desired ByVal argument alignment.
01700 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
01701   if (MaxAlign == 16)
01702     return;
01703   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
01704     if (VTy->getBitWidth() == 128)
01705       MaxAlign = 16;
01706   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
01707     unsigned EltAlign = 0;
01708     getMaxByValAlign(ATy->getElementType(), EltAlign);
01709     if (EltAlign > MaxAlign)
01710       MaxAlign = EltAlign;
01711   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
01712     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
01713       unsigned EltAlign = 0;
01714       getMaxByValAlign(STy->getElementType(i), EltAlign);
01715       if (EltAlign > MaxAlign)
01716         MaxAlign = EltAlign;
01717       if (MaxAlign == 16)
01718         break;
01719     }
01720   }
01721 }
01722 
01723 /// Return the desired alignment for ByVal aggregate
01724 /// function arguments in the caller parameter area. For X86, aggregates
01725 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
01726 /// are at 4-byte boundaries.
01727 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
01728   if (Subtarget->is64Bit()) {
01729     // Max of 8 and alignment of type.
01730     unsigned TyAlign = TD->getABITypeAlignment(Ty);
01731     if (TyAlign > 8)
01732       return TyAlign;
01733     return 8;
01734   }
01735 
01736   unsigned Align = 4;
01737   if (Subtarget->hasSSE1())
01738     getMaxByValAlign(Ty, Align);
01739   return Align;
01740 }
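// For example, on 32-bit x86 with SSE a struct containing a 128-bit vector
// member is placed at a 16-byte boundary, while an aggregate of scalars stays
// at the default 4-byte boundary; on 64-bit targets the ABI alignment (at
// least 8) is used instead.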
01741 
01742 /// Returns the target-specific optimal type for the load and store
01743 /// operations that result from memset, memcpy, and memmove lowering.
01744 /// If DstAlign is zero, the destination alignment can satisfy any
01745 /// constraint. Similarly, if SrcAlign is zero there is no need to check it
01746 /// against an alignment requirement, probably because the source does not
01747 /// need to be loaded. If 'IsMemset' is true, we are expanding a memset; if
01748 /// 'ZeroMemset' is also true, it is a memset of zero. 'MemcpyStrSrc'
01749 /// indicates that the memcpy source is constant, so it does not need to be
01750 /// loaded.
01751 /// It returns EVT::Other if the type should be determined using generic
01752 /// target-independent logic.
01753 EVT
01754 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
01755                                        unsigned DstAlign, unsigned SrcAlign,
01756                                        bool IsMemset, bool ZeroMemset,
01757                                        bool MemcpyStrSrc,
01758                                        MachineFunction &MF) const {
01759   const Function *F = MF.getFunction();
01760   if ((!IsMemset || ZeroMemset) &&
01761       !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
01762     if (Size >= 16 &&
01763         (Subtarget->isUnalignedMemAccessFast() ||
01764          ((DstAlign == 0 || DstAlign >= 16) &&
01765           (SrcAlign == 0 || SrcAlign >= 16)))) {
01766       if (Size >= 32) {
01767         if (Subtarget->hasInt256())
01768           return MVT::v8i32;
01769         if (Subtarget->hasFp256())
01770           return MVT::v8f32;
01771       }
01772       if (Subtarget->hasSSE2())
01773         return MVT::v4i32;
01774       if (Subtarget->hasSSE1())
01775         return MVT::v4f32;
01776     } else if (!MemcpyStrSrc && Size >= 8 &&
01777                !Subtarget->is64Bit() &&
01778                Subtarget->hasSSE2()) {
01779       // Do not use f64 to lower memcpy if the source is a string constant;
01780       // it's better to use i32 to avoid the loads.
01781       return MVT::f64;
01782     }
01783   }
01784   if (Subtarget->is64Bit() && Size >= 8)
01785     return MVT::i64;
01786   return MVT::i32;
01787 }
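// For instance, a 32-byte memcpy with sufficiently aligned (or fast-unaligned)
// operands picks v8i32 on AVX2 and v8f32 on AVX1, a 16-byte one picks
// v4i32/v4f32 with SSE2/SSE1, and otherwise it falls back to i64 or i32
// scalar copies.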
01788 
01789 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
01790   if (VT == MVT::f32)
01791     return X86ScalarSSEf32;
01792   else if (VT == MVT::f64)
01793     return X86ScalarSSEf64;
01794   return true;
01795 }
01796 
01797 bool
01798 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
01799                                                   unsigned,
01800                                                   unsigned,
01801                                                   bool *Fast) const {
01802   if (Fast)
01803     *Fast = Subtarget->isUnalignedMemAccessFast();
01804   return true;
01805 }
01806 
01807 /// Return the entry encoding for a jump table in the
01808 /// current function.  The returned value is a member of the
01809 /// MachineJumpTableInfo::JTEntryKind enum.
01810 unsigned X86TargetLowering::getJumpTableEncoding() const {
01811   // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
01812   // symbol.
01813   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
01814       Subtarget->isPICStyleGOT())
01815     return MachineJumpTableInfo::EK_Custom32;
01816 
01817   // Otherwise, use the normal jump table encoding heuristics.
01818   return TargetLowering::getJumpTableEncoding();
01819 }
01820 
01821 bool X86TargetLowering::useSoftFloat() const {
01822   return Subtarget->useSoftFloat();
01823 }
01824 
01825 const MCExpr *
01826 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
01827                                              const MachineBasicBlock *MBB,
01828                                              unsigned uid,MCContext &Ctx) const{
01829   assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
01830          Subtarget->isPICStyleGOT());
01831   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
01832   // entries.
01833   return MCSymbolRefExpr::create(MBB->getSymbol(),
01834                                  MCSymbolRefExpr::VK_GOTOFF, Ctx);
01835 }
01836 
01837 /// Returns relocation base for the given PIC jumptable.
01838 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
01839                                                     SelectionDAG &DAG) const {
01840   if (!Subtarget->is64Bit())
01841     // This doesn't have SDLoc associated with it, but is not really the
01842     // same as a Register.
01843     return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
01844   return Table;
01845 }
01846 
01847 /// This returns the relocation base for the given PIC jumptable,
01848 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
01849 const MCExpr *X86TargetLowering::
01850 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
01851                              MCContext &Ctx) const {
01852   // X86-64 uses RIP relative addressing based on the jump table label.
01853   if (Subtarget->isPICStyleRIPRel())
01854     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
01855 
01856   // Otherwise, the reference is relative to the PIC base.
01857   return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
01858 }
01859 
01860 std::pair<const TargetRegisterClass *, uint8_t>
01861 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
01862                                            MVT VT) const {
01863   const TargetRegisterClass *RRC = nullptr;
01864   uint8_t Cost = 1;
01865   switch (VT.SimpleTy) {
01866   default:
01867     return TargetLowering::findRepresentativeClass(TRI, VT);
01868   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
01869     RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
01870     break;
01871   case MVT::x86mmx:
01872     RRC = &X86::VR64RegClass;
01873     break;
01874   case MVT::f32: case MVT::f64:
01875   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
01876   case MVT::v4f32: case MVT::v2f64:
01877   case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
01878   case MVT::v4f64:
01879     RRC = &X86::VR128RegClass;
01880     break;
01881   }
01882   return std::make_pair(RRC, Cost);
01883 }
01884 
01885 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
01886                                                unsigned &Offset) const {
01887   if (!Subtarget->isTargetLinux())
01888     return false;
01889 
01890   if (Subtarget->is64Bit()) {
01891     // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
01892     Offset = 0x28;
01893     if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
01894       AddressSpace = 256;
01895     else
01896       AddressSpace = 257;
01897   } else {
01898     // %gs:0x14 on i386
01899     Offset = 0x14;
01900     AddressSpace = 256;
01901   }
01902   return true;
01903 }
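// For example, on 64-bit Linux the stack-protector cookie is read from
// %fs:0x28 (address space 257 in X86's convention, or %gs/256 under the kernel
// code model), and on 32-bit Linux from %gs:0x14.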
01904 
01905 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
01906                                             unsigned DestAS) const {
01907   assert(SrcAS != DestAS && "Expected different address spaces!");
01908 
01909   return SrcAS < 256 && DestAS < 256;
01910 }
01911 
01912 //===----------------------------------------------------------------------===//
01913 //               Return Value Calling Convention Implementation
01914 //===----------------------------------------------------------------------===//
01915 
01916 #include "X86GenCallingConv.inc"
01917 
01918 bool
01919 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
01920                                   MachineFunction &MF, bool isVarArg,
01921                         const SmallVectorImpl<ISD::OutputArg> &Outs,
01922                         LLVMContext &Context) const {
01923   SmallVector<CCValAssign, 16> RVLocs;
01924   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
01925   return CCInfo.CheckReturn(Outs, RetCC_X86);
01926 }
01927 
01928 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
01929   static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
01930   return ScratchRegs;
01931 }
01932 
01933 SDValue
01934 X86TargetLowering::LowerReturn(SDValue Chain,
01935                                CallingConv::ID CallConv, bool isVarArg,
01936                                const SmallVectorImpl<ISD::OutputArg> &Outs,
01937                                const SmallVectorImpl<SDValue> &OutVals,
01938                                SDLoc dl, SelectionDAG &DAG) const {
01939   MachineFunction &MF = DAG.getMachineFunction();
01940   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
01941 
01942   SmallVector<CCValAssign, 16> RVLocs;
01943   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
01944   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
01945 
01946   SDValue Flag;
01947   SmallVector<SDValue, 6> RetOps;
01948   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
01949   // Operand #1 = Bytes To Pop
01950   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
01951                    MVT::i16));
01952 
01953   // Copy the result values into the output registers.
01954   for (unsigned i = 0; i != RVLocs.size(); ++i) {
01955     CCValAssign &VA = RVLocs[i];
01956     assert(VA.isRegLoc() && "Can only return in registers!");
01957     SDValue ValToCopy = OutVals[i];
01958     EVT ValVT = ValToCopy.getValueType();
01959 
01960     // Promote values to the appropriate types.
01961     if (VA.getLocInfo() == CCValAssign::SExt)
01962       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
01963     else if (VA.getLocInfo() == CCValAssign::ZExt)
01964       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
01965     else if (VA.getLocInfo() == CCValAssign::AExt) {
01966       if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1)
01967         ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
01968       else
01969         ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
01970     }
01971     else if (VA.getLocInfo() == CCValAssign::BCvt)
01972       ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
01973 
01974     assert(VA.getLocInfo() != CCValAssign::FPExt &&
01975            "Unexpected FP-extend for return value.");
01976 
01977     // If this is x86-64, and we disabled SSE, we can't return FP values,
01978     // or SSE or MMX vectors.
01979     if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
01980          VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
01981           (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
01982       report_fatal_error("SSE register return with SSE disabled");
01983     }
01984     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
01985     // llvm-gcc has never done it right and no one has noticed, so this
01986     // should be OK for now.
01987     if (ValVT == MVT::f64 &&
01988         (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
01989       report_fatal_error("SSE2 register return with SSE2 disabled");
01990 
01991     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
01992     // the RET instruction and handled by the FP Stackifier.
01993     if (VA.getLocReg() == X86::FP0 ||
01994         VA.getLocReg() == X86::FP1) {
01995       // If this is a copy from an xmm register to ST(0), use an FPExtend to
01996       // change the value to the FP stack register class.
01997       if (isScalarFPTypeInSSEReg(VA.getValVT()))
01998         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
01999       RetOps.push_back(ValToCopy);
02000       // Don't emit a copytoreg.
02001       continue;
02002     }
02003 
02004     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
02005     // which is returned in RAX / RDX.
02006     if (Subtarget->is64Bit()) {
02007       if (ValVT == MVT::x86mmx) {
02008         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
02009           ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
02010           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
02011                                   ValToCopy);
02012           // If we don't have SSE2 available, convert to v4f32 so the generated
02013           // register is legal.
02014           if (!Subtarget->hasSSE2())
02015             ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
02016         }
02017       }
02018     }
02019 
02020     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
02021     Flag = Chain.getValue(1);
02022     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02023   }
02024 
02025   // All x86 ABIs require that for returning structs by value we copy
02026   // the sret argument into %rax/%eax (depending on ABI) for the return.
02027   // We saved the argument into a virtual register in the entry block,
02028   // so now we copy the value out and into %rax/%eax.
02029   //
02030   // Checking Function.hasStructRetAttr() here is insufficient because the IR
02031   // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
02032   // false, then an sret argument may be implicitly inserted in the SelDAG. In
02033   // either case FuncInfo->setSRetReturnReg() will have been called.
02034   if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
02035     SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy());
02036 
02037     unsigned RetValReg
02038         = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
02039           X86::RAX : X86::EAX;
02040     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
02041     Flag = Chain.getValue(1);
02042 
02043     // RAX/EAX now acts like a return value.
02044     RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
02045   }
02046 
02047   RetOps[0] = Chain;  // Update chain.
02048 
02049   // Add the flag if we have it.
02050   if (Flag.getNode())
02051     RetOps.push_back(Flag);
02052 
02053   return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
02054 }
02055 
02056 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
02057   if (N->getNumValues() != 1)
02058     return false;
02059   if (!N->hasNUsesOfValue(1, 0))
02060     return false;
02061 
02062   SDValue TCChain = Chain;
02063   SDNode *Copy = *N->use_begin();
02064   if (Copy->getOpcode() == ISD::CopyToReg) {
02065     // If the copy has a glue operand, we conservatively assume it isn't safe to
02066     // perform a tail call.
02067     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02068       return false;
02069     TCChain = Copy->getOperand(0);
02070   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
02071     return false;
02072 
02073   bool HasRet = false;
02074   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
02075        UI != UE; ++UI) {
02076     if (UI->getOpcode() != X86ISD::RET_FLAG)
02077       return false;
02078     // If we are returning more than one value, we can definitely
02079     // not make a tail call; see PR19530.
02080     if (UI->getNumOperands() > 4)
02081       return false;
02082     if (UI->getNumOperands() == 4 &&
02083         UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
02084       return false;
02085     HasRet = true;
02086   }
02087 
02088   if (!HasRet)
02089     return false;
02090 
02091   Chain = TCChain;
02092   return true;
02093 }
02094 
02095 EVT
02096 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
02097                                             ISD::NodeType ExtendKind) const {
02098   MVT ReturnMVT;
02099   // TODO: Is this also valid on 32-bit?
02100   if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
02101     ReturnMVT = MVT::i8;
02102   else
02103     ReturnMVT = MVT::i32;
02104 
02105   EVT MinVT = getRegisterType(Context, ReturnMVT);
02106   return VT.bitsLT(MinVT) ? MinVT : VT;
02107 }
02108 
02109 /// Lower the result values of a call into the
02110 /// appropriate copies out of appropriate physical registers.
02111 ///
02112 SDValue
02113 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
02114                                    CallingConv::ID CallConv, bool isVarArg,
02115                                    const SmallVectorImpl<ISD::InputArg> &Ins,
02116                                    SDLoc dl, SelectionDAG &DAG,
02117                                    SmallVectorImpl<SDValue> &InVals) const {
02118 
02119   // Assign locations to each value returned by this call.
02120   SmallVector<CCValAssign, 16> RVLocs;
02121   bool Is64Bit = Subtarget->is64Bit();
02122   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
02123                  *DAG.getContext());
02124   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
02125 
02126   // Copy all of the result registers out of their specified physreg.
02127   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
02128     CCValAssign &VA = RVLocs[i];
02129     EVT CopyVT = VA.getLocVT();
02130 
02131     // If this is x86-64, and we disabled SSE, we can't return FP values
02132     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
02133         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
02134       report_fatal_error("SSE register return with SSE disabled");
02135     }
02136 
02137     // If we prefer to use the value in xmm registers, copy it out as f80 and
02138     // use a truncate to move it from fp stack reg to xmm reg.
02139     bool RoundAfterCopy = false;
02140     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
02141         isScalarFPTypeInSSEReg(VA.getValVT())) {
02142       CopyVT = MVT::f80;
02143       RoundAfterCopy = (CopyVT != VA.getLocVT());
02144     }
02145 
02146     Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
02147                                CopyVT, InFlag).getValue(1);
02148     SDValue Val = Chain.getValue(0);
02149 
02150     if (RoundAfterCopy)
02151       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
02152                         // This truncation won't change the value.
02153                         DAG.getIntPtrConstant(1, dl));
02154 
02155     if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
02156       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
02157 
02158     InFlag = Chain.getValue(2);
02159     InVals.push_back(Val);
02160   }
02161 
02162   return Chain;
02163 }
02164 
02165 //===----------------------------------------------------------------------===//
02166 //                C & StdCall & Fast Calling Convention implementation
02167 //===----------------------------------------------------------------------===//
02168 //  The StdCall calling convention is the standard for many Windows API
02169 //  routines. It differs from the C calling convention only slightly: the
02170 //  callee cleans up the stack rather than the caller, and symbols are
02171 //  decorated differently. It does not support vector arguments.
02172 //  For info on the fast calling convention see the Fast Calling Convention
02173 //  (tail call) implementation, LowerX86_32FastCCCallTo.
02174 
02175 /// CallIsStructReturn - Determines whether a call uses struct return
02176 /// semantics.
02177 enum StructReturnType {
02178   NotStructReturn,
02179   RegStructReturn,
02180   StackStructReturn
02181 };
02182 static StructReturnType
02183 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
02184   if (Outs.empty())
02185     return NotStructReturn;
02186 
02187   const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
02188   if (!Flags.isSRet())
02189     return NotStructReturn;
02190   if (Flags.isInReg())
02191     return RegStructReturn;
02192   return StackStructReturn;
02193 }
02194 
02195 /// Determines whether a function uses struct return semantics.
02196 static StructReturnType
02197 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
02198   if (Ins.empty())
02199     return NotStructReturn;
02200 
02201   const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
02202   if (!Flags.isSRet())
02203     return NotStructReturn;
02204   if (Flags.isInReg())
02205     return RegStructReturn;
02206   return StackStructReturn;
02207 }
02208 
02209 /// Make a copy of an aggregate at the address specified by "Src" to the
02210 /// address "Dst", with the size and alignment information specified by the
02211 /// corresponding parameter attribute. The copy is passed as a byval parameter.
02212 static SDValue
02213 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
02214                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
02215                           SDLoc dl) {
02216   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
02217 
02218   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
02219                        /*isVolatile*/false, /*AlwaysInline=*/true,
02220                        /*isTailCall*/false,
02221                        MachinePointerInfo(), MachinePointerInfo());
02222 }
02223 
02224 /// Return true if the calling convention is one that
02225 /// supports tail call optimization.
02226 static bool IsTailCallConvention(CallingConv::ID CC) {
02227   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
02228           CC == CallingConv::HiPE);
02229 }
02230 
02231 /// \brief Return true if the calling convention is a C calling convention.
02232 static bool IsCCallConvention(CallingConv::ID CC) {
02233   return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
02234           CC == CallingConv::X86_64_SysV);
02235 }
02236 
02237 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
02238   auto Attr =
02239       CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
02240   if (!CI->isTailCall() || Attr.getValueAsString() == "true")
02241     return false;
02242 
02243   CallSite CS(CI);
02244   CallingConv::ID CalleeCC = CS.getCallingConv();
02245   if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
02246     return false;
02247 
02248   return true;
02249 }
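// For illustration (assumed IR, not from this file): a caller carrying the
// string attribute
//   define void @f() "disable-tail-calls"="true" { ... }
// causes every tail-marked call inside it to be rejected by the check above,
// as is any call whose calling convention is neither a C nor a tail-call
// convention.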
02250 
02251 /// Return true if the function is being made into
02252 /// a tailcall target by changing its ABI.
02253 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
02254                                    bool GuaranteedTailCallOpt) {
02255   return GuaranteedTailCallOpt && IsTailCallConvention(CC);
02256 }
02257 
02258 SDValue
02259 X86TargetLowering::LowerMemArgument(SDValue Chain,
02260                                     CallingConv::ID CallConv,
02261                                     const SmallVectorImpl<ISD::InputArg> &Ins,
02262                                     SDLoc dl, SelectionDAG &DAG,
02263                                     const CCValAssign &VA,
02264                                     MachineFrameInfo *MFI,
02265                                     unsigned i) const {
02266   // Create the nodes corresponding to a load from this parameter slot.
02267   ISD::ArgFlagsTy Flags = Ins[i].Flags;
02268   bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
02269       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
02270   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
02271   EVT ValVT;
02272 
02273   // If the value is passed by pointer, the address is passed instead of the
02274   // value itself.
02275   bool ExtendedInMem = VA.isExtInLoc() &&
02276     VA.getValVT().getScalarType() == MVT::i1;
02277 
02278   if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
02279     ValVT = VA.getLocVT();
02280   else
02281     ValVT = VA.getValVT();
02282 
02283   // FIXME: For now, all byval parameter objects are marked mutable. This can be
02284   // changed with more analysis.
02285   // In case of tail call optimization, mark all arguments mutable, since they
02286   // could be overwritten when lowering the arguments of a tail call.
02287   if (Flags.isByVal()) {
02288     unsigned Bytes = Flags.getByValSize();
02289     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
02290     int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
02291     return DAG.getFrameIndex(FI, getPointerTy());
02292   } else {
02293     int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
02294                                     VA.getLocMemOffset(), isImmutable);
02295     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
02296     SDValue Val =  DAG.getLoad(ValVT, dl, Chain, FIN,
02297                                MachinePointerInfo::getFixedStack(FI),
02298                                false, false, false, 0);
02299     return ExtendedInMem ?
02300       DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
02301   }
02302 }
02303 
02304 // FIXME: Get this from tablegen.
02305 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
02306                                                 const X86Subtarget *Subtarget) {
02307   assert(Subtarget->is64Bit());
02308 
02309   if (Subtarget->isCallingConvWin64(CallConv)) {
02310     static const MCPhysReg GPR64ArgRegsWin64[] = {
02311       X86::RCX, X86::RDX, X86::R8,  X86::R9
02312     };
02313     return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
02314   }
02315 
02316   static const MCPhysReg GPR64ArgRegs64Bit[] = {
02317     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
02318   };
02319   return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
02320 }
02321 
02322 // FIXME: Get this from tablegen.
02323 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
02324                                                 CallingConv::ID CallConv,
02325                                                 const X86Subtarget *Subtarget) {
02326   assert(Subtarget->is64Bit());
02327   if (Subtarget->isCallingConvWin64(CallConv)) {
02328     // The XMM registers which might contain var arg parameters are shadowed
02329     // in their paired GPR.  So we only need to save the GPRs to their home
02330     // slots.
02331     // TODO: __vectorcall will change this.
02332     return None;
02333   }
02334 
02335   const Function *Fn = MF.getFunction();
02336   bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
02337   bool isSoftFloat = Subtarget->useSoftFloat();
02338   assert(!(isSoftFloat && NoImplicitFloatOps) &&
02339          "SSE register cannot be used when SSE is disabled!");
02340   if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
02341     // SSE is disabled (e.g. in kernel mode), so there are no XMM argument
02342     // registers.
02343     return None;
02344 
02345   static const MCPhysReg XMMArgRegs64Bit[] = {
02346     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02347     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02348   };
02349   return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
02350 }
02351 
02352 SDValue
02353 X86TargetLowering::LowerFormalArguments(SDValue Chain,
02354                                         CallingConv::ID CallConv,
02355                                         bool isVarArg,
02356                                       const SmallVectorImpl<ISD::InputArg> &Ins,
02357                                         SDLoc dl,
02358                                         SelectionDAG &DAG,
02359                                         SmallVectorImpl<SDValue> &InVals)
02360                                           const {
02361   MachineFunction &MF = DAG.getMachineFunction();
02362   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
02363   const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
02364 
02365   const Function* Fn = MF.getFunction();
02366   if (Fn->hasExternalLinkage() &&
02367       Subtarget->isTargetCygMing() &&
02368       Fn->getName() == "main")
02369     FuncInfo->setForceFramePointer(true);
02370 
02371   MachineFrameInfo *MFI = MF.getFrameInfo();
02372   bool Is64Bit = Subtarget->is64Bit();
02373   bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
02374 
02375   assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
02376          "Var args not supported with calling convention fastcc, ghc or hipe");
02377 
02378   // Assign locations to all of the incoming arguments.
02379   SmallVector<CCValAssign, 16> ArgLocs;
02380   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02381 
02382   // Allocate shadow area for Win64
02383   if (IsWin64)
02384     CCInfo.AllocateStack(32, 8);
02385 
02386   CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
02387 
02388   unsigned LastVal = ~0U;
02389   SDValue ArgValue;
02390   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02391     CCValAssign &VA = ArgLocs[i];
02392     // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
02393     // places.
02394     assert(VA.getValNo() != LastVal &&
02395            "Don't support value assigned to multiple locs yet");
02396     (void)LastVal;
02397     LastVal = VA.getValNo();
02398 
02399     if (VA.isRegLoc()) {
02400       EVT RegVT = VA.getLocVT();
02401       const TargetRegisterClass *RC;
02402       if (RegVT == MVT::i32)
02403         RC = &X86::GR32RegClass;
02404       else if (Is64Bit && RegVT == MVT::i64)
02405         RC = &X86::GR64RegClass;
02406       else if (RegVT == MVT::f32)
02407         RC = &X86::FR32RegClass;
02408       else if (RegVT == MVT::f64)
02409         RC = &X86::FR64RegClass;
02410       else if (RegVT.is512BitVector())
02411         RC = &X86::VR512RegClass;
02412       else if (RegVT.is256BitVector())
02413         RC = &X86::VR256RegClass;
02414       else if (RegVT.is128BitVector())
02415         RC = &X86::VR128RegClass;
02416       else if (RegVT == MVT::x86mmx)
02417         RC = &X86::VR64RegClass;
02418       else if (RegVT == MVT::i1)
02419         RC = &X86::VK1RegClass;
02420       else if (RegVT == MVT::v8i1)
02421         RC = &X86::VK8RegClass;
02422       else if (RegVT == MVT::v16i1)
02423         RC = &X86::VK16RegClass;
02424       else if (RegVT == MVT::v32i1)
02425         RC = &X86::VK32RegClass;
02426       else if (RegVT == MVT::v64i1)
02427         RC = &X86::VK64RegClass;
02428       else
02429         llvm_unreachable("Unknown argument type!");
02430 
02431       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02432       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
02433 
02434       // If this is an 8 or 16-bit value, it is really passed promoted to 32
02435       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
02436       // right size.
02437       if (VA.getLocInfo() == CCValAssign::SExt)
02438         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
02439                                DAG.getValueType(VA.getValVT()));
02440       else if (VA.getLocInfo() == CCValAssign::ZExt)
02441         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
02442                                DAG.getValueType(VA.getValVT()));
02443       else if (VA.getLocInfo() == CCValAssign::BCvt)
02444         ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
02445 
02446       if (VA.isExtInLoc()) {
02447         // Handle MMX values passed in XMM regs.
02448         if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
02449           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
02450         else
02451           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
02452       }
02453     } else {
02454       assert(VA.isMemLoc());
02455       ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
02456     }
02457 
02458     // If the value is passed via a pointer, do a load.
02459     if (VA.getLocInfo() == CCValAssign::Indirect)
02460       ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
02461                              MachinePointerInfo(), false, false, false, 0);
02462 
02463     InVals.push_back(ArgValue);
02464   }
02465 
02466   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02467     // All x86 ABIs require that for returning structs by value we copy the
02468     // sret argument into %rax/%eax (depending on ABI) for the return. Save
02469     // the argument into a virtual register so that we can access it from the
02470     // return points.
02471     if (Ins[i].Flags.isSRet()) {
02472       unsigned Reg = FuncInfo->getSRetReturnReg();
02473       if (!Reg) {
02474         MVT PtrTy = getPointerTy();
02475         Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
02476         FuncInfo->setSRetReturnReg(Reg);
02477       }
02478       SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
02479       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
02480       break;
02481     }
02482   }
02483 
02484   unsigned StackSize = CCInfo.getNextStackOffset();
02485   // Align stack specially for tail calls.
02486   if (FuncIsMadeTailCallSafe(CallConv,
02487                              MF.getTarget().Options.GuaranteedTailCallOpt))
02488     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
02489 
02490   // If the function takes a variable number of arguments, make a frame index
02491   // for the start of the first vararg value... for expansion of llvm.va_start.
02492   // We can skip this if there are no va_start calls.
02493   if (MFI->hasVAStart() &&
02494       (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
02495                    CallConv != CallingConv::X86_ThisCall))) {
02496     FuncInfo->setVarArgsFrameIndex(
02497         MFI->CreateFixedObject(1, StackSize, true));
02498   }
02499 
02500   MachineModuleInfo &MMI = MF.getMMI();
02501   const Function *WinEHParent = nullptr;
02502   if (IsWin64 && MMI.hasWinEHFuncInfo(Fn))
02503     WinEHParent = MMI.getWinEHParent(Fn);
02504   bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn;
02505   bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
02506 
02507   // Figure out if XMM registers are in use.
02508   assert(!(Subtarget->useSoftFloat() &&
02509            Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
02510          "SSE register cannot be used when SSE is disabled!");
02511 
02512   // 64-bit calling conventions support varargs and register parameters, so we
02513   // have to do extra work to spill them in the prologue.
02514   if (Is64Bit && isVarArg && MFI->hasVAStart()) {
02515     // Find the index of the first unallocated argument GPR and XMM register.
02516     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
02517     ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
02518     unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
02519     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
02520     assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
02521            "SSE register cannot be used when SSE is disabled!");
02522 
02523     // Gather all the live in physical registers.
02524     SmallVector<SDValue, 6> LiveGPRs;
02525     SmallVector<SDValue, 8> LiveXMMRegs;
02526     SDValue ALVal;
02527     for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
02528       unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
02529       LiveGPRs.push_back(
02530           DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
02531     }
02532     if (!ArgXMMs.empty()) {
02533       unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
02534       ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
02535       for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
02536         unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
02537         LiveXMMRegs.push_back(
02538             DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
02539       }
02540     }
02541 
02542     if (IsWin64) {
02543       // Get to the caller-allocated home save location.  Add 8 to account
02544       // for the return address.
02545       int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
02546       FuncInfo->setRegSaveFrameIndex(
02547           MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
02548       // Fix up the vararg frame index to point into the shadow area (4 x i64).
02549       if (NumIntRegs < 4)
02550         FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
02551     } else {
02552       // For X86-64, if there are vararg parameters that are passed via
02553       // registers, then we must store them to their spots on the stack so
02554       // they may be loaded by dereferencing the result of va_next.
02555       FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
02556       FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
02557       FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
02558           ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
02559     }
02560 
02561     // Store the integer parameter registers.
02562     SmallVector<SDValue, 8> MemOps;
02563     SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
02564                                       getPointerTy());
02565     unsigned Offset = FuncInfo->getVarArgsGPOffset();
02566     for (SDValue Val : LiveGPRs) {
02567       SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
02568                                 DAG.getIntPtrConstant(Offset, dl));
02569       SDValue Store =
02570         DAG.getStore(Val.getValue(1), dl, Val, FIN,
02571                      MachinePointerInfo::getFixedStack(
02572                        FuncInfo->getRegSaveFrameIndex(), Offset),
02573                      false, false, 0);
02574       MemOps.push_back(Store);
02575       Offset += 8;
02576     }
02577 
02578     if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
02579       // Now store the XMM (fp + vector) parameter registers.
02580       SmallVector<SDValue, 12> SaveXMMOps;
02581       SaveXMMOps.push_back(Chain);
02582       SaveXMMOps.push_back(ALVal);
02583       SaveXMMOps.push_back(DAG.getIntPtrConstant(
02584                              FuncInfo->getRegSaveFrameIndex(), dl));
02585       SaveXMMOps.push_back(DAG.getIntPtrConstant(
02586                              FuncInfo->getVarArgsFPOffset(), dl));
02587       SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
02588                         LiveXMMRegs.end());
02589       MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
02590                                    MVT::Other, SaveXMMOps));
02591     }
02592 
02593     if (!MemOps.empty())
02594       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02595   } else if (IsWinEHOutlined) {
02596     // Get to the caller-allocated home save location.  Add 8 to account
02597     // for the return address.
02598     int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
02599     FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject(
02600         /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false));
02601 
02602     MMI.getWinEHFuncInfo(Fn)
02603         .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] =
02604         FuncInfo->getRegSaveFrameIndex();
02605 
02606     // Store the second integer parameter (rdx) into rsp+16 relative to the
02607     // stack pointer at the entry of the function.
02608     SDValue RSFIN =
02609         DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy());
02610     unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass);
02611     SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64);
02612     Chain = DAG.getStore(
02613         Val.getValue(1), dl, Val, RSFIN,
02614         MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()),
02615         /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0);
02616   }
02617 
02618   if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
02619     // Find the largest legal vector type.
02620     MVT VecVT = MVT::Other;
02621     // FIXME: Only some x86_32 calling conventions support AVX512.
02622     if (Subtarget->hasAVX512() &&
02623         (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
02624                      CallConv == CallingConv::Intel_OCL_BI)))
02625       VecVT = MVT::v16f32;
02626     else if (Subtarget->hasAVX())
02627       VecVT = MVT::v8f32;
02628     else if (Subtarget->hasSSE2())
02629       VecVT = MVT::v4f32;
02630 
02631     // We forward some GPRs and some vector types.
02632     SmallVector<MVT, 2> RegParmTypes;
02633     MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
02634     RegParmTypes.push_back(IntVT);
02635     if (VecVT != MVT::Other)
02636       RegParmTypes.push_back(VecVT);
02637 
02638     // Compute the set of forwarded registers. The rest are scratch.
02639     SmallVectorImpl<ForwardedRegister> &Forwards =
02640         FuncInfo->getForwardedMustTailRegParms();
02641     CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
02642 
02643     // Conservatively forward AL on x86_64, since it might be used for varargs.
02644     if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
02645       unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
02646       Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
02647     }
02648 
02649     // Copy all forwards from physical to virtual registers.
02650     for (ForwardedRegister &F : Forwards) {
02651       // FIXME: Can we use a less constrained schedule?
02652       SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
02653       F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
02654       Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
02655     }
02656   }
02657 
02658   // Some CCs need callee pop.
02659   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
02660                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
02661     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
02662   } else {
02663     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
02664     // If this is an sret function, the return should pop the hidden pointer.
02665     if (!Is64Bit && !IsTailCallConvention(CallConv) &&
02666         !Subtarget->getTargetTriple().isOSMSVCRT() &&
02667         argsAreStructReturn(Ins) == StackStructReturn)
02668       FuncInfo->setBytesToPopOnReturn(4);
02669   }
02670 
02671   if (!Is64Bit) {
02672     // RegSaveFrameIndex is X86-64 only.
02673     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
02674     if (CallConv == CallingConv::X86_FastCall ||
02675         CallConv == CallingConv::X86_ThisCall)
02676       // fastcall and thiscall functions can't have varargs.
02677       FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
02678   }
02679 
02680   FuncInfo->setArgumentStackSize(StackSize);
02681 
02682   if (IsWinEHParent) {
02683     int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
02684     SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
02685     MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
02686     SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64);
02687     Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
02688                          MachinePointerInfo::getFixedStack(UnwindHelpFI),
02689                          /*isVolatile=*/true,
02690                          /*isNonTemporal=*/false, /*Alignment=*/0);
02691   }
02692 
02693   return Chain;
02694 }
02695 
02696 SDValue
02697 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
02698                                     SDValue StackPtr, SDValue Arg,
02699                                     SDLoc dl, SelectionDAG &DAG,
02700                                     const CCValAssign &VA,
02701                                     ISD::ArgFlagsTy Flags) const {
02702   unsigned LocMemOffset = VA.getLocMemOffset();
02703   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
02704   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
02705   if (Flags.isByVal())
02706     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
02707 
02708   return DAG.getStore(Chain, dl, Arg, PtrOff,
02709                       MachinePointerInfo::getStack(LocMemOffset),
02710                       false, false, 0);
02711 }
02712 
02713 /// Emit a load of the return address if tail call
02714 /// optimization is performed and it is required.
02715 SDValue
02716 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
02717                                            SDValue &OutRetAddr, SDValue Chain,
02718                                            bool IsTailCall, bool Is64Bit,
02719                                            int FPDiff, SDLoc dl) const {
02720   // Adjust the Return address stack slot.
02721   EVT VT = getPointerTy();
02722   OutRetAddr = getReturnAddressFrameIndex(DAG);
02723 
02724   // Load the "old" Return address.
02725   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
02726                            false, false, false, 0);
02727   return SDValue(OutRetAddr.getNode(), 1);
02728 }
02729 
02730 /// Emit a store of the return address if tail call
02731 /// optimization is performed and it is required (FPDiff!=0).
02732 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
02733                                         SDValue Chain, SDValue RetAddrFrIdx,
02734                                         EVT PtrVT, unsigned SlotSize,
02735                                         int FPDiff, SDLoc dl) {
02736   // Store the return address to the appropriate stack slot.
02737   if (!FPDiff) return Chain;
02738   // Calculate the new stack slot for the return address.
02739   int NewReturnAddrFI =
02740     MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
02741                                          false);
02742   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
02743   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
02744                        MachinePointerInfo::getFixedStack(NewReturnAddrFI),
02745                        false, false, 0);
02746   return Chain;
02747 }
02748 
02749 SDValue
02750 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
02751                              SmallVectorImpl<SDValue> &InVals) const {
02752   SelectionDAG &DAG                     = CLI.DAG;
02753   SDLoc &dl                             = CLI.DL;
02754   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
02755   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
02756   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
02757   SDValue Chain                         = CLI.Chain;
02758   SDValue Callee                        = CLI.Callee;
02759   CallingConv::ID CallConv              = CLI.CallConv;
02760   bool &isTailCall                      = CLI.IsTailCall;
02761   bool isVarArg                         = CLI.IsVarArg;
02762 
02763   MachineFunction &MF = DAG.getMachineFunction();
02764   bool Is64Bit        = Subtarget->is64Bit();
02765   bool IsWin64        = Subtarget->isCallingConvWin64(CallConv);
02766   StructReturnType SR = callIsStructReturn(Outs);
02767   bool IsSibcall      = false;
02768   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
02769   auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
02770 
02771   if (Attr.getValueAsString() == "true")
02772     isTailCall = false;
02773 
02774   if (Subtarget->isPICStyleGOT() &&
02775       !MF.getTarget().Options.GuaranteedTailCallOpt) {
02776     // If we are using a GOT, disable tail calls to external symbols with
02777     // default visibility. Tail calling such a symbol requires using a GOT
02778     // relocation, which forces early binding of the symbol. This breaks code
02779     // that require lazy function symbol resolution. Using musttail or
02780     // GuaranteedTailCallOpt will override this.
02781     GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
02782     if (!G || (!G->getGlobal()->hasLocalLinkage() &&
02783                G->getGlobal()->hasDefaultVisibility()))
02784       isTailCall = false;
02785   }
02786 
02787   bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
02788   if (IsMustTail) {
02789     // Force this to be a tail call.  The verifier rules are enough to ensure
02790     // that we can lower this successfully without moving the return address
02791     // around.
02792     isTailCall = true;
02793   } else if (isTailCall) {
02794     // Check if it's really possible to do a tail call.
02795     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
02796                     isVarArg, SR != NotStructReturn,
02797                     MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
02798                     Outs, OutVals, Ins, DAG);
02799 
02800     // Sibcalls are automatically detected tailcalls which do not require
02801     // ABI changes.
02802     if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
02803       IsSibcall = true;
02804 
02805     if (isTailCall)
02806       ++NumTailCalls;
02807   }
02808 
02809   assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
02810          "Var args not supported with calling convention fastcc, ghc or hipe");
02811 
02812   // Analyze operands of the call, assigning locations to each operand.
02813   SmallVector<CCValAssign, 16> ArgLocs;
02814   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02815 
02816   // Allocate shadow area for Win64
02817   if (IsWin64)
02818     CCInfo.AllocateStack(32, 8);
02819 
02820   CCInfo.AnalyzeCallOperands(Outs, CC_X86);
02821 
02822   // Get a count of how many bytes are to be pushed on the stack.
02823   unsigned NumBytes = CCInfo.getNextStackOffset();
02824   if (IsSibcall)
02825     // This is a sibcall. The memory operands are already in place in the
02826     // caller's own incoming argument stack; no new stack space is needed.
02827     NumBytes = 0;
02828   else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
02829            IsTailCallConvention(CallConv))
02830     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
02831 
02832   int FPDiff = 0;
02833   if (isTailCall && !IsSibcall && !IsMustTail) {
02834     // Lower arguments at fp - stackoffset + fpdiff.
02835     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
02836 
02837     FPDiff = NumBytesCallerPushed - NumBytes;
02838 
02839     // Set the delta of movement of the return address stack slot, but only
02840     // if this delta is smaller than the previously recorded delta.
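    // For illustration with hypothetical numbers: if the caller's own incoming
    // arguments occupy 12 bytes but the callee needs 28 bytes, FPDiff is -16,
    // i.e. the return address slot must move 16 bytes further down the stack.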
02841     if (FPDiff < X86Info->getTCReturnAddrDelta())
02842       X86Info->setTCReturnAddrDelta(FPDiff);
02843   }
02844 
02845   unsigned NumBytesToPush = NumBytes;
02846   unsigned NumBytesToPop = NumBytes;
02847 
02848   // If we have an inalloca argument, all stack space has already been allocated
02849   // for us and is right at the top of the stack.  We don't support multiple
02850   // arguments passed in memory when using inalloca.
02851   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
02852     NumBytesToPush = 0;
02853     if (!ArgLocs.back().isMemLoc())
02854       report_fatal_error("cannot use inalloca attribute on a register "
02855                          "parameter");
02856     if (ArgLocs.back().getLocMemOffset() != 0)
02857       report_fatal_error("any parameter with the inalloca attribute must be "
02858                          "the only memory argument");
02859   }
02860 
02861   if (!IsSibcall)
02862     Chain = DAG.getCALLSEQ_START(
02863         Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
02864 
02865   SDValue RetAddrFrIdx;
02866   // Load return address for tail calls.
02867   if (isTailCall && FPDiff)
02868     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
02869                                     Is64Bit, FPDiff, dl);
02870 
02871   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
02872   SmallVector<SDValue, 8> MemOpChains;
02873   SDValue StackPtr;
02874 
02875   // Walk the register/memloc assignments, inserting copies/loads.  In the case
02876   // of tail call optimization, arguments are handled later.
02877   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
02878   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02879     // Skip inalloca arguments, they have already been written.
02880     ISD::ArgFlagsTy Flags = Outs[i].Flags;
02881     if (Flags.isInAlloca())
02882       continue;
02883 
02884     CCValAssign &VA = ArgLocs[i];
02885     EVT RegVT = VA.getLocVT();
02886     SDValue Arg = OutVals[i];
02887     bool isByVal = Flags.isByVal();
02888 
02889     // Promote the value if needed.
02890     switch (VA.getLocInfo()) {
02891     default: llvm_unreachable("Unknown loc info!");
02892     case CCValAssign::Full: break;
02893     case CCValAssign::SExt:
02894       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
02895       break;
02896     case CCValAssign::ZExt:
02897       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
02898       break;
02899     case CCValAssign::AExt:
02900       if (Arg.getValueType().isVector() &&
02901           Arg.getValueType().getScalarType() == MVT::i1)
02902         Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
02903       else if (RegVT.is128BitVector()) {
02904         // Special case: passing MMX values in XMM registers.
02905         Arg = DAG.getBitcast(MVT::i64, Arg);
02906         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
02907         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
02908       } else
02909         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
02910       break;
02911     case CCValAssign::BCvt:
02912       Arg = DAG.getBitcast(RegVT, Arg);
02913       break;
02914     case CCValAssign::Indirect: {
02915       // Store the argument.
02916       SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
02917       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
02918       Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
02919                            MachinePointerInfo::getFixedStack(FI),
02920                            false, false, 0);
02921       Arg = SpillSlot;
02922       break;
02923     }
02924     }
02925 
02926     if (VA.isRegLoc()) {
02927       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
02928       if (isVarArg && IsWin64) {
02929         // The Win64 ABI requires an argument XMM reg to be copied to the
02930         // corresponding shadow GPR if the callee is a varargs function.
02931         unsigned ShadowReg = 0;
02932         switch (VA.getLocReg()) {
02933         case X86::XMM0: ShadowReg = X86::RCX; break;
02934         case X86::XMM1: ShadowReg = X86::RDX; break;
02935         case X86::XMM2: ShadowReg = X86::R8; break;
02936         case X86::XMM3: ShadowReg = X86::R9; break;
02937         }
02938         if (ShadowReg)
02939           RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
02940       }
02941     } else if (!IsSibcall && (!isTailCall || isByVal)) {
02942       assert(VA.isMemLoc());
02943       if (!StackPtr.getNode())
02944         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
02945                                       getPointerTy());
02946       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
02947                                              dl, DAG, VA, Flags));
02948     }
02949   }
02950 
02951   if (!MemOpChains.empty())
02952     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
02953 
02954   if (Subtarget->isPICStyleGOT()) {
02955     // ELF / PIC requires the GOT pointer to be in the EBX register before
02956     // making function calls via the PLT.
02957     if (!isTailCall) {
02958       RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
02959                DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
02960     } else {
02961       // If we are tail calling and generating PIC/GOT style code, load the
02962       // address of the callee into ECX. The value in ECX is used as the target of
02963       // the tail jump. This is done to circumvent the ebx/callee-saved problem
02964       // for tail calls on PIC/GOT architectures. Normally we would just put the
02965       // address of GOT into ebx and then call target@PLT. But for tail calls
02966       // ebx would be restored (since ebx is callee saved) before jumping to the
02967       // target@PLT.
02968 
02969       // Note: The actual moving to ECX is done further down.
02970       GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
02971       if (G && !G->getGlobal()->hasLocalLinkage() &&
02972           G->getGlobal()->hasDefaultVisibility())
02973         Callee = LowerGlobalAddress(Callee, DAG);
02974       else if (isa<ExternalSymbolSDNode>(Callee))
02975         Callee = LowerExternalSymbol(Callee, DAG);
02976     }
02977   }
02978 
02979   if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
02980     // From the AMD64 ABI document:
02981     // For calls that may call functions that use varargs or stdargs
02982     // (prototype-less calls or calls to functions containing ellipsis (...) in
02983     // the declaration) %al is used as a hidden argument to specify the number
02984     // of SSE registers used. The contents of %al do not need to match exactly
02985     // the number of registers, but must be an upper bound on the number of SSE
02986     // registers used and must be in the range 0 - 8 inclusive.
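    // For example (illustrative, not from this file), a call such as
    //   printf("%f %f\n", x, y);
    // passes the two doubles in XMM0 and XMM1, so NumXMMRegs below is 2 and
    // AL is set to 2 (any upper bound up to 8 would also satisfy the ABI).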
02987 
02988     // Count the number of XMM registers allocated.
02989     static const MCPhysReg XMMArgRegs[] = {
02990       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02991       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02992     };
02993     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
02994     assert((Subtarget->hasSSE1() || !NumXMMRegs)
02995            && "SSE registers cannot be used when SSE is disabled");
02996 
02997     RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
02998                                         DAG.getConstant(NumXMMRegs, dl,
02999                                                         MVT::i8)));
03000   }
03001 
03002   if (isVarArg && IsMustTail) {
03003     const auto &Forwards = X86Info->getForwardedMustTailRegParms();
03004     for (const auto &F : Forwards) {
03005       SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
03006       RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
03007     }
03008   }
03009 
03010   // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
03011   // don't need this because the eligibility check rejects calls that require
03012   // shuffling arguments passed in memory.
03013   if (!IsSibcall && isTailCall) {
03014     // Force all the incoming stack arguments to be loaded from the stack
03015     // before any new outgoing arguments are stored to the stack, because the
03016     // outgoing stack slots may alias the incoming argument stack slots, and
03017     // the alias isn't otherwise explicit. This is slightly more conservative
03018     // than necessary, because it means that each store effectively depends
03019     // on every argument instead of just those arguments it would clobber.
03020     SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
03021 
03022     SmallVector<SDValue, 8> MemOpChains2;
03023     SDValue FIN;
03024     int FI = 0;
03025     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03026       CCValAssign &VA = ArgLocs[i];
03027       if (VA.isRegLoc())
03028         continue;
03029       assert(VA.isMemLoc());
03030       SDValue Arg = OutVals[i];
03031       ISD::ArgFlagsTy Flags = Outs[i].Flags;
03032       // Skip inalloca arguments.  They don't require any work.
03033       if (Flags.isInAlloca())
03034         continue;
03035       // Create frame index.
03036       int32_t Offset = VA.getLocMemOffset()+FPDiff;
03037       uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
03038       FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
03039       FIN = DAG.getFrameIndex(FI, getPointerTy());
03040 
03041       if (Flags.isByVal()) {
03042         // Copy relative to framepointer.
03043         SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
03044         if (!StackPtr.getNode())
03045           StackPtr = DAG.getCopyFromReg(Chain, dl,
03046                                         RegInfo->getStackRegister(),
03047                                         getPointerTy());
03048         Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
03049 
03050         MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
03051                                                          ArgChain,
03052                                                          Flags, DAG, dl));
03053       } else {
03054         // Store relative to framepointer.
03055         MemOpChains2.push_back(
03056           DAG.getStore(ArgChain, dl, Arg, FIN,
03057                        MachinePointerInfo::getFixedStack(FI),
03058                        false, false, 0));
03059       }
03060     }
03061 
03062     if (!MemOpChains2.empty())
03063       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
03064 
03065     // Store the return address to the appropriate stack slot.
03066     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
03067                                      getPointerTy(), RegInfo->getSlotSize(),
03068                                      FPDiff, dl);
03069   }
03070 
03071   // Build a sequence of copy-to-reg nodes chained together with token chain
03072   // and flag operands which copy the outgoing args into registers.
03073   SDValue InFlag;
03074   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
03075     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
03076                              RegsToPass[i].second, InFlag);
03077     InFlag = Chain.getValue(1);
03078   }
03079 
03080   if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
03081     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
03082     // In the 64-bit large code model, we have to make all calls
03083     // through a register, since the call instruction's 32-bit
03084     // pc-relative offset may not be large enough to hold the whole
03085     // address.
03086   } else if (Callee->getOpcode() == ISD::GlobalAddress) {
03087     // If the callee is a GlobalAddress node (quite common, every direct call
03088     // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
03089     // it.
03090     GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
03091 
03092     // We should use an extra load for direct calls to dllimported functions in
03093     // non-JIT mode.
03094     const GlobalValue *GV = G->getGlobal();
03095     if (!GV->hasDLLImportStorageClass()) {
03096       unsigned char OpFlags = 0;
03097       bool ExtraLoad = false;
03098       unsigned WrapperKind = ISD::DELETED_NODE;
03099 
03100       // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
03101       // external symbols must go through the PLT in PIC mode.  If the symbol
03102       // has hidden or protected visibility, or if it is static or local, then
03103       // we don't need to use the PLT - we can directly call it.
03104       if (Subtarget->isTargetELF() &&
03105           DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
03106           GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
03107         OpFlags = X86II::MO_PLT;
03108       } else if (Subtarget->isPICStyleStubAny() &&
03109                  (GV->isDeclaration() || GV->isWeakForLinker()) &&
03110                  (!Subtarget->getTargetTriple().isMacOSX() ||
03111                   Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
03112         // PC-relative references to external symbols should go through $stub,
03113         // unless we're building with the Leopard linker or later, which
03114         // automatically synthesizes these stubs.
03115         OpFlags = X86II::MO_DARWIN_STUB;
03116       } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) &&
03117                  cast<Function>(GV)->hasFnAttribute(Attribute::NonLazyBind)) {
03118         // If the function is marked as non-lazy, generate an indirect call
03119         // which loads from the GOT directly. This avoids runtime overhead
03120         // at the cost of eager binding (and one extra byte of encoding).
03121         OpFlags = X86II::MO_GOTPCREL;
03122         WrapperKind = X86ISD::WrapperRIP;
03123         ExtraLoad = true;
03124       }
03125 
03126       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
03127                                           G->getOffset(), OpFlags);
03128 
03129       // Add a wrapper if needed.
03130       if (WrapperKind != ISD::DELETED_NODE)
03131         Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
03132       // Add extra indirection if needed.
03133       if (ExtraLoad)
03134         Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
03135                              MachinePointerInfo::getGOT(),
03136                              false, false, false, 0);
03137     }
03138   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
03139     unsigned char OpFlags = 0;
03140 
03141     // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
03142     // external symbols should go through the PLT.
03143     if (Subtarget->isTargetELF() &&
03144         DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
03145       OpFlags = X86II::MO_PLT;
03146     } else if (Subtarget->isPICStyleStubAny() &&
03147                (!Subtarget->getTargetTriple().isMacOSX() ||
03148                 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
03149       // PC-relative references to external symbols should go through $stub,
03150         // unless we're building with the Leopard linker or later, which
03151       // automatically synthesizes these stubs.
03152       OpFlags = X86II::MO_DARWIN_STUB;
03153     }
03154 
03155     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
03156                                          OpFlags);
03157   } else if (Subtarget->isTarget64BitILP32() &&
03158              Callee->getValueType(0) == MVT::i32) {
03159     // Zero-extend the 32-bit Callee address into 64 bits according to the x32 ABI.
03160     Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
03161   }
03162 
03163   // Returns a chain & a flag for retval copy to use.
03164   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
03165   SmallVector<SDValue, 8> Ops;
03166 
03167   if (!IsSibcall && isTailCall) {
03168     Chain = DAG.getCALLSEQ_END(Chain,
03169                                DAG.getIntPtrConstant(NumBytesToPop, dl, true),
03170                                DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
03171     InFlag = Chain.getValue(1);
03172   }
03173 
03174   Ops.push_back(Chain);
03175   Ops.push_back(Callee);
03176 
03177   if (isTailCall)
03178     Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
03179 
03180   // Add argument registers to the end of the list so that they are known live
03181   // into the call.
03182   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
03183     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
03184                                   RegsToPass[i].second.getValueType()));
03185 
03186   // Add a register mask operand representing the call-preserved registers.
03187   const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
03188   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
03189   assert(Mask && "Missing call preserved mask for calling convention");
03190   Ops.push_back(DAG.getRegisterMask(Mask));
03191 
03192   if (InFlag.getNode())
03193     Ops.push_back(InFlag);
03194 
03195   if (isTailCall) {
03196     // We used to do:
03197     //// If this is the first return lowered for this function, add the regs
03198     //// to the liveout set for the function.
03199     // This isn't right, although it's probably harmless on x86; liveouts
03200     // should be computed from returns not tail calls.  Consider a void
03201     // function making a tail call to a function returning int.
03202     MF.getFrameInfo()->setHasTailCall();
03203     return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
03204   }
03205 
03206   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
03207   InFlag = Chain.getValue(1);
03208 
03209   // Create the CALLSEQ_END node.
03210   unsigned NumBytesForCalleeToPop;
03211   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
03212                        DAG.getTarget().Options.GuaranteedTailCallOpt))
03213     NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
03214   else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
03215            !Subtarget->getTargetTriple().isOSMSVCRT() &&
03216            SR == StackStructReturn)
03217     // If this is a call to a struct-return function, the callee
03218     // pops the hidden struct pointer, so we have to push it back.
03219     // This is common for Darwin/X86, Linux & Mingw32 targets.
03220     // For MSVC Win32 targets, the caller pops the hidden struct pointer.
03221     NumBytesForCalleeToPop = 4;
03222   else
03223     NumBytesForCalleeToPop = 0;  // Callee pops nothing.
03224 
03225   // Returns a flag for retval copy to use.
03226   if (!IsSibcall) {
03227     Chain = DAG.getCALLSEQ_END(Chain,
03228                                DAG.getIntPtrConstant(NumBytesToPop, dl, true),
03229                                DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
03230                                                      true),
03231                                InFlag, dl);
03232     InFlag = Chain.getValue(1);
03233   }
03234 
03235   // Handle result values, copying them out of physregs into vregs that we
03236   // return.
03237   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
03238                          Ins, dl, DAG, InVals);
03239 }
03240 
03241 //===----------------------------------------------------------------------===//
03242 //                Fast Calling Convention (tail call) implementation
03243 //===----------------------------------------------------------------------===//
03244 
03245 //  Like stdcall, the callee cleans up the arguments, except that ECX is
03246 //  reserved for storing the address of the tail-called function. Only 2 registers are
03247 //  free for argument passing (inreg). Tail call optimization is performed
03248 //  provided:
03249 //                * tailcallopt is enabled
03250 //                * caller/callee are fastcc
03251 //  On X86_64 architecture with GOT-style position independent code only local
03252 //  (within module) calls are supported at the moment.
03253 //  To keep the stack aligned according to the platform ABI, the function
03254 //  GetAlignedArgumentStackSize ensures that the argument delta is always a
03255 //  multiple of the stack alignment. (Dynamic linkers - e.g. Darwin's dyld - need this.)
03256 //  If the tail-called callee has more arguments than the caller, the caller
03257 //  needs to make sure that there is room to move the RETADDR to. This is
03258 //  achieved by reserving an area the size of the argument delta right after the
03259 //  original RETADDR, but before the saved frame pointer or the spilled registers,
03260 //  e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
03261 //  stack layout:
03262 //    arg1
03263 //    arg2
03264 //    RETADDR
03265 //    [ new RETADDR
03266 //      move area ]
03267 //    (possible EBP)
03268 //    ESI
03269 //    EDI
03270 //    local1 ..
03271 
03272 /// GetAlignedArgumentStackSize - Round the stack argument size up so that, with
03273 /// the return address slot included, the stack stays aligned (e.g. 16n + 12 for a 16 byte requirement).
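/// For illustration: with a 16 byte stack alignment and a 4 byte slot size
/// (32-bit mode), an argument area of 20 bytes is padded to 28 (16*1 + 12) and
/// one of 30 bytes is padded to 44 (16*2 + 12), so that pushing the 4 byte
/// return address restores 16 byte alignment.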
03274 unsigned
03275 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
03276                                                SelectionDAG& DAG) const {
03277   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
03278   const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
03279   unsigned StackAlignment = TFI.getStackAlignment();
03280   uint64_t AlignMask = StackAlignment - 1;
03281   int64_t Offset = StackSize;
03282   unsigned SlotSize = RegInfo->getSlotSize();
03283   if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
03284     // The low bits are at most (StackAlignment - SlotSize); just add the difference.
03285     Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
03286   } else {
03287     // Mask out the lower bits and add one stack alignment plus (StackAlignment - SlotSize).
03288     Offset = ((~AlignMask) & Offset) + StackAlignment +
03289       (StackAlignment-SlotSize);
03290   }
03291   return Offset;
03292 }
03293 
03294 /// MatchingStackOffset - Return true if the given stack call argument is
03295 /// already available in the same position (relatively) of the caller's
03296 /// incoming argument stack.
03297 static
03298 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
03299                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
03300                          const X86InstrInfo *TII) {
03301   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
03302   int FI = INT_MAX;
03303   if (Arg.getOpcode() == ISD::CopyFromReg) {
03304     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
03305     if (!TargetRegisterInfo::isVirtualRegister(VR))
03306       return false;
03307     MachineInstr *Def = MRI->getVRegDef(VR);
03308     if (!Def)
03309       return false;
03310     if (!Flags.isByVal()) {
03311       if (!TII->isLoadFromStackSlot(Def, FI))
03312         return false;
03313     } else {
03314       unsigned Opcode = Def->getOpcode();
03315       if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
03316            Opcode == X86::LEA64_32r) &&
03317           Def->getOperand(1).isFI()) {
03318         FI = Def->getOperand(1).getIndex();
03319         Bytes = Flags.getByValSize();
03320       } else
03321         return false;
03322     }
03323   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
03324     if (Flags.isByVal())
03325       // ByVal argument is passed in as a pointer but it's now being
03326       // dereferenced. e.g.
03327       // define @foo(%struct.X* %A) {
03328       //   tail call @bar(%struct.X* byval %A)
03329       // }
03330       return false;
03331     SDValue Ptr = Ld->getBasePtr();
03332     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
03333     if (!FINode)
03334       return false;
03335     FI = FINode->getIndex();
03336   } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
03337     FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
03338     FI = FINode->getIndex();
03339     Bytes = Flags.getByValSize();
03340   } else
03341     return false;
03342 
03343   assert(FI != INT_MAX);
03344   if (!MFI->isFixedObjectIndex(FI))
03345     return false;
03346   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
03347 }
03348 
03349 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
03350 /// for tail call optimization. Targets which want to do tail call
03351 /// optimization should implement this function.
03352 bool
03353 X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
03354                                                      CallingConv::ID CalleeCC,
03355                                                      bool isVarArg,
03356                                                      bool isCalleeStructRet,
03357                                                      bool isCallerStructRet,
03358                                                      Type *RetTy,
03359                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
03360                                     const SmallVectorImpl<SDValue> &OutVals,
03361                                     const SmallVectorImpl<ISD::InputArg> &Ins,
03362                                                      SelectionDAG &DAG) const {
03363   if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
03364     return false;
03365 
03366   // If -tailcallopt is specified, make fastcc functions tail-callable.
03367   const MachineFunction &MF = DAG.getMachineFunction();
03368   const Function *CallerF = MF.getFunction();
03369 
03370   // If the function return type is x86_fp80 and the callee return type is not,
03371   // then the FP_EXTEND of the call result is not a nop. It's not safe to
03372   // perform a tailcall optimization here.
03373   if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
03374     return false;
03375 
03376   CallingConv::ID CallerCC = CallerF->getCallingConv();
03377   bool CCMatch = CallerCC == CalleeCC;
03378   bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
03379   bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
03380 
03381   // Win64 functions have extra shadow space for argument homing. Don't do the
03382   // sibcall if the caller and callee have mismatched expectations for this
03383   // space.
03384   if (IsCalleeWin64 != IsCallerWin64)
03385     return false;
03386 
03387   if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
03388     if (IsTailCallConvention(CalleeCC) && CCMatch)
03389       return true;
03390     return false;
03391   }
03392 
03393   // Look for obvious safe cases to perform tail call optimization that do not
03394   // require ABI changes. This is what gcc calls sibcall.
03395 
03396   // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
03397   // emit a special epilogue.
03398   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
03399   if (RegInfo->needsStackRealignment(MF))
03400     return false;
03401 
03402   // Also avoid sibcall optimization if either caller or callee uses struct
03403   // return semantics.
03404   if (isCalleeStructRet || isCallerStructRet)
03405     return false;
03406 
03407   // An stdcall/thiscall caller is expected to clean up its arguments; the
03408   // callee isn't going to do that.
03409   // FIXME: this is more restrictive than needed. We could produce a tailcall
03410   // when the stack adjustment matches. For example, with a thiscall that takes
03411   // only one argument.
03412   if (!CCMatch && (CallerCC == CallingConv::X86_StdCall ||
03413                    CallerCC == CallingConv::X86_ThisCall))
03414     return false;
03415 
03416   // Do not sibcall optimize vararg calls unless all arguments are passed via
03417   // registers.
03418   if (isVarArg && !Outs.empty()) {
03419 
03420     // Optimizing for varargs on Win64 is unlikely to be safe without
03421     // additional testing.
03422     if (IsCalleeWin64 || IsCallerWin64)
03423       return false;
03424 
03425     SmallVector<CCValAssign, 16> ArgLocs;
03426     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03427                    *DAG.getContext());
03428 
03429     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03430     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
03431       if (!ArgLocs[i].isRegLoc())
03432         return false;
03433   }
03434 
03435   // If the call result is in ST0 / ST1, it needs to be popped off the x87
03436   // stack.  Therefore, if the result is unused, it is not safe to optimize
03437   // this call into a sibcall.
03438   bool Unused = false;
03439   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
03440     if (!Ins[i].Used) {
03441       Unused = true;
03442       break;
03443     }
03444   }
03445   if (Unused) {
03446     SmallVector<CCValAssign, 16> RVLocs;
03447     CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
03448                    *DAG.getContext());
03449     CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
03450     for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
03451       CCValAssign &VA = RVLocs[i];
03452       if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
03453         return false;
03454     }
03455   }
03456 
03457   // If the calling conventions do not match, then we'd better make sure the
03458   // results are returned in the same way as what the caller expects.
03459   if (!CCMatch) {
03460     SmallVector<CCValAssign, 16> RVLocs1;
03461     CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
03462                     *DAG.getContext());
03463     CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
03464 
03465     SmallVector<CCValAssign, 16> RVLocs2;
03466     CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
03467                     *DAG.getContext());
03468     CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
03469 
03470     if (RVLocs1.size() != RVLocs2.size())
03471       return false;
03472     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
03473       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
03474         return false;
03475       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
03476         return false;
03477       if (RVLocs1[i].isRegLoc()) {
03478         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
03479           return false;
03480       } else {
03481         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
03482           return false;
03483       }
03484     }
03485   }
03486 
03487   // If the callee takes no arguments then go on to check the results of the
03488   // call.
03489   if (!Outs.empty()) {
03490     // Check if stack adjustment is needed. For now, do not do this if any
03491     // argument is passed on the stack.
03492     SmallVector<CCValAssign, 16> ArgLocs;
03493     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03494                    *DAG.getContext());
03495 
03496     // Allocate shadow area for Win64
03497     if (IsCalleeWin64)
03498       CCInfo.AllocateStack(32, 8);
03499 
03500     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03501     if (CCInfo.getNextStackOffset()) {
03502       MachineFunction &MF = DAG.getMachineFunction();
03503       if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
03504         return false;
03505 
03506       // Check if the arguments are already laid out in the same way as
03507       // the caller's fixed stack objects.
03508       MachineFrameInfo *MFI = MF.getFrameInfo();
03509       const MachineRegisterInfo *MRI = &MF.getRegInfo();
03510       const X86InstrInfo *TII = Subtarget->getInstrInfo();
03511       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03512         CCValAssign &VA = ArgLocs[i];
03513         SDValue Arg = OutVals[i];
03514         ISD::ArgFlagsTy Flags = Outs[i].Flags;
03515         if (VA.getLocInfo() == CCValAssign::Indirect)
03516           return false;
03517         if (!VA.isRegLoc()) {
03518           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
03519                                    MFI, MRI, TII))
03520             return false;
03521         }
03522       }
03523     }
03524 
03525     // If the tailcall address may be in a register, then make sure it's
03526     // possible to register allocate for it. In 32-bit, the call address can
03527     // only target EAX, EDX, or ECX since the tail call must be scheduled after
03528     // callee-saved registers are restored. These happen to be the same
03529     // registers used to pass 'inreg' arguments so watch out for those.
03530     if (!Subtarget->is64Bit() &&
03531         ((!isa<GlobalAddressSDNode>(Callee) &&
03532           !isa<ExternalSymbolSDNode>(Callee)) ||
03533          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03534       unsigned NumInRegs = 0;
03535       // In PIC we need an extra register to formulate the address computation
03536       // for the callee.
03537       unsigned MaxInRegs =
03538         (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
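      // For illustration (worked from the loop below): with PIC, MaxInRegs is 2,
      // so at most one 'inreg' argument may land in EAX/ECX/EDX before the
      // sibcall is rejected; without PIC, up to two of these registers may be
      // used by arguments.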
03539 
03540       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03541         CCValAssign &VA = ArgLocs[i];
03542         if (!VA.isRegLoc())
03543           continue;
03544         unsigned Reg = VA.getLocReg();
03545         switch (Reg) {
03546         default: break;
03547         case X86::EAX: case X86::EDX: case X86::ECX:
03548           if (++NumInRegs == MaxInRegs)
03549             return false;
03550           break;
03551         }
03552       }
03553     }
03554   }
03555 
03556   return true;
03557 }
03558 
03559 FastISel *
03560 X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
03561                                   const TargetLibraryInfo *libInfo) const {
03562   return X86::createFastISel(funcInfo, libInfo);
03563 }
03564 
03565 //===----------------------------------------------------------------------===//
03566 //                           Other Lowering Hooks
03567 //===----------------------------------------------------------------------===//
03568 
03569 static bool MayFoldLoad(SDValue Op) {
03570   return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
03571 }
03572 
03573 static bool MayFoldIntoStore(SDValue Op) {
03574   return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
03575 }
03576 
03577 static bool isTargetShuffle(unsigned Opcode) {
03578   switch(Opcode) {
03579   default: return false;
03580   case X86ISD::BLENDI:
03581   case X86ISD::PSHUFB:
03582   case X86ISD::PSHUFD:
03583   case X86ISD::PSHUFHW:
03584   case X86ISD::PSHUFLW:
03585   case X86ISD::SHUFP:
03586   case X86ISD::PALIGNR:
03587   case X86ISD::MOVLHPS:
03588   case X86ISD::MOVLHPD:
03589   case X86ISD::MOVHLPS:
03590   case X86ISD::MOVLPS:
03591   case X86ISD::MOVLPD:
03592   case X86ISD::MOVSHDUP:
03593   case X86ISD::MOVSLDUP:
03594   case X86ISD::MOVDDUP:
03595   case X86ISD::MOVSS:
03596   case X86ISD::MOVSD:
03597   case X86ISD::UNPCKL:
03598   case X86ISD::UNPCKH:
03599   case X86ISD::VPERMILPI:
03600   case X86ISD::VPERM2X128:
03601   case X86ISD::VPERMI:
03602     return true;
03603   }
03604 }
03605 
03606 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03607                                     SDValue V1, unsigned TargetMask,
03608                                     SelectionDAG &DAG) {
03609   switch(Opc) {
03610   default: llvm_unreachable("Unknown x86 shuffle node");
03611   case X86ISD::PSHUFD:
03612   case X86ISD::PSHUFHW:
03613   case X86ISD::PSHUFLW:
03614   case X86ISD::VPERMILPI:
03615   case X86ISD::VPERMI:
03616     return DAG.getNode(Opc, dl, VT, V1,
03617                        DAG.getConstant(TargetMask, dl, MVT::i8));
03618   }
03619 }
03620 
03621 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03622                                     SDValue V1, SDValue V2, SelectionDAG &DAG) {
03623   switch(Opc) {
03624   default: llvm_unreachable("Unknown x86 shuffle node");
03625   case X86ISD::MOVLHPS:
03626   case X86ISD::MOVLHPD:
03627   case X86ISD::MOVHLPS:
03628   case X86ISD::MOVLPS:
03629   case X86ISD::MOVLPD:
03630   case X86ISD::MOVSS:
03631   case X86ISD::MOVSD:
03632   case X86ISD::UNPCKL:
03633   case X86ISD::UNPCKH:
03634     return DAG.getNode(Opc, dl, VT, V1, V2);
03635   }
03636 }
03637 
03638 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
03639   MachineFunction &MF = DAG.getMachineFunction();
03640   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
03641   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
03642   int ReturnAddrIndex = FuncInfo->getRAIndex();
03643 
03644   if (ReturnAddrIndex == 0) {
03645     // Set up a frame object for the return address.
03646     unsigned SlotSize = RegInfo->getSlotSize();
03647     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
03648                                                            -(int64_t)SlotSize,
03649                                                            false);
03650     FuncInfo->setRAIndex(ReturnAddrIndex);
03651   }
03652 
03653   return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
03654 }
03655 
03656 bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
03657                                        bool hasSymbolicDisplacement) {
03658   // Offset should fit into 32 bit immediate field.
03659   if (!isInt<32>(Offset))
03660     return false;
03661 
03662   // If we don't have a symbolic displacement - we don't have any extra
03663   // restrictions.
03664   if (!hasSymbolicDisplacement)
03665     return true;
03666 
03667   // FIXME: Some tweaks might be needed for medium code model.
03668   if (M != CodeModel::Small && M != CodeModel::Kernel)
03669     return false;
03670 
03671   // For the small code model, we assume the last object ends 16MB before the
03672   // 31-bit boundary. We may also accept fairly large negative constants, knowing
03673   // that all objects lie in the positive half of the address space.
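  // For illustration: with a symbolic displacement under the small code model,
  // an offset of 8*1024*1024 (8MB) is accepted below, while 32*1024*1024 (32MB)
  // is rejected because the object could then cross the 31-bit boundary.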
03674   if (M == CodeModel::Small && Offset < 16*1024*1024)
03675     return true;
03676 
03677   // For the kernel code model, we know that all objects reside in the negative
03678   // half of the 32-bit address space. We must not accept negative offsets, since
03679   // they may point just below the object, but we may accept fairly large positive ones.
03680   if (M == CodeModel::Kernel && Offset >= 0)
03681     return true;
03682 
03683   return false;
03684 }
03685 
03686 /// isCalleePop - Determines whether the callee is required to pop its
03687 /// own arguments. Callee pop is necessary to support tail calls.
03688 bool X86::isCalleePop(CallingConv::ID CallingConv,
03689                       bool is64Bit, bool IsVarArg, bool TailCallOpt) {
03690   switch (CallingConv) {
03691   default:
03692     return false;
03693   case CallingConv::X86_StdCall:
03694   case CallingConv::X86_FastCall:
03695   case CallingConv::X86_ThisCall:
03696     return !is64Bit;
03697   case CallingConv::Fast:
03698   case CallingConv::GHC:
03699   case CallingConv::HiPE:
03700     if (IsVarArg)
03701       return false;
03702     return TailCallOpt;
03703   }
03704 }
03705 
03706 /// \brief Return true if the condition is an unsigned comparison operation.
03707 static bool isX86CCUnsigned(unsigned X86CC) {
03708   switch (X86CC) {
03709   default: llvm_unreachable("Invalid integer condition!");
03710   case X86::COND_E:     return true;
03711   case X86::COND_G:     return false;
03712   case X86::COND_GE:    return false;
03713   case X86::COND_L:     return false;
03714   case X86::COND_LE:    return false;
03715   case X86::COND_NE:    return true;
03716   case X86::COND_B:     return true;
03717   case X86::COND_A:     return true;
03718   case X86::COND_BE:    return true;
03719   case X86::COND_AE:    return true;
03720   }
03721   llvm_unreachable("covered switch fell through?!");
03722 }
03723 
03724 /// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the X86
03725 /// specific condition code, returning the condition code and the LHS/RHS of the
03726 /// comparison to make.
03727 static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP,
03728                                SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
03729   if (!isFP) {
03730     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
03731       if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
03732         // X > -1   -> X == 0, jump !sign.
03733         RHS = DAG.getConstant(0, DL, RHS.getValueType());
03734         return X86::COND_NS;
03735       }
03736       if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
03737         // X < 0   -> X == 0, jump on sign.
03738         return X86::COND_S;
03739       }
03740       if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
03741         // X < 1   -> X <= 0
03742         RHS = DAG.getConstant(0, DL, RHS.getValueType());
03743         return X86::COND_LE;
03744       }
03745     }
03746 
03747     switch (SetCCOpcode) {
03748     default: llvm_unreachable("Invalid integer condition!");
03749     case ISD::SETEQ:  return X86::COND_E;
03750     case ISD::SETGT:  return X86::COND_G;
03751     case ISD::SETGE:  return X86::COND_GE;
03752     case ISD::SETLT:  return X86::COND_L;
03753     case ISD::SETLE:  return X86::COND_LE;
03754     case ISD::SETNE:  return X86::COND_NE;
03755     case ISD::SETULT: return X86::COND_B;
03756     case ISD::SETUGT: return X86::COND_A;
03757     case ISD::SETULE: return X86::COND_BE;
03758     case ISD::SETUGE: return X86::COND_AE;
03759     }
03760   }
03761 
03762   // First determine if it is required or is profitable to flip the operands.
03763 
03764   // If LHS is a foldable load, but RHS is not, flip the condition.
03765   if (ISD::isNON_EXTLoad(LHS.getNode()) &&
03766       !ISD::isNON_EXTLoad(RHS.getNode())) {
03767     SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
03768     std::swap(LHS, RHS);
03769   }
03770 
03771   switch (SetCCOpcode) {
03772   default: break;
03773   case ISD::SETOLT:
03774   case ISD::SETOLE:
03775   case ISD::SETUGT:
03776   case ISD::SETUGE:
03777     std::swap(LHS, RHS);
03778     break;
03779   }
03780 
03781   // On a floating point condition, the flags are set as follows:
03782   // ZF  PF  CF   op
03783   //  0 | 0 | 0 | X > Y
03784   //  0 | 0 | 1 | X < Y
03785   //  1 | 0 | 0 | X == Y
03786   //  1 | 1 | 1 | unordered
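  // For illustration: ISD::SETOLT has its operands swapped above, so the
  // resulting compare tests the original RHS against the original LHS, and the
  // ordered less-than maps to X86::COND_A (CF==0 and ZF==0) below.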
03787   switch (SetCCOpcode) {
03788   default: llvm_unreachable("Condcode should be pre-legalized away");
03789   case ISD::SETUEQ:
03790   case ISD::SETEQ:   return X86::COND_E;
03791   case ISD::SETOLT:              // flipped
03792   case ISD::SETOGT:
03793   case ISD::SETGT:   return X86::COND_A;
03794   case ISD::SETOLE:              // flipped
03795   case ISD::SETOGE:
03796   case ISD::SETGE:   return X86::COND_AE;
03797   case ISD::SETUGT:              // flipped
03798   case ISD::SETULT:
03799   case ISD::SETLT:   return X86::COND_B;
03800   case ISD::SETUGE:              // flipped
03801   case ISD::SETULE:
03802   case ISD::SETLE:   return X86::COND_BE;
03803   case ISD::SETONE:
03804   case ISD::SETNE:   return X86::COND_NE;
03805   case ISD::SETUO:   return X86::COND_P;
03806   case ISD::SETO:    return X86::COND_NP;
03807   case ISD::SETOEQ:
03808   case ISD::SETUNE:  return X86::COND_INVALID;
03809   }
03810 }
03811 
03812 /// hasFPCMov - Is there a floating point cmov for the specific X86 condition
03813 /// code? The current x86 ISA includes the following FP cmov instructions:
03814 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
03815 static bool hasFPCMov(unsigned X86CC) {
03816   switch (X86CC) {
03817   default:
03818     return false;
03819   case X86::COND_B:
03820   case X86::COND_BE:
03821   case X86::COND_E:
03822   case X86::COND_P:
03823   case X86::COND_A:
03824   case X86::COND_AE:
03825   case X86::COND_NE:
03826   case X86::COND_NP:
03827     return true;
03828   }
03829 }
03830 
03831 /// isFPImmLegal - Returns true if the target can instruction select the
03832 /// specified FP immediate natively. If false, the legalizer will
03833 /// materialize the FP immediate as a load from a constant pool.
03834 bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
03835   for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
03836     if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
03837       return true;
03838   }
03839   return false;
03840 }
03841 
03842 bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
03843                                               ISD::LoadExtType ExtTy,
03844                                               EVT NewVT) const {
03845   // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
03846   // relocations must target a movq or addq instruction: don't let the load shrink.
03847   SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
03848   if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
03849     if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
03850       return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
03851   return true;
03852 }
03853 
03854 /// \brief Returns true if it is beneficial to convert a load of a constant
03855 /// to just the constant itself.
03856 bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
03857                                                           Type *Ty) const {
03858   assert(Ty->isIntegerTy());
03859 
03860   unsigned BitSize = Ty->getPrimitiveSizeInBits();
03861   if (BitSize == 0 || BitSize > 64)
03862     return false;
03863   return true;
03864 }
03865 
03866 bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
03867                                                 unsigned Index) const {
03868   if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
03869     return false;
03870 
03871   return (Index == 0 || Index == ResVT.getVectorNumElements());
03872 }
03873 
03874 bool X86TargetLowering::isCheapToSpeculateCttz() const {
03875   // Speculate cttz only if we can directly use TZCNT.
03876   return Subtarget->hasBMI();
03877 }
03878 
03879 bool X86TargetLowering::isCheapToSpeculateCtlz() const {
03880   // Speculate ctlz only if we can directly use LZCNT.
03881   return Subtarget->hasLZCNT();
03882 }
03883 
03884 /// isUndefOrInRange - Return true if Val is undef or if its value falls within
03885 /// the specified range [Low, Hi).
03886 static bool isUndefOrInRange(int Val, int Low, int Hi) {
03887   return (Val < 0) || (Val >= Low && Val < Hi);
03888 }
03889 
03890 /// isUndefOrEqual - Val is either less than zero (undef) or equal to the
03891 /// specified value.
03892 static bool isUndefOrEqual(int Val, int CmpVal) {
03893   return (Val < 0 || Val == CmpVal);
03894 }
03895 
03896 /// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
03897 /// at position Pos and ending at Pos+Size, falls within the specified
03898 /// sequential range [Low, Low+Size) or is undef.
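/// For example, Mask = <-1, 5, 6, -1> with Pos = 0, Size = 4 and Low = 4
/// returns true, since each defined element equals Low plus its offset.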
03899 static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
03900                                        unsigned Pos, unsigned Size, int Low) {
03901   for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
03902     if (!isUndefOrEqual(Mask[i], Low))
03903       return false;
03904   return true;
03905 }
03906 
03907 /// isVEXTRACTIndex - Return true if the specified
03908 /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
03909 /// suitable for instructions that extract 128- or 256-bit vectors.
03910 static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
03911   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
03912   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
03913     return false;
03914 
03915   // The index should be aligned on a vecWidth-bit boundary.
03916   uint64_t Index =
03917     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
03918 
03919   MVT VT = N->getSimpleValueType(0);
03920   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
03921   bool Result = (Index * ElSize) % vecWidth == 0;
03922 
03923   return Result;
03924 }
03925 
03926 /// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR
03927 /// operand specifies a subvector insert that is suitable for the
03928 /// insertion of a 128- or 256-bit subvector.
03929 static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
03930   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
03931   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
03932     return false;
03933   // The index should be aligned on a vecWidth-bit boundary.
03934   uint64_t Index =
03935     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
03936 
03937   MVT VT = N->getSimpleValueType(0);
03938   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
03939   bool Result = (Index * ElSize) % vecWidth == 0;
03940 
03941   return Result;
03942 }
03943 
03944 bool X86::isVINSERT128Index(SDNode *N) {
03945   return isVINSERTIndex(N, 128);
03946 }
03947 
03948 bool X86::isVINSERT256Index(SDNode *N) {
03949   return isVINSERTIndex(N, 256);
03950 }
03951 
03952 bool X86::isVEXTRACT128Index(SDNode *N) {
03953   return isVEXTRACTIndex(N, 128);
03954 }
03955 
03956 bool X86::isVEXTRACT256Index(SDNode *N) {
03957   return isVEXTRACTIndex(N, 256);
03958 }
03959 
03960 static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
03961   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
03962   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
03963     llvm_unreachable("Illegal extract subvector for VEXTRACT");
03964 
03965   uint64_t Index =
03966     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
03967 
03968   MVT VecVT = N->getOperand(0).getSimpleValueType();
03969   MVT ElVT = VecVT.getVectorElementType();
03970 
03971   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
03972   return Index / NumElemsPerChunk;
03973 }
03974 
03975 static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
03976   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
03977   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
03978     llvm_unreachable("Illegal insert subvector for VINSERT");
03979 
03980   uint64_t Index =
03981     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
03982 
03983   MVT VecVT = N->getSimpleValueType(0);
03984   MVT ElVT = VecVT.getVectorElementType();
03985 
03986   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
03987   return Index / NumElemsPerChunk;
03988 }
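// For illustration: extracting the upper half of a v8i32 (Index == 4, i32
// elements, vecWidth == 128) gives NumElemsPerChunk == 128/32 == 4, so these
// helpers return the immediate 4/4 == 1, i.e. the second 128-bit lane.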
03989 
03990 /// getExtractVEXTRACT128Immediate - Return the appropriate immediate
03991 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
03992 /// and VEXTRACTI128 instructions.
03993 unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
03994   return getExtractVEXTRACTImmediate(N, 128);
03995 }
03996 
03997 /// getExtractVEXTRACT256Immediate - Return the appropriate immediate
03998 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4
03999 /// and VEXTRACTI64x4 instructions.
04000 unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
04001   return getExtractVEXTRACTImmediate(N, 256);
04002 }
04003 
04004 /// getInsertVINSERT128Immediate - Return the appropriate immediate
04005 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
04006 /// and VINSERTI128 instructions.
04007 unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
04008   return getInsertVINSERTImmediate(N, 128);
04009 }
04010 
04011 /// getInsertVINSERT256Immediate - Return the appropriate immediate
04012 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF64x4
04013 /// and VINSERTI64x4 instructions.
04014 unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
04015   return getInsertVINSERTImmediate(N, 256);
04016 }
04017 
04018 /// isZero - Returns true if V is a constant integer zero.
04019 static bool isZero(SDValue V) {
04020   ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
04021   return C && C->isNullValue();
04022 }
04023 
04024 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
04025 /// constant +0.0.
04026 bool X86::isZeroNode(SDValue Elt) {
04027   if (isZero(Elt))
04028     return true;
04029   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
04030     return CFP->getValueAPF().isPosZero();
04031   return false;
04032 }
04033 
04034 /// getZeroVector - Returns a vector of specified type with all zero elements.
04035 ///
04036 static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
04037                              SelectionDAG &DAG, SDLoc dl) {
04038   assert(VT.isVector() && "Expected a vector type");
04039 
04040   // Always build SSE zero vectors as <4 x i32> bitcasted
04041   // to their dest type. This ensures they get CSE'd.
04042   SDValue Vec;
04043   if (VT.is128BitVector()) {  // SSE
04044     if (Subtarget->hasSSE2()) {  // SSE2
04045       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
04046       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
04047     } else { // SSE1
04048       SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32);
04049       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
04050     }
04051   } else if (VT.is256BitVector()) { // AVX
04052     if (Subtarget->hasInt256()) { // AVX2
04053       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
04054       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04055       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
04056     } else {
04057       // 256-bit logic and arithmetic instructions in AVX are all
04058       // floating-point; there is no support for integer ops. Emit FP zero vectors.
04059       SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32);
04060       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04061       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
04062     }
04063   } else if (VT.is512BitVector()) { // AVX-512
04064       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
04065       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
04066                         Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04067       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
04068   } else if (VT.getScalarType() == MVT::i1) {
04069 
04070     assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16)
04071             && "Unexpected vector type");
04072     assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8)
04073             && "Unexpected vector type");
04074     SDValue Cst = DAG.getConstant(0, dl, MVT::i1);
04075     SmallVector<SDValue, 64> Ops(VT.getVectorNumElements(), Cst);
04076     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
04077   } else
04078     llvm_unreachable("Unexpected vector type");
04079 
04080   return DAG.getBitcast(VT, Vec);
04081 }
04082 
04083 static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
04084                                 SelectionDAG &DAG, SDLoc dl,
04085                                 unsigned vectorWidth) {
04086   assert((vectorWidth == 128 || vectorWidth == 256) &&
04087          "Unsupported vector width");
04088   EVT VT = Vec.getValueType();
04089   EVT ElVT = VT.getVectorElementType();
04090   unsigned Factor = VT.getSizeInBits()/vectorWidth;
04091   EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
04092                                   VT.getVectorNumElements()/Factor);
04093 
04094   // Extract from UNDEF is UNDEF.
04095   if (Vec.getOpcode() == ISD::UNDEF)
04096     return DAG.getUNDEF(ResultVT);
04097 
04098   // Extract the relevant vectorWidth bits.  Generate an EXTRACT_SUBVECTOR
04099   unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
04100 
04101   // This is the index of the first element of the vectorWidth-bit chunk
04102   // we want.
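  // For illustration: extracting 128 bits starting at element 6 of a v8f32
  // gives ElemsPerChunk == 4 and NormalizedIdxVal == ((6*32)/128)*4 == 4, i.e.
  // the index is rounded down to the start of the containing 128-bit chunk.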
04103   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
04104                                * ElemsPerChunk);
04105 
04106   // If the input is a buildvector just emit a smaller one.
04107   if (Vec.getOpcode() == ISD::BUILD_VECTOR)
04108     return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
04109                        makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
04110                                     ElemsPerChunk));
04111 
04112   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
04113   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
04114 }
04115 
04116 /// Generate a DAG to grab 128-bits from a vector > 128 bits.  This
04117 /// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
04118 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
04119 /// instructions or a simple subregister reference. Idx is an index in the
04120 /// 128 bits we want.  It need not be aligned to a 128-bit boundary.  That makes
04121 /// lowering EXTRACT_VECTOR_ELT operations easier.
04122 static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
04123                                    SelectionDAG &DAG, SDLoc dl) {
04124   assert((Vec.getValueType().is256BitVector() ||
04125           Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
04126   return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
04127 }
04128 
04129 /// Generate a DAG to grab 256-bits from a 512-bit vector.
04130 static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
04131                                    SelectionDAG &DAG, SDLoc dl) {
04132   assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
04133   return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
04134 }
04135 
04136 static SDValue InsertSubVector(SDValue Result, SDValue Vec,
04137                                unsigned IdxVal, SelectionDAG &DAG,
04138                                SDLoc dl, unsigned vectorWidth) {
04139   assert((vectorWidth == 128 || vectorWidth == 256) &&
04140          "Unsupported vector width");
04141   // Inserting an UNDEF subvector simply yields Result.
04142   if (Vec.getOpcode() == ISD::UNDEF)
04143     return Result;
04144   EVT VT = Vec.getValueType();
04145   EVT ElVT = VT.getVectorElementType();
04146   EVT ResultVT = Result.getValueType();
04147 
04148   // Insert the relevant vectorWidth bits.
04149   unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
04150 
04151   // This is the index of the first element of the vectorWidth-bit chunk
04152   // we want.
04153   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
04154                                * ElemsPerChunk);
04155 
04156   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
04157   return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
04158 }
04159 
04160 /// Generate a DAG to put 128-bits into a vector > 128 bits.  This
04161 /// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
04162 /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
04163 /// simple superregister reference.  Idx is an index in the 128 bits
04164 /// we want.  It need not be aligned to a 128-bit boundary.  That makes
04165 /// lowering INSERT_VECTOR_ELT operations easier.
04166 static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
04167                                   SelectionDAG &DAG, SDLoc dl) {
04168   assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
04169 
04170   // For insertion into the zero index (low half) of a 256-bit vector, it is
04171   // more efficient to generate a blend with immediate instead of an insert*128.
04172   // We are still creating an INSERT_SUBVECTOR below with an undef node to
04173   // extend the subvector to the size of the result vector. Make sure that
04174   // we are not recursing on that node by checking for undef here.
04175   if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
04176       Result.getOpcode() != ISD::UNDEF) {
04177     EVT ResultVT = Result.getValueType();
04178     SDValue ZeroIndex = DAG.getIntPtrConstant(0, dl);
04179     SDValue Undef = DAG.getUNDEF(ResultVT);
04180     SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
04181                                  Vec, ZeroIndex);
04182 
04183     // The blend instruction, and therefore its mask, depend on the data type.
04184     MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
04185     if (ScalarType.isFloatingPoint()) {
04186       // Choose either vblendps (float) or vblendpd (double).
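      // For illustration: for a v4f64 result the mask computed below is 0x03,
      // which selects elements 0 and 1 from Vec256 (the inserted subvector) and
      // keeps elements 2 and 3 from Result; for v8f32 it is 0x0f.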
04187       unsigned ScalarSize = ScalarType.getSizeInBits();
04188       assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
04189       unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
04190       SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8);
04191       return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
04192     }
04193 
04194     const X86Subtarget &Subtarget =
04195     static_cast<const X86Subtarget &>(DAG.getSubtarget());
04196 
04197     // AVX2 is needed for 256-bit integer blend support.
04198     // Integers must be cast to 32-bit because there is only vpblendd;
04199     // vpblendw can't be used for this because it has a handicapped mask.
04200 
04201     // If we don't have AVX2, then cast to float. Using a wrong domain blend
04202     // is still more efficient than using the wrong domain vinsertf128 that
04203     // will be created by InsertSubVector().
04204     MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
04205 
04206     SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
04207     Vec256 = DAG.getBitcast(CastVT, Vec256);
04208     Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
04209     return DAG.getBitcast(ResultVT, Vec256);
04210   }
04211 
04212   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
04213 }
04214 
04215 static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
04216                                   SelectionDAG &DAG, SDLoc dl) {
04217   assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
04218   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
04219 }
04220 
04221 /// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
04222 /// instructions. This is used because creating CONCAT_VECTORS nodes of
04223 /// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
04224 /// large BUILD_VECTORS.
04225 static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
04226                                    unsigned NumElems, SelectionDAG &DAG,
04227                                    SDLoc dl) {
04228   SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
04229   return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
04230 }
04231 
04232 static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
04233                                    unsigned NumElems, SelectionDAG &DAG,
04234                                    SDLoc dl) {
04235   SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
04236   return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
04237 }
04238 
04239 /// getOnesVector - Returns a vector of specified type with all bits set.
04240 /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
04241 /// no AVX2 support, use two <4 x i32>s inserted into an <8 x i32> appropriately.
04242 /// Then bitcast to their original type, ensuring they get CSE'd.
04243 static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
04244                              SDLoc dl) {
04245   assert(VT.isVector() && "Expected a vector type");
04246 
04247   SDValue Cst = DAG.getConstant(~0U, dl, MVT::i32);
04248   SDValue Vec;
04249   if (VT.is256BitVector()) {
04250     if (HasInt256) { // AVX2
04251       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04252       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
04253     } else { // AVX
04254       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
04255       Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
04256     }
04257   } else if (VT.is128BitVector()) {
04258     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
04259   } else
04260     llvm_unreachable("Unexpected vector type");
04261 
04262   return DAG.getBitcast(VT, Vec);
04263 }
04264 
04265 /// getMOVL - Returns a vector_shuffle node for a movs{s|d}, movd
04266 /// operation of the specified width.
04267 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
04268                        SDValue V2) {
04269   unsigned NumElems = VT.getVectorNumElements();
04270   SmallVector<int, 8> Mask;
04271   Mask.push_back(NumElems);
04272   for (unsigned i = 1; i != NumElems; ++i)
04273     Mask.push_back(i);
04274   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
04275 }
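// For illustration: for a v4f32 the mask built above is <4,1,2,3>, so element 0
// of the result comes from V2 and the remaining elements come from V1, which is
// exactly the movss behaviour.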
04276 
04277 /// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
04278 static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
04279                           SDValue V2) {
04280   unsigned NumElems = VT.getVectorNumElements();
04281   SmallVector<int, 8> Mask;
04282   for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
04283     Mask.push_back(i);
04284     Mask.push_back(i + NumElems);
04285   }
04286   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
04287 }
04288 
04289 /// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
04290 static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
04291                           SDValue V2) {
04292   unsigned NumElems = VT.getVectorNumElements();
04293   SmallVector<int, 8> Mask;
04294   for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
04295     Mask.push_back(i + Half);
04296     Mask.push_back(i + NumElems + Half);
04297   }
04298   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
04299 }
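// For illustration: for a v4i32, getUnpackl builds the mask <0,4,1,5> and
// getUnpackh builds <2,6,3,7>, interleaving the low and high halves of V1 and
// V2 respectively.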
04300 
04301 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
04302 /// vector against a zero or undef vector.  This produces a shuffle where the
04303 /// low element of V2 is swizzled into the zero/undef vector, landing at element
04304 /// Idx.  This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
04305 static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
04306                                            bool IsZero,
04307                                            const X86Subtarget *Subtarget,
04308                                            SelectionDAG &DAG) {
04309   MVT VT = V2.getSimpleValueType();
04310   SDValue V1 = IsZero
04311     ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
04312   unsigned NumElems = VT.getVectorNumElements();
04313   SmallVector<int, 16> MaskVec;
04314   for (unsigned i = 0; i != NumElems; ++i)
04315     // If this is the insertion idx, put the low elt of V2 here.
04316     MaskVec.push_back(i == Idx ? NumElems : i);
04317   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
04318 }
04319 
04320 /// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
04321 /// target specific opcode. Returns true if the Mask could be calculated. Sets
04322 /// IsUnary to true if it only uses one source. Note that this will set IsUnary for
04323 /// shuffles which use a single input multiple times, and in those cases it will
04324 /// adjust the mask to only have indices within that single input.
04325 static bool getTargetShuffleMask(SDNode *N, MVT VT,
04326                                  SmallVectorImpl<int> &Mask, bool &IsUnary) {
04327   unsigned NumElems = VT.getVectorNumElements();
04328   SDValue ImmN;
04329 
04330   IsUnary = false;
04331   bool IsFakeUnary = false;
04332   switch(N->getOpcode()) {
04333   case X86ISD::BLENDI:
04334     ImmN = N->getOperand(N->getNumOperands()-1);
04335     DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04336     break;
04337   case X86ISD::SHUFP:
04338     ImmN = N->getOperand(N->getNumOperands()-1);
04339     DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04340     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04341     break;
04342   case X86ISD::UNPCKH:
04343     DecodeUNPCKHMask(VT, Mask);
04344     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04345     break;
04346   case X86ISD::UNPCKL:
04347     DecodeUNPCKLMask(VT, Mask);
04348     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04349     break;
04350   case X86ISD::MOVHLPS:
04351     DecodeMOVHLPSMask(NumElems, Mask);
04352     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04353     break;
04354   case X86ISD::MOVLHPS:
04355     DecodeMOVLHPSMask(NumElems, Mask);
04356     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04357     break;
04358   case X86ISD::PALIGNR:
04359     ImmN = N->getOperand(N->getNumOperands()-1);
04360     DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04361     break;
04362   case X86ISD::PSHUFD:
04363   case X86ISD::VPERMILPI:
04364     ImmN = N->getOperand(N->getNumOperands()-1);
04365     DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04366     IsUnary = true;
04367     break;
04368   case X86ISD::PSHUFHW:
04369     ImmN = N->getOperand(N->getNumOperands()-1);
04370     DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04371     IsUnary = true;
04372     break;
04373   case X86ISD::PSHUFLW:
04374     ImmN = N->getOperand(N->getNumOperands()-1);
04375     DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04376     IsUnary = true;
04377     break;
04378   case X86ISD::PSHUFB: {
04379     IsUnary = true;
04380     SDValue MaskNode = N->getOperand(1);
04381     while (MaskNode->getOpcode() == ISD::BITCAST)
04382       MaskNode = MaskNode->getOperand(0);
04383 
04384     if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
04385       // If we have a build-vector, then things are easy.
04386       EVT VT = MaskNode.getValueType();
04387       assert(VT.isVector() &&
04388              "Can't produce a non-vector with a build_vector!");
04389       if (!VT.isInteger())
04390         return false;
04391 
04392       int NumBytesPerElement = VT.getVectorElementType().getSizeInBits() / 8;
04393 
04394       SmallVector<uint64_t, 32> RawMask;
04395       for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
04396         SDValue Op = MaskNode->getOperand(i);
04397         if (Op->getOpcode() == ISD::UNDEF) {
04398           RawMask.push_back((uint64_t)SM_SentinelUndef);
04399           continue;
04400         }
04401         auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
04402         if (!CN)
04403           return false;
04404         APInt MaskElement = CN->getAPIntValue();
04405 
04406         // We now have to decode the element which could be any integer size and
04407         // extract each byte of it.
04408         for (int j = 0; j < NumBytesPerElement; ++j) {
04409           // Note that this is x86 and so always little endian: the low byte is
04410           // the first byte of the mask.
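          // For illustration: a v4i32 mask element of 0x80808003 yields the raw
          // bytes 0x03, 0x80, 0x80, 0x80 in that order.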
04411           RawMask.push_back(MaskElement.getLoBits(8).getZExtValue());
04412           MaskElement = MaskElement.lshr(8);
04413         }
04414       }
04415       DecodePSHUFBMask(RawMask, Mask);
04416       break;
04417     }
04418 
04419     auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
04420     if (!MaskLoad)
04421       return false;
04422 
04423     SDValue Ptr = MaskLoad->getBasePtr();
04424     if (Ptr->getOpcode() == X86ISD::Wrapper ||
04425         Ptr->getOpcode() == X86ISD::WrapperRIP)
04426       Ptr = Ptr->getOperand(0);
04427 
04428     auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
04429     if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
04430       return false;
04431 
04432     if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
04433       DecodePSHUFBMask(C, Mask);
04434       if (Mask.empty())
04435         return false;
04436       break;
04437     }
04438 
04439     return false;
04440   }
04441   case X86ISD::VPERMI:
04442     ImmN = N->getOperand(N->getNumOperands()-1);
04443     DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04444     IsUnary = true;
04445     break;
04446   case X86ISD::MOVSS:
04447   case X86ISD::MOVSD:
04448     DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
04449     break;
04450   case X86ISD::VPERM2X128:
04451     ImmN = N->getOperand(N->getNumOperands()-1);
04452     DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04453     if (Mask.empty()) return false;
04454     break;
04455   case X86ISD::MOVSLDUP:
04456     DecodeMOVSLDUPMask(VT, Mask);
04457     IsUnary = true;
04458     break;
04459   case X86ISD::MOVSHDUP:
04460     DecodeMOVSHDUPMask(VT, Mask);
04461     IsUnary = true;
04462     break;
04463   case X86ISD::MOVDDUP:
04464     DecodeMOVDDUPMask(VT, Mask);
04465     IsUnary = true;
04466     break;
04467   case X86ISD::MOVLHPD:
04468   case X86ISD::MOVLPD:
04469   case X86ISD::MOVLPS:
04470     // Not yet implemented
04471     return false;
04472   default: llvm_unreachable("unknown target shuffle node");
04473   }
04474 
04475   // If we have a fake unary shuffle, the shuffle mask is spread across two
04476   // inputs that are actually the same node. Re-map the mask to always point
04477   // into the first input.
04478   if (IsFakeUnary)
04479     for (int &M : Mask)
04480       if (M >= (int)Mask.size())
04481         M -= Mask.size();
04482 
04483   return true;
04484 }
04485 
04486 /// getShuffleScalarElt - Returns the scalar element that will make up the ith
04487 /// element of the result of the vector shuffle.
04488 static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
04489                                    unsigned Depth) {
04490   if (Depth == 6)
04491     return SDValue();  // Limit search depth.
04492 
04493   SDValue V = SDValue(N, 0);
04494   EVT VT = V.getValueType();
04495   unsigned Opcode = V.getOpcode();
04496 
04497   // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
04498   if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
04499     int Elt = SV->getMaskElt(Index);
04500 
04501     if (Elt < 0)
04502       return DAG.getUNDEF(VT.getVectorElementType());
04503 
04504     unsigned NumElems = VT.getVectorNumElements();
04505     SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
04506                                          : SV->getOperand(1);
04507     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
04508   }
04509 
04510   // Recurse into target specific vector shuffles to find scalars.
04511   if (isTargetShuffle(Opcode)) {
04512     MVT ShufVT = V.getSimpleValueType();
04513     unsigned NumElems = ShufVT.getVectorNumElements();
04514     SmallVector<int, 16> ShuffleMask;
04515     bool IsUnary;
04516 
04517     if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
04518       return SDValue();
04519 
04520     int Elt = ShuffleMask[Index];
04521     if (Elt < 0)
04522       return DAG.getUNDEF(ShufVT.getVectorElementType());
04523 
04524     SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
04525                                          : N->getOperand(1);
04526     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
04527                                Depth+1);
04528   }
04529 
04530   // Actual nodes that may contain scalar elements
04531   if (Opcode == ISD::BITCAST) {
04532     V = V.getOperand(0);
04533     EVT SrcVT = V.getValueType();
04534     unsigned NumElems = VT.getVectorNumElements();
04535 
04536     if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
04537       return SDValue();
04538   }
04539 
04540   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
04541     return (Index == 0) ? V.getOperand(0)
04542                         : DAG.getUNDEF(VT.getVectorElementType());
04543 
04544   if (V.getOpcode() == ISD::BUILD_VECTOR)
04545     return V.getOperand(Index);
04546 
04547   return SDValue();
04548 }
04549 
04550 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
04551 ///
04552 static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
04553                                        unsigned NumNonZero, unsigned NumZero,
04554                                        SelectionDAG &DAG,
04555                                        const X86Subtarget* Subtarget,
04556                                        const TargetLowering &TLI) {
04557   if (NumNonZero > 8)
04558     return SDValue();
04559 
04560   SDLoc dl(Op);
04561   SDValue V;
04562   bool First = true;
04563 
04564   // SSE4.1 - use PINSRB to insert each byte directly.
04565   if (Subtarget->hasSSE41()) {
04566     for (unsigned i = 0; i < 16; ++i) {
04567       bool isNonZero = (NonZeros & (1 << i)) != 0;
04568       if (isNonZero) {
04569         if (First) {
04570           if (NumZero)
04571             V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
04572           else
04573             V = DAG.getUNDEF(MVT::v16i8);
04574           First = false;
04575         }
04576         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
04577                         MVT::v16i8, V, Op.getOperand(i),
04578                         DAG.getIntPtrConstant(i, dl));
04579       }
04580     }
04581 
04582     return V;
04583   }
04584 
04585   // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
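  // For illustration: bytes 2k and 2k+1 are zero-extended to i16, the odd byte
  // is shifted left by 8 and OR'd with the even byte, and the combined value is
  // inserted as 16-bit element k with PINSRW.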
04586   for (unsigned i = 0; i < 16; ++i) {
04587     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
04588     if (ThisIsNonZero && First) {
04589       if (NumZero)
04590         V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
04591       else
04592         V = DAG.getUNDEF(MVT::v8i16);
04593       First = false;
04594     }
04595 
04596     if ((i & 1) != 0) {
04597       SDValue ThisElt, LastElt;
04598       bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
04599       if (LastIsNonZero) {
04600         LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
04601                               MVT::i16, Op.getOperand(i-1));
04602       }
04603       if (ThisIsNonZero) {
04604         ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
04605         ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
04606                               ThisElt, DAG.getConstant(8, dl, MVT::i8));
04607         if (LastIsNonZero)
04608           ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
04609       } else
04610         ThisElt = LastElt;
04611 
04612       if (ThisElt.getNode())
04613         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
04614                         DAG.getIntPtrConstant(i/2, dl));
04615     }
04616   }
04617 
04618   return DAG.getBitcast(MVT::v16i8, V);
04619 }
04620 
04621 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
04622 ///
04623 static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
04624                                      unsigned NumNonZero, unsigned NumZero,
04625                                      SelectionDAG &DAG,
04626                                      const X86Subtarget* Subtarget,
04627                                      const TargetLowering &TLI) {
04628   if (NumNonZero > 4)
04629     return SDValue();
04630 
04631   SDLoc dl(Op);
04632   SDValue V;
04633   bool First = true;
04634   for (unsigned i = 0; i < 8; ++i) {
04635     bool isNonZero = (NonZeros & (1 << i)) != 0;
04636     if (isNonZero) {
04637       if (First) {
04638         if (NumZero)
04639           V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
04640         else
04641           V = DAG.getUNDEF(MVT::v8i16);
04642         First = false;
04643       }
04644       V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
04645                       MVT::v8i16, V, Op.getOperand(i),
04646                       DAG.getIntPtrConstant(i, dl));
04647     }
04648   }
04649 
04650   return V;
04651 }
04652 
04653 /// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
04654 static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
04655                                      const X86Subtarget *Subtarget,
04656                                      const TargetLowering &TLI) {
04657   // Find all zeroable elements.
04658   std::bitset<4> Zeroable;
04659   for (int i=0; i < 4; ++i) {
04660     SDValue Elt = Op->getOperand(i);
04661     Zeroable[i] = (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt));
04662   }
04663   assert(Zeroable.size() - Zeroable.count() > 1 &&
04664          "We expect at least two non-zero elements!");
04665 
04666   // We only know how to deal with build_vector nodes where elements are either
04667   // zeroable or extract_vector_elt with constant index.
04668   SDValue FirstNonZero;
04669   unsigned FirstNonZeroIdx;
04670   for (unsigned i=0; i < 4; ++i) {
04671     if (Zeroable[i])
04672       continue;
04673     SDValue Elt = Op->getOperand(i);
04674     if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
04675         !isa<ConstantSDNode>(Elt.getOperand(1)))
04676       return SDValue();
04677     // Make sure that this node is extracting from a 128-bit vector.
04678     MVT VT = Elt.getOperand(0).getSimpleValueType();
04679     if (!VT.is128BitVector())
04680       return SDValue();
04681     if (!FirstNonZero.getNode()) {
04682       FirstNonZero = Elt;
04683       FirstNonZeroIdx = i;
04684     }
04685   }
04686 
04687   assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
04688   SDValue V1 = FirstNonZero.getOperand(0);
04689   MVT VT = V1.getSimpleValueType();
04690 
04691   // See if this build_vector can be lowered as a blend with zero.
04692   SDValue Elt;
04693   unsigned EltMaskIdx, EltIdx;
04694   int Mask[4];
04695   for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
04696     if (Zeroable[EltIdx]) {
04697       // The zero vector will be on the right hand side.
04698       Mask[EltIdx] = EltIdx+4;
04699       continue;
04700     }
04701 
04702     Elt = Op->getOperand(EltIdx);
04703     // By construction, Elt is an EXTRACT_VECTOR_ELT with a constant index.
04704     EltMaskIdx = cast<ConstantSDNode>(Elt.getOperand(1))->getZExtValue();
04705     if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
04706       break;
04707     Mask[EltIdx] = EltIdx;
04708   }
04709 
04710   if (EltIdx == 4) {
04711     // Let the shuffle legalizer deal with blend operations.
04712     SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
04713     if (V1.getSimpleValueType() != VT)
04714       V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), VT, V1);
04715     return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, &Mask[0]);
04716   }
04717 
04718   // See if we can lower this build_vector to an INSERTPS.
04719   if (!Subtarget->hasSSE41())
04720     return SDValue();
04721 
04722   SDValue V2 = Elt.getOperand(0);
04723   if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
04724     V1 = SDValue();
04725 
04726   bool CanFold = true;
04727   for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
04728     if (Zeroable[i])
04729       continue;
04730 
04731     SDValue Current = Op->getOperand(i);
04732     SDValue SrcVector = Current->getOperand(0);
04733     if (!V1.getNode())
04734       V1 = SrcVector;
04735     CanFold = SrcVector == V1 &&
04736       cast<ConstantSDNode>(Current.getOperand(1))->getZExtValue() == i;
04737   }
04738 
04739   if (!CanFold)
04740     return SDValue();
04741 
04742   assert(V1.getNode() && "Expected at least two non-zero elements!");
04743   if (V1.getSimpleValueType() != MVT::v4f32)
04744     V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), MVT::v4f32, V1);
04745   if (V2.getSimpleValueType() != MVT::v4f32)
04746     V2 = DAG.getNode(ISD::BITCAST, SDLoc(V2), MVT::v4f32, V2);
04747 
04748   // Ok, we can emit an INSERTPS instruction.
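  // The 8-bit INSERTPS immediate is laid out as count_s (bits 7:6, source
  // lane), count_d (bits 5:4, destination lane) and zmask (bits 3:0, result
  // lanes forced to zero). For example (hypothetical values), EltMaskIdx = 2,
  // EltIdx = 1 and Zeroable = {1,0,0,1} yield 0b10011001 (0x99).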
04749   unsigned ZMask = Zeroable.to_ulong();
04750 
04751   unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
04752   assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
04753   SDLoc DL(Op);
04754   SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
04755                                DAG.getIntPtrConstant(InsertPSMask, DL));
04756   return DAG.getBitcast(VT, Result);
04757 }
04758 
04759 /// Return a vector logical shift node.
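/// The shift is performed as a whole-vector byte shift: the source is bitcast
/// to v2i64 and shifted with X86ISD::VSHLDQ / X86ISD::VSRLDQ, so NumBits must
/// be a multiple of 8.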
04760 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
04761                          unsigned NumBits, SelectionDAG &DAG,
04762                          const TargetLowering &TLI, SDLoc dl) {
04763   assert(VT.is128BitVector() && "Unknown type for VShift");
04764   MVT ShVT = MVT::v2i64;
04765   unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
04766   SrcOp = DAG.getBitcast(ShVT, SrcOp);
04767   MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType());
04768   assert(NumBits % 8 == 0 && "Only support byte sized shifts");
04769   SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
04770   return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
04771 }
04772 
04773 static SDValue
04774 LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
04775 
04776   // Check if the scalar load can be widened into a vector load, and if the
04777   // address is "base + cst", see if the cst can be "absorbed" into the
04778   // shuffle mask.
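  // Illustrative example (hypothetical values): an i32 load from FI+8 widened
  // to v4i32 becomes a v4i32 load from FI (StartOffset = 8 & ~15 = 0) followed
  // by a splat shuffle of element (8 - 0) >> 2 = 2.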
04779   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
04780     SDValue Ptr = LD->getBasePtr();
04781     if (!ISD::isNormalLoad(LD) || LD->isVolatile())
04782       return SDValue();
04783     EVT PVT = LD->getValueType(0);
04784     if (PVT != MVT::i32 && PVT != MVT::f32)
04785       return SDValue();
04786 
04787     int FI = -1;
04788     int64_t Offset = 0;
04789     if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
04790       FI = FINode->getIndex();
04791       Offset = 0;
04792     } else if (DAG.isBaseWithConstantOffset(Ptr) &&
04793                isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
04794       FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
04795       Offset = Ptr.getConstantOperandVal(1);
04796       Ptr = Ptr.getOperand(0);
04797     } else {
04798       return SDValue();
04799     }
04800 
04801     // FIXME: 256-bit vector instructions don't require a strict alignment;
04802     // improve this code to support them better.
04803     unsigned RequiredAlign = VT.getSizeInBits()/8;
04804     SDValue Chain = LD->getChain();
04805     // Make sure the stack object alignment is at least 16 or 32.
04806     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
04807     if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
04808       if (MFI->isFixedObjectIndex(FI)) {
04809         // Can't change the alignment. FIXME: It's possible to compute the
04810         // exact stack offset and reference FI + adjusted offset instead;
04811         // that's the way to implement it if someone *really* cares.
04812         return SDValue();
04813       } else {
04814         MFI->setObjectAlignment(FI, RequiredAlign);
04815       }
04816     }
04817 
04818     // (Offset % 16 or 32) must be a multiple of 4. The address is then
04819     // Ptr + (Offset & ~(RequiredAlign - 1)).
04820     if (Offset < 0)
04821       return SDValue();
04822     if ((Offset % RequiredAlign) & 3)
04823       return SDValue();
04824     int64_t StartOffset = Offset & ~(RequiredAlign-1);
04825     if (StartOffset) {
04826       SDLoc DL(Ptr);
04827       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
04828                         DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
04829     }
04830 
04831     int EltNo = (Offset - StartOffset) >> 2;
04832     unsigned NumElems = VT.getVectorNumElements();
04833 
04834     EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
04835     SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
04836                              LD->getPointerInfo().getWithOffset(StartOffset),
04837                              false, false, false, 0);
04838 
04839     SmallVector<int, 8> Mask(NumElems, EltNo);
04840 
04841     return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
04842   }
04843 
04844   return SDValue();
04845 }
04846 
04847 /// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
04848 /// elements can be replaced by a single large load which has the same value as
04849 /// a build_vector or insert_subvector whose loaded operands are 'Elts'.
04850 ///
04851 /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
04852 ///
04853 /// FIXME: we'd also like to handle the case where the last elements are zero
04854 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
04855 /// There's even a handy isZeroNode for that purpose.
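/// When every element is a load or undef and the loads are consecutive, the
/// whole vector is rebuilt as one wide load, e.g. (illustrative):
///   <load i32 *a, load i32 *(a+4), load i32 *(a+8), load i32 *(a+12)>
///     -> (v4i32 load *a)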
04856 static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
04857                                         SDLoc &DL, SelectionDAG &DAG,
04858                                         bool isAfterLegalize) {
04859   unsigned NumElems = Elts.size();
04860 
04861   LoadSDNode *LDBase = nullptr;
04862   unsigned LastLoadedElt = -1U;
04863 
04864   // For each element in the initializer, see if we've found a load or an undef.
04865   // If we don't find an initial load element, or later load elements are
04866   // non-consecutive, bail out.
04867   for (unsigned i = 0; i < NumElems; ++i) {
04868     SDValue Elt = Elts[i];
04869     // Look through a bitcast.
04870     if (Elt.getNode() && Elt.getOpcode() == ISD::BITCAST)
04871       Elt = Elt.getOperand(0);
04872     if (!Elt.getNode() ||
04873         (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
04874       return SDValue();
04875     if (!LDBase) {
04876       if (Elt.getNode()->getOpcode() == ISD::UNDEF)
04877         return SDValue();
04878       LDBase = cast<LoadSDNode>(Elt.getNode());
04879       LastLoadedElt = i;
04880       continue;
04881     }
04882     if (Elt.getOpcode() == ISD::UNDEF)
04883       continue;
04884 
04885     LoadSDNode *LD = cast<LoadSDNode>(Elt);
04886     EVT LdVT = Elt.getValueType();
04887     // Each loaded element must be the correct fractional portion of the
04888     // requested vector load.
04889     if (LdVT.getSizeInBits() != VT.getSizeInBits() / NumElems)
04890       return SDValue();
04891     if (!DAG.isConsecutiveLoad(LD, LDBase, LdVT.getSizeInBits() / 8, i))
04892       return SDValue();
04893     LastLoadedElt = i;
04894   }
04895 
04896   // If we have found an entire vector of loads and undefs, then return a large
04897   // load of the entire vector width starting at the base pointer.  If we found
04898   // consecutive loads for the low half, generate a vzext_load node.
04899   if (LastLoadedElt == NumElems - 1) {
04900     assert(LDBase && "Did not find base load for merging consecutive loads");
04901     EVT EltVT = LDBase->getValueType(0);
04902     // Ensure that the input vector size for the merged loads matches the
04903     // cumulative size of the input elements.
04904     if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
04905       return SDValue();
04906 
04907     if (isAfterLegalize &&
04908         !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
04909       return SDValue();
04910 
04911     SDValue NewLd = SDValue();
04912 
04913     NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
04914                         LDBase->getPointerInfo(), LDBase->isVolatile(),
04915                         LDBase->isNonTemporal(), LDBase->isInvariant(),
04916                         LDBase->getAlignment());
04917 
04918     if (LDBase->hasAnyUseOfValue(1)) {
04919       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
04920                                      SDValue(LDBase, 1),
04921                                      SDValue(NewLd.getNode(), 1));
04922       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
04923       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
04924                              SDValue(NewLd.getNode(), 1));
04925     }
04926 
04927     return NewLd;
04928   }
04929 
04930   // TODO: The code below fires only for loading the low v2i32 / v2f32
04931   // of a v4i32 / v4f32. It's probably worth generalizing.
04932   EVT EltVT = VT.getVectorElementType();
04933   if (NumElems == 4 && LastLoadedElt == 1 && (EltVT.getSizeInBits() == 32) &&
04934       DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
04935     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
04936     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
04937     SDValue ResNode =
04938         DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64,
04939                                 LDBase->getPointerInfo(),
04940                                 LDBase->getAlignment(),
04941                                 false/*isVolatile*/, true/*ReadMem*/,
04942                                 false/*WriteMem*/);
04943 
04944     // Make sure the newly-created LOAD is in the same position as LDBase in
04945     // terms of dependency. We create a TokenFactor for LDBase and ResNode, and
04946     // update uses of LDBase's output chain to use the TokenFactor.
04947     if (LDBase->hasAnyUseOfValue(1)) {
04948       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
04949                              SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
04950       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
04951       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
04952                              SDValue(ResNode.getNode(), 1));
04953     }
04954 
04955     return DAG.getBitcast(VT, ResNode);
04956   }
04957   return SDValue();
04958 }
04959 
04960 /// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
04961 /// to generate a splat value for the following cases:
04962 /// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
04963 /// 2. A splat shuffle which uses a scalar_to_vector node which comes from
04964 /// a scalar load, or a constant.
04965 /// The VBROADCAST node is returned when a pattern is found,
04966 /// or SDValue() otherwise.
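/// Illustrative example: a v8f32 BUILD_VECTOR splat of a loaded f32 becomes
///   (v8f32 (X86ISD::VBROADCAST (load f32 addr)))
/// which selects to a vbroadcastss from memory on AVX.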
04967 static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
04968                                     SelectionDAG &DAG) {
04969   // VBROADCAST requires AVX.
04970   // TODO: Splats could be generated for non-AVX CPUs using SSE
04971   // instructions, but there's less potential gain for only 128-bit vectors.
04972   if (!Subtarget->hasAVX())
04973     return SDValue();
04974 
04975   MVT VT = Op.getSimpleValueType();
04976   SDLoc dl(Op);
04977 
04978   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
04979          "Unsupported vector type for broadcast.");
04980 
04981   SDValue Ld;
04982   bool ConstSplatVal;
04983 
04984   switch (Op.getOpcode()) {
04985     default:
04986       // Unknown pattern found.
04987       return SDValue();
04988 
04989     case ISD::BUILD_VECTOR: {
04990       auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
04991       BitVector UndefElements;
04992       SDValue Splat = BVOp->getSplatValue(&UndefElements);
04993 
04994       // We need a splat of a single value to use broadcast, and it doesn't
04995       // make any sense if the value is only in one element of the vector.
04996       if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
04997         return SDValue();
04998 
04999       Ld = Splat;
05000       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
05001                        Ld.getOpcode() == ISD::ConstantFP);
05002 
05003       // Make sure that all of the users of a non-constant load are from the
05004       // BUILD_VECTOR node.
05005       if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
05006         return SDValue();
05007       break;
05008     }
05009 
05010     case ISD::VECTOR_SHUFFLE: {
05011       ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
05012 
05013       // Shuffles must have a splat mask where the first element is
05014       // broadcasted.
05015       if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
05016         return SDValue();
05017 
05018       SDValue Sc = Op.getOperand(0);
05019       if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
05020           Sc.getOpcode() != ISD::BUILD_VECTOR) {
05021 
05022         if (!Subtarget->hasInt256())
05023           return SDValue();
05024 
05025         // Use the register form of the broadcast instruction available on AVX2.
05026         if (VT.getSizeInBits() >= 256)
05027           Sc = Extract128BitVector(Sc, 0, DAG, dl);
05028         return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
05029       }
05030 
05031       Ld = Sc.getOperand(0);
05032       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
05033                        Ld.getOpcode() == ISD::ConstantFP);
05034 
05035       // The scalar_to_vector node and the suspected
05036       // load node must have exactly one user.
05037       // Constants may have multiple users.
05038 
05039       // AVX-512 has a register version of the broadcast instruction.
05040       bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
05041         Ld.getValueType().getSizeInBits() >= 32;
05042       if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
05043           !hasRegVer))
05044         return SDValue();
05045       break;
05046     }
05047   }
05048 
05049   unsigned ScalarSize = Ld.getValueType().getSizeInBits();
05050   bool IsGE256 = (VT.getSizeInBits() >= 256);
05051 
05052   // When optimizing for size, generate up to 5 extra bytes for a broadcast
05053   // instruction to save 8 or more bytes of constant pool data.
05054   // TODO: If multiple splats are generated to load the same constant,
05055   // it may be detrimental to overall size. There needs to be a way to detect
05056   // that condition to know if this is truly a size win.
05057   const Function *F = DAG.getMachineFunction().getFunction();
05058   bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize);
05059 
05060   // Handle broadcasting a single constant scalar from the constant pool
05061   // into a vector.
05062   // On Sandybridge (no AVX2), it is still better to load a constant vector
05063   // from the constant pool and not to broadcast it from a scalar.
05064   // But override that restriction when optimizing for size.
05065   // TODO: Check if splatting is recommended for other AVX-capable CPUs.
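  // For example (illustrative): splatting a single f32 from the constant pool
  // into a v8f32 with VBROADCAST keeps a 4-byte scalar in the pool instead of
  // a full 32-byte vector constant.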
05066   if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) {
05067     EVT CVT = Ld.getValueType();
05068     assert(!CVT.isVector() && "Must not broadcast a vector type");
05069 
05070     // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
05071     // For size optimization, also splat v2f64 and v2i64, and for size opt
05072     // with AVX2, also splat i8 and i16.
05073     // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
05074     if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
05075         (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) {
05076       const Constant *C = nullptr;
05077       if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
05078         C = CI->getConstantIntValue();
05079       else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
05080         C = CF->getConstantFPValue();
05081 
05082       assert(C && "Invalid constant type");
05083 
05084       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
05085       SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
05086       unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
05087       Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
05088                        MachinePointerInfo::getConstantPool(),
05089                        false, false, false, Alignment);
05090 
05091       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
05092     }
05093   }
05094 
05095   bool IsLoad = ISD::isNormalLoad(Ld.getNode());
05096 
05097   // Handle AVX2 in-register broadcasts.
05098   if (!IsLoad && Subtarget->hasInt256() &&
05099       (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
05100     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
05101 
05102   // The scalar source must be a normal load.
05103   if (!IsLoad)
05104     return SDValue();
05105 
05106   if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
05107       (Subtarget->hasVLX() && ScalarSize == 64))
05108     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
05109 
05110   // The integer check is needed for the 64-bit into 128-bit case, so that it
05111   // doesn't match double, since there is no vbroadcastsd xmm.
05112   if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
05113     if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
05114       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
05115   }
05116 
05117   // Unsupported broadcast.
05118   return SDValue();
05119 }
05120 
05121 /// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
05122 /// underlying vector and index.
05123 ///
05124 /// Modifies \p ExtractedFromVec to the real vector and returns the real
05125 /// index.
05126 static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
05127                                          SDValue ExtIdx) {
05128   int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
05129   if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
05130     return Idx;
05131 
05132   // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
05133   // lowered this:
05134   //   (extract_vector_elt (v8f32 %vreg1), Constant<6>)
05135   // to:
05136   //   (extract_vector_elt (vector_shuffle<2,u,u,u>
05137   //                           (extract_subvector (v8f32 %vreg0), Constant<4>),
05138   //                           undef)
05139   //                       Constant<0>)
05140   // In this case the vector is the extract_subvector expression and the index
05141   // is 2, as specified by the shuffle.
05142   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
05143   SDValue ShuffleVec = SVOp->getOperand(0);
05144   MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
05145   assert(ShuffleVecVT.getVectorElementType() ==
05146          ExtractedFromVec.getSimpleValueType().getVectorElementType());
05147 
05148   int ShuffleIdx = SVOp->getMaskElt(Idx);
05149   if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
05150     ExtractedFromVec = ShuffleVec;
05151     return ShuffleIdx;
05152   }
05153   return Idx;
05154 }
05155 
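/// Attempt to lower a BUILD_VECTOR whose operands are mostly
/// EXTRACT_VECTOR_ELTs taken from at most two source vectors of the same type
/// as the result: emit a single vector_shuffle for those elements and then
/// INSERT_VECTOR_ELT the remaining non-extract, non-undef operands, bailing
/// out if too many elements would need inserting.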
05156 static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
05157   MVT VT = Op.getSimpleValueType();
05158 
05159   // Skip if insert_vec_elt is not supported.
05160   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
05161   if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
05162     return SDValue();
05163 
05164   SDLoc DL(Op);
05165   unsigned NumElems = Op.getNumOperands();
05166 
05167   SDValue VecIn1;
05168   SDValue VecIn2;
05169   SmallVector<unsigned, 4> InsertIndices;
05170   SmallVector<int, 8> Mask(NumElems, -1);
05171 
05172   for (unsigned i = 0; i != NumElems; ++i) {
05173     unsigned Opc = Op.getOperand(i).getOpcode();
05174 
05175     if (Opc == ISD::UNDEF)
05176       continue;
05177 
05178     if (Opc != ISD::EXTRACT_VECTOR_ELT) {
05179       // Quit if more than 1 element needs inserting.
05180       if (InsertIndices.size() > 1)
05181         return SDValue();
05182 
05183       InsertIndices.push_back(i);
05184       continue;
05185     }
05186 
05187     SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
05188     SDValue ExtIdx = Op.getOperand(i).getOperand(1);
05189     // Quit if non-constant index.
05190     if (!isa<ConstantSDNode>(ExtIdx))
05191       return SDValue();
05192     int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
05193 
05194     // Quit if extracted from vector of different type.
05195     if (ExtractedFromVec.getValueType() != VT)
05196       return SDValue();
05197 
05198     if (!VecIn1.getNode())
05199       VecIn1 = ExtractedFromVec;
05200     else if (VecIn1 != ExtractedFromVec) {
05201       if (!VecIn2.getNode())
05202         VecIn2 = ExtractedFromVec;
05203       else if (VecIn2 != ExtractedFromVec)
05204         // Quit if more than 2 vectors to shuffle
05205         return SDValue();
05206     }
05207 
05208     if (ExtractedFromVec == VecIn1)
05209       Mask[i] = Idx;
05210     else if (ExtractedFromVec == VecIn2)
05211       Mask[i] = Idx + NumElems;
05212   }
05213 
05214   if (!VecIn1.getNode())
05215     return SDValue();
05216 
05217   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
05218   SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
05219   for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
05220     unsigned Idx = InsertIndices[i];
05221     NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
05222                      DAG.getIntPtrConstant(Idx, DL));
05223   }
05224 
05225   return NV;
05226 }
05227 
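/// Pack a constant build_vector of i1 values into an integer immediate, one
/// bit per element (element idx maps to bit idx). For example (illustrative),
/// <i1 1, i1 0, i1 1, i1 1> becomes the constant 0b1101, returned as an
/// integer of at least 8 bits.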
05228 static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
05229   assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
05230          Op.getScalarValueSizeInBits() == 1 &&
05231          "Cannot convert non-constant vector");
05232   uint64_t Immediate = 0;
05233   for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
05234     SDValue In = Op.getOperand(idx);
05235     if (In.getOpcode() != ISD::UNDEF)
05236       Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
05237   }
05238   SDLoc dl(Op);
05239   MVT VT =
05240    MVT::getIntegerVT(std::max((int)Op.getValueType().getSizeInBits(), 8));
05241   return DAG.getConstant(Immediate, dl, VT);
05242 }
05243 // Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
05244 SDValue
05245 X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
05246 
05247   MVT VT = Op.getSimpleValueType();
05248   assert((VT.getVectorElementType() == MVT::i1) &&
05249          "Unexpected type in LowerBUILD_VECTORvXi1!");
05250 
05251   SDLoc dl(Op);
05252   if (ISD::isBuildVectorAllZeros(Op.getNode())) {
05253     SDValue Cst = DAG.getTargetConstant(0, dl, MVT::i1);
05254     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
05255     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
05256   }
05257 
05258   if (ISD::isBuildVectorAllOnes(Op.getNode())) {
05259     SDValue Cst = DAG.getTargetConstant(1, dl, MVT::i1);
05260     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
05261     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
05262   }
05263 
05264   if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
05265     SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
05266     if (Imm.getValueSizeInBits() == VT.getSizeInBits())
05267       return DAG.getBitcast(VT, Imm);
05268     SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
05269     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
05270                         DAG.getIntPtrConstant(0, dl));
05271   }
05272 
05273   // The vector has one or more non-constant elements.
05274   uint64_t Immediate = 0;
05275   SmallVector<unsigned, 16> NonConstIdx;
05276   bool IsSplat = true;
05277   bool HasConstElts = false;
05278   int SplatIdx = -1;
05279   for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
05280     SDValue In = Op.getOperand(idx);
05281     if (In.getOpcode() == ISD::UNDEF)
05282       continue;
05283     if (!isa<ConstantSDNode>(In))
05284       NonConstIdx.push_back(idx);
05285     else {
05286       Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
05287       HasConstElts = true;
05288     }
05289     if (SplatIdx == -1)
05290       SplatIdx = idx;
05291     else if (In != Op.getOperand(SplatIdx))
05292       IsSplat = false;
05293   }
05294 
05295   // For a splat, use (select i1 splat_elt, all-ones, all-zeroes).
05296   if (IsSplat)
05297     return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx),
05298                        DAG.getConstant(1, dl, VT),
05299                        DAG.getConstant(0, dl, VT));
05300 
05301   // Insert the non-constant elements one by one.
05302   SDValue DstVec;
05303   SDValue Imm;
05304   if (Immediate) {
05305     MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
05306     Imm = DAG.getConstant(Immediate, dl, ImmVT);
05307   }
05308   else if (HasConstElts)
05309     Imm = DAG.getConstant(0, dl, VT);
05310   else
05311     Imm = DAG.getUNDEF(VT);
05312   if (Imm.getValueSizeInBits() == VT.getSizeInBits())
05313     DstVec = DAG.getBitcast(VT, Imm);
05314   else {
05315     SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
05316     DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
05317                          DAG.getIntPtrConstant(0, dl));
05318   }
05319 
05320   for (unsigned i = 0; i < NonConstIdx.size(); ++i) {
05321     unsigned InsertIdx = NonConstIdx[i];
05322     DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
05323                          Op.getOperand(InsertIdx),
05324                          DAG.getIntPtrConstant(InsertIdx, dl));
05325   }
05326   return DstVec;
05327 }
05328 
05329 /// \brief Return true if \p N implements a horizontal binop and return the
05330 /// operands of the horizontal binop in V0 and V1.
05331 ///
05332 /// This is a helper function of LowerToHorizontalOp().
05333 /// This function checks whether the input build_vector \p N implements a
05334 /// horizontal operation. Parameter \p Opcode defines the kind of horizontal
05335 /// operation to match.
05336 /// For example, if \p Opcode is equal to ISD::ADD, then this function
05337 /// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
05338 /// is equal to ISD::SUB, then this function checks if this is a horizontal
05339 /// arithmetic sub.
05340 ///
05341 /// This function only analyzes elements of \p N whose indices are
05342 /// in range [BaseIdx, LastIdx).
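/// Illustrative example: with \p Opcode == ISD::FADD, the v4f32 build_vector
///   ((fadd (extract A, 0), (extract A, 1)), (fadd (extract A, 2), (extract A, 3)),
///    (fadd (extract B, 0), (extract B, 1)), (fadd (extract B, 2), (extract B, 3)))
/// matches with V0 = A and V1 = B, which is the lane layout of haddps.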
05343 static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
05344                               SelectionDAG &DAG,
05345                               unsigned BaseIdx, unsigned LastIdx,
05346                               SDValue &V0, SDValue &V1) {
05347   EVT VT = N->getValueType(0);
05348 
05349   assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
05350   assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
05351          "Invalid Vector in input!");
05352 
05353   bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
05354   bool CanFold = true;
05355   unsigned ExpectedVExtractIdx = BaseIdx;
05356   unsigned NumElts = LastIdx - BaseIdx;
05357   V0 = DAG.getUNDEF(VT);
05358   V1 = DAG.getUNDEF(VT);
05359 
05360   // Check if N implements a horizontal binop.
05361   for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
05362     SDValue Op = N->getOperand(i + BaseIdx);
05363 
05364     // Skip UNDEFs.
05365     if (Op->getOpcode() == ISD::UNDEF) {
05366       // Update the expected vector extract index.
05367       if (i * 2 == NumElts)
05368         ExpectedVExtractIdx = BaseIdx;
05369       ExpectedVExtractIdx += 2;
05370       continue;
05371     }
05372 
05373     CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
05374 
05375     if (!CanFold)
05376       break;
05377 
05378     SDValue Op0 = Op.getOperand(0);
05379     SDValue Op1 = Op.getOperand(1);
05380 
05381     // Try to match the following pattern:
05382     // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
05383     CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
05384         Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
05385         Op0.getOperand(0) == Op1.getOperand(0) &&
05386         isa<ConstantSDNode>(Op0.getOperand(1)) &&
05387         isa<ConstantSDNode>(Op1.getOperand(1)));
05388     if (!CanFold)
05389       break;
05390 
05391     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
05392     unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
05393 
05394     if (i * 2 < NumElts) {
05395       if (V0.getOpcode() == ISD::UNDEF) {
05396         V0 = Op0.getOperand(0);
05397         if (V0.getValueType() != VT)
05398           return false;
05399       }
05400     } else {
05401       if (V1.getOpcode() == ISD::UNDEF) {
05402         V1 = Op0.getOperand(0);
05403         if (V1.getValueType() != VT)
05404           return false;
05405       }
05406       if (i * 2 == NumElts)
05407         ExpectedVExtractIdx = BaseIdx;
05408     }
05409 
05410     SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
05411     if (I0 == ExpectedVExtractIdx)
05412       CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
05413     else if (IsCommutable && I1 == ExpectedVExtractIdx) {
05414       // Try to match the following dag sequence:
05415       // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
05416       CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
05417     } else
05418       CanFold = false;
05419 
05420     ExpectedVExtractIdx += 2;
05421   }
05422 
05423   return CanFold;
05424 }
05425 
05426 /// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
05427 /// a concat_vector.
05428 ///
05429 /// This is a helper function of LowerToHorizontalOp().
05430 /// This function expects two 256-bit vectors called V0 and V1.
05431 /// At first, each vector is split into two separate 128-bit vectors.
05432 /// Then, the resulting 128-bit vectors are used to implement two
05433 /// horizontal binary operations.
05434 ///
05435 /// The kind of horizontal binary operation is defined by \p X86Opcode.
05436 ///
05437 /// \p Mode specifies how the 128-bit parts of V0 and V1 are passed as input to
05438 /// the two new horizontal binops.
05439 /// When Mode is set, the first horizontal binop dag node takes as input the
05440 /// lower 128 bits of V0 and the upper 128 bits of V0. The second horizontal
05441 /// binop dag node takes as input the lower 128 bits of V1 and the upper
05442 /// 128 bits of V1.
05443 ///   Example:
05444 ///     HADD V0_LO, V0_HI
05445 ///     HADD V1_LO, V1_HI
05446 ///
05447 /// Otherwise, the first horizontal binop dag node takes as input the lower
05448 /// 128 bits of V0 and the lower 128 bits of V1, and the second horizontal binop
05449 /// dag node takes the upper 128 bits of V0 and the upper 128 bits of V1.
05450 ///   Example:
05451 ///     HADD V0_LO, V1_LO
05452 ///     HADD V0_HI, V1_HI
05453 ///
05454 /// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
05455 /// 128 bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
05456 /// the upper 128 bits of the result.
05457 static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
05458                                      SDLoc DL, SelectionDAG &DAG,
05459                                      unsigned X86Opcode, bool Mode,
05460                                      bool isUndefLO, bool isUndefHI) {
05461   EVT VT = V0.getValueType();
05462   assert(VT.is256BitVector() && VT == V1.getValueType() &&
05463          "Invalid nodes in input!");
05464 
05465   unsigned NumElts = VT.getVectorNumElements();
05466   SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL);
05467   SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL);
05468   SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL);
05469   SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL);
05470   EVT NewVT = V0_LO.getValueType();
05471 
05472   SDValue LO = DAG.getUNDEF(NewVT);
05473   SDValue HI = DAG.getUNDEF(NewVT);
05474 
05475   if (Mode) {
05476     // Don't emit a horizontal binop if the result is expected to be UNDEF.
05477     if (!isUndefLO && V0->getOpcode() != ISD::UNDEF)
05478       LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
05479     if (!isUndefHI && V1->getOpcode() != ISD::UNDEF)
05480       HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
05481   } else {
05482     // Don't emit a horizontal binop if the result is expected to be UNDEF.
05483     if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF ||
05484                        V1_LO->getOpcode() != ISD::UNDEF))
05485       LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
05486 
05487     if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF ||
05488                        V1_HI->getOpcode() != ISD::UNDEF))
05489       HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
05490   }
05491 
05492   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
05493 }
05494 
05495 /// Try to fold a build_vector that performs an 'addsub' into an
05496 /// X86ISD::ADDSUB node.
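/// Illustrative example: the v4f32 build_vector
///   ((fsub (extract A, 0), (extract B, 0)), (fadd (extract A, 1), (extract B, 1)),
///    (fsub (extract A, 2), (extract B, 2)), (fadd (extract A, 3), (extract B, 3)))
/// becomes (X86ISD::ADDSUB A, B), i.e. addsubps.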
05497 static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
05498                              const X86Subtarget *Subtarget, SelectionDAG &DAG) {
05499   EVT VT = BV->getValueType(0);
05500   if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
05501       (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
05502     return SDValue();
05503 
05504   SDLoc DL(BV);
05505   unsigned NumElts = VT.getVectorNumElements();
05506   SDValue InVec0 = DAG.getUNDEF(VT);
05507   SDValue InVec1 = DAG.getUNDEF(VT);
05508 
05509   assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
05510           VT == MVT::v2f64) && "build_vector with an invalid type found!");
05511 
05512   // Odd-numbered elements in the input build vector are obtained from
05513   // adding two integer/float elements.
05514   // Even-numbered elements in the input build vector are obtained from
05515   // subtracting two integer/float elements.
05516   unsigned ExpectedOpcode = ISD::FSUB;
05517   unsigned NextExpectedOpcode = ISD::FADD;
05518   bool AddFound = false;
05519   bool SubFound = false;
05520 
05521   for (unsigned i = 0, e = NumElts; i != e; ++i) {
05522     SDValue Op = BV->getOperand(i);
05523 
05524     // Skip 'undef' values.
05525     unsigned Opcode = Op.getOpcode();
05526     if (Opcode == ISD::UNDEF) {
05527       std::swap(ExpectedOpcode, NextExpectedOpcode);
05528       continue;
05529     }
05530 
05531     // Early exit if we found an unexpected opcode.
05532     if (Opcode != ExpectedOpcode)
05533       return SDValue();
05534 
05535     SDValue Op0 = Op.getOperand(0);
05536     SDValue Op1 = Op.getOperand(1);
05537 
05538     // Try to match the following pattern:
05539     // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
05540     // Early exit if we cannot match that sequence.
05541     if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
05542         Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
05543         !isa<ConstantSDNode>(Op0.getOperand(1)) ||
05544         !isa<ConstantSDNode>(Op1.getOperand(1)) ||
05545         Op0.getOperand(1) != Op1.getOperand(1))
05546       return SDValue();
05547 
05548     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
05549     if (I0 != i)
05550       return SDValue();
05551 
05552     // We found a valid add/sub node. Update the information accordingly.
05553     if (i & 1)
05554       AddFound = true;
05555     else
05556       SubFound = true;
05557 
05558     // Update InVec0 and InVec1.
05559     if (InVec0.getOpcode() == ISD::UNDEF) {
05560       InVec0 = Op0.getOperand(0);
05561       if (InVec0.getValueType() != VT)
05562         return SDValue();
05563     }
05564     if (InVec1.getOpcode() == ISD::UNDEF) {
05565       InVec1 = Op1.getOperand(0);
05566       if (InVec1.getValueType() != VT)
05567         return SDValue();
05568     }
05569 
05570     // Make sure that the input operands of each add/sub node always
05571     // come from the same pair of vectors.
05572     if (InVec0 != Op0.getOperand(0)) {
05573       if (ExpectedOpcode == ISD::FSUB)
05574         return SDValue();
05575 
05576       // FADD is commutable. Try to commute the operands
05577       // and then test again.
05578       std::swap(Op0, Op1);
05579       if (InVec0 != Op0.getOperand(0))
05580         return SDValue();
05581     }
05582 
05583     if (InVec1 != Op1.getOperand(0))
05584       return SDValue();
05585 
05586     // Update the pair of expected opcodes.
05587     std::swap(ExpectedOpcode, NextExpectedOpcode);
05588   }
05589 
05590   // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
05591   if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
05592       InVec1.getOpcode() != ISD::UNDEF)
05593     return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
05594 
05595   return SDValue();
05596 }
05597 
05598 /// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
05599 static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
05600                                    const X86Subtarget *Subtarget,
05601                                    SelectionDAG &DAG) {
05602   EVT VT = BV->getValueType(0);
05603   unsigned NumElts = VT.getVectorNumElements();
05604   unsigned NumUndefsLO = 0;
05605   unsigned NumUndefsHI = 0;
05606   unsigned Half = NumElts/2;
05607 
05608   // Count the number of UNDEF operands in the input build_vector.
05609   for (unsigned i = 0, e = Half; i != e; ++i)
05610     if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
05611       NumUndefsLO++;
05612 
05613   for (unsigned i = Half, e = NumElts; i != e; ++i)
05614     if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
05615       NumUndefsHI++;
05616 
05617   // Early exit if this is either a build_vector of all UNDEFs or all the
05618   // operands but one are UNDEF.
05619   if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
05620     return SDValue();
05621 
05622   SDLoc DL(BV);
05623   SDValue InVec0, InVec1;
05624   if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
05625     // Try to match an SSE3 float HADD/HSUB.
05626     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
05627       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
05628 
05629     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
05630       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
05631   } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
05632     // Try to match an SSSE3 integer HADD/HSUB.
05633     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
05634       return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
05635 
05636     if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
05637       return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
05638   }
05639 
05640   if (!Subtarget->hasAVX())
05641     return SDValue();
05642 
05643   if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
05644     // Try to match an AVX horizontal add/sub of packed single/double
05645     // precision floating point values from 256-bit vectors.
05646     SDValue InVec2, InVec3;
05647     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
05648         isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
05649         ((InVec0.getOpcode() == ISD::UNDEF ||
05650           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
05651         ((InVec1.getOpcode() == ISD::UNDEF ||
05652           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
05653       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
05654 
05655     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
05656         isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
05657         ((InVec0.getOpcode() == ISD::UNDEF ||
05658           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
05659         ((InVec1.getOpcode() == ISD::UNDEF ||
05660           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
05661       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
05662   } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
05663     // Try to match an AVX2 horizontal add/sub of signed integers.
05664     SDValue InVec2, InVec3;
05665     unsigned X86Opcode;
05666     bool CanFold = true;
05667 
05668     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
05669         isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
05670         ((InVec0.getOpcode() == ISD::UNDEF ||
05671           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
05672         ((InVec1.getOpcode() == ISD::UNDEF ||
05673           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
05674       X86Opcode = X86ISD::HADD;
05675     else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
05676         isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
05677         ((InVec0.getOpcode() == ISD::UNDEF ||
05678           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
05679         ((InVec1.getOpcode() == ISD::UNDEF ||
05680           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
05681       X86Opcode = X86ISD::HSUB;
05682     else
05683       CanFold = false;
05684 
05685     if (CanFold) {
05686       // Fold this build_vector into a single horizontal add/sub.
05687       // Do this only if the target has AVX2.
05688       if (Subtarget->hasAVX2())
05689         return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
05690 
05691       // Do not try to expand this build_vector into a pair of horizontal
05692       // add/sub if we can emit a pair of scalar add/sub.
05693       if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
05694         return SDValue();
05695 
05696       // Convert this build_vector into a pair of horizontal binop followed by
05697       // a concat vector.
05698       bool isUndefLO = NumUndefsLO == Half;
05699       bool isUndefHI = NumUndefsHI == Half;
05700       return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
05701                                    isUndefLO, isUndefHI);
05702     }
05703   }
05704 
05705   if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
05706        VT == MVT::v16i16) && Subtarget->hasAVX()) {
05707     unsigned X86Opcode;
05708     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
05709       X86Opcode = X86ISD::HADD;
05710     else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
05711       X86Opcode = X86ISD::HSUB;
05712     else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
05713       X86Opcode = X86ISD::FHADD;
05714     else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
05715       X86Opcode = X86ISD::FHSUB;
05716     else
05717       return SDValue();
05718 
05719     // Don't try to expan