X86ISelLowering.cpp
00001 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines the interfaces that X86 uses to lower LLVM code into a
00011 // selection DAG.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "X86ISelLowering.h"
00016 #include "Utils/X86ShuffleDecode.h"
00017 #include "X86CallingConv.h"
00018 #include "X86FrameLowering.h"
00019 #include "X86InstrBuilder.h"
00020 #include "X86MachineFunctionInfo.h"
00021 #include "X86TargetMachine.h"
00022 #include "X86TargetObjectFile.h"
00023 #include "llvm/ADT/SmallBitVector.h"
00024 #include "llvm/ADT/SmallSet.h"
00025 #include "llvm/ADT/Statistic.h"
00026 #include "llvm/ADT/StringExtras.h"
00027 #include "llvm/ADT/StringSwitch.h"
00028 #include "llvm/CodeGen/IntrinsicLowering.h"
00029 #include "llvm/CodeGen/MachineFrameInfo.h"
00030 #include "llvm/CodeGen/MachineFunction.h"
00031 #include "llvm/CodeGen/MachineInstrBuilder.h"
00032 #include "llvm/CodeGen/MachineJumpTableInfo.h"
00033 #include "llvm/CodeGen/MachineModuleInfo.h"
00034 #include "llvm/CodeGen/MachineRegisterInfo.h"
00035 #include "llvm/CodeGen/WinEHFuncInfo.h"
00036 #include "llvm/IR/CallSite.h"
00037 #include "llvm/IR/CallingConv.h"
00038 #include "llvm/IR/Constants.h"
00039 #include "llvm/IR/DerivedTypes.h"
00040 #include "llvm/IR/Function.h"
00041 #include "llvm/IR/GlobalAlias.h"
00042 #include "llvm/IR/GlobalVariable.h"
00043 #include "llvm/IR/Instructions.h"
00044 #include "llvm/IR/Intrinsics.h"
00045 #include "llvm/MC/MCAsmInfo.h"
00046 #include "llvm/MC/MCContext.h"
00047 #include "llvm/MC/MCExpr.h"
00048 #include "llvm/MC/MCSymbol.h"
00049 #include "llvm/Support/CommandLine.h"
00050 #include "llvm/Support/Debug.h"
00051 #include "llvm/Support/ErrorHandling.h"
00052 #include "llvm/Support/MathExtras.h"
00053 #include "llvm/Target/TargetOptions.h"
00054 #include "X86IntrinsicsInfo.h"
00055 #include <bitset>
00056 #include <numeric>
00057 #include <cctype>
00058 using namespace llvm;
00059 
00060 #define DEBUG_TYPE "x86-isel"
00061 
00062 STATISTIC(NumTailCalls, "Number of tail calls");
00063 
00064 static cl::opt<bool> ExperimentalVectorWideningLegalization(
00065     "x86-experimental-vector-widening-legalization", cl::init(false),
00066     cl::desc("Enable an experimental vector type legalization through widening "
00067              "rather than promotion."),
00068     cl::Hidden);
00069 
00070 static cl::opt<int> ReciprocalEstimateRefinementSteps(
00071     "x86-recip-refinement-steps", cl::init(1),
00072     cl::desc("Specify the number of Newton-Raphson iterations applied to the "
00073              "result of the hardware reciprocal estimate instruction."),
00074     cl::NotHidden);
00075 
00076 // Forward declarations.
00077 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
00078                        SDValue V2);
00079 
00080 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
00081                                      const X86Subtarget &STI)
00082     : TargetLowering(TM), Subtarget(&STI) {
00083   X86ScalarSSEf64 = Subtarget->hasSSE2();
00084   X86ScalarSSEf32 = Subtarget->hasSSE1();
00085   TD = getDataLayout();
00086 
00087   // Set up the TargetLowering object.
00088   static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
00089 
00090   // X86 is weird. It always uses i8 for shift amounts and setcc results.
00091   setBooleanContents(ZeroOrOneBooleanContent);
00092   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
00093   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
00094 
00095   // For 64-bit, since we have so many registers, use the ILP scheduler.
00096   // For 32-bit, use the register pressure specific scheduling.
00097   // For Atom, always use ILP scheduling.
00098   if (Subtarget->isAtom())
00099     setSchedulingPreference(Sched::ILP);
00100   else if (Subtarget->is64Bit())
00101     setSchedulingPreference(Sched::ILP);
00102   else
00103     setSchedulingPreference(Sched::RegPressure);
00104   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
00105   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
00106 
00107   // Bypass expensive divides on Atom when compiling with O2.
00108   if (TM.getOptLevel() >= CodeGenOpt::Default) {
00109     if (Subtarget->hasSlowDivide32())
00110       addBypassSlowDiv(32, 8);
00111     if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
00112       addBypassSlowDiv(64, 16);
00113   }
00114 
00115   if (Subtarget->isTargetKnownWindowsMSVC()) {
00116     // Setup Windows compiler runtime calls.
00117     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
00118     setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
00119     setLibcallName(RTLIB::SREM_I64, "_allrem");
00120     setLibcallName(RTLIB::UREM_I64, "_aullrem");
00121     setLibcallName(RTLIB::MUL_I64, "_allmul");
00122     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
00123     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
00124     setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
00125     setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
00126     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
00127 
00128     // The _ftol2 runtime function has an unusual calling conv, which
00129     // is modeled by a special pseudo-instruction.
00130     setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
00131     setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
00132     setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
00133     setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
00134   }
00135 
00136   if (Subtarget->isTargetDarwin()) {
00137     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
00138     setUseUnderscoreSetJmp(false);
00139     setUseUnderscoreLongJmp(false);
00140   } else if (Subtarget->isTargetWindowsGNU()) {
00141     // The MS runtime is odd: it exports _setjmp, but plain longjmp (no underscore)!
00142     setUseUnderscoreSetJmp(true);
00143     setUseUnderscoreLongJmp(false);
00144   } else {
00145     setUseUnderscoreSetJmp(true);
00146     setUseUnderscoreLongJmp(true);
00147   }
00148 
00149   // Set up the register classes.
00150   addRegisterClass(MVT::i8, &X86::GR8RegClass);
00151   addRegisterClass(MVT::i16, &X86::GR16RegClass);
00152   addRegisterClass(MVT::i32, &X86::GR32RegClass);
00153   if (Subtarget->is64Bit())
00154     addRegisterClass(MVT::i64, &X86::GR64RegClass);
00155 
00156   for (MVT VT : MVT::integer_valuetypes())
00157     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
00158 
00159   // We don't accept any truncstore of integer registers.
00160   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
00161   setTruncStoreAction(MVT::i64, MVT::i16, Expand);
00162   setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
00163   setTruncStoreAction(MVT::i32, MVT::i16, Expand);
00164   setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
00165   setTruncStoreAction(MVT::i16, MVT::i8,  Expand);
00166 
00167   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
00168 
00169   // SETOEQ and SETUNE require checking two conditions.
00170   setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
00171   setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
00172   setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
00173   setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
00174   setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
00175   setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
00176 
00177   // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
00178   // operation.
00179   setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
00180   setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
00181   setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
00182 
00183   if (Subtarget->is64Bit()) {
00184     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
00185     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
00186   } else if (!Subtarget->useSoftFloat()) {
00187     // We have an algorithm for SSE2->double, and we turn this into a
00188     // 64-bit FILD followed by conditional FADD for other targets.
00189     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
00190     // We have an algorithm for SSE2, and we turn this into a 64-bit
00191     // FILD for other targets.
00192     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
00193   }
00194 
00195   // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
00196   // this operation.
00197   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
00198   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
00199 
00200   if (!Subtarget->useSoftFloat()) {
00201     // SSE has no i16 to fp conversion, only i32
00202     if (X86ScalarSSEf32) {
00203       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00204       // f32 and f64 cases are Legal, f80 case is not
00205       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00206     } else {
00207       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
00208       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00209     }
00210   } else {
00211     setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00212     setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Promote);
00213   }
00214 
00215   // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
00216   // are Legal, f80 is custom lowered.
00217   setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
00218   setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
00219 
00220   // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
00221   // this operation.
00222   setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
00223   setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
00224 
00225   if (X86ScalarSSEf32) {
00226     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
00227     // f32 and f64 cases are Legal, f80 case is not
00228     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00229   } else {
00230     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
00231     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00232   }
00233 
00234   // Handle FP_TO_UINT by promoting the destination to a larger signed
00235   // conversion.
00236   setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
00237   setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
00238   setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
00239 
00240   if (Subtarget->is64Bit()) {
00241     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
00242     setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
00243   } else if (!Subtarget->useSoftFloat()) {
00244     // Since AVX is a superset of SSE3, only check for SSE here.
00245     if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
00246       // Expand FP_TO_UINT into a select.
00247       // FIXME: We would like to use a Custom expander here eventually to do
00248       // the optimal thing for SSE vs. the default expansion in the legalizer.
00249       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
00250     else
00251       // With SSE3 we can use fisttpll to convert to a signed i64; without
00252       // SSE, we're stuck with a fistpll.
00253       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
00254   }
00255 
00256   if (isTargetFTOL()) {
00257     // Use the _ftol2 runtime function, which has a pseudo-instruction
00258     // to handle its weird calling convention.
00259     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Custom);
00260   }
00261 
00262   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
00263   if (!X86ScalarSSEf64) {
00264     setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
00265     setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
00266     if (Subtarget->is64Bit()) {
00267       setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
00268       // Without SSE, i64->f64 goes through memory.
00269       setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
00270     }
00271   }
00272 
00273   // Scalar integer divide and remainder are lowered to use operations that
00274   // produce two results, to match the available instructions. This exposes
00275   // the two-result form to trivial CSE, which is able to combine x/y and x%y
00276   // into a single instruction.
00277   //
00278   // Scalar integer multiply-high is also lowered to use two-result
00279   // operations, to match the available instructions. However, plain multiply
00280   // (low) operations are left as Legal, as there are single-result
00281   // instructions for this in x86. Using the two-result multiply instructions
00282   // when both high and low results are needed must be arranged by dagcombine.
00283   for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
00284     MVT VT = IntVTs[i];
00285     setOperationAction(ISD::MULHS, VT, Expand);
00286     setOperationAction(ISD::MULHU, VT, Expand);
00287     setOperationAction(ISD::SDIV, VT, Expand);
00288     setOperationAction(ISD::UDIV, VT, Expand);
00289     setOperationAction(ISD::SREM, VT, Expand);
00290     setOperationAction(ISD::UREM, VT, Expand);
00291 
00292     // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
00293     setOperationAction(ISD::ADDC, VT, Custom);
00294     setOperationAction(ISD::ADDE, VT, Custom);
00295     setOperationAction(ISD::SUBC, VT, Custom);
00296     setOperationAction(ISD::SUBE, VT, Custom);
00297   }
00298 
00299   setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
00300   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
00301   setOperationAction(ISD::BR_CC            , MVT::f32,   Expand);
00302   setOperationAction(ISD::BR_CC            , MVT::f64,   Expand);
00303   setOperationAction(ISD::BR_CC            , MVT::f80,   Expand);
00304   setOperationAction(ISD::BR_CC            , MVT::i8,    Expand);
00305   setOperationAction(ISD::BR_CC            , MVT::i16,   Expand);
00306   setOperationAction(ISD::BR_CC            , MVT::i32,   Expand);
00307   setOperationAction(ISD::BR_CC            , MVT::i64,   Expand);
00308   setOperationAction(ISD::SELECT_CC        , MVT::f32,   Expand);
00309   setOperationAction(ISD::SELECT_CC        , MVT::f64,   Expand);
00310   setOperationAction(ISD::SELECT_CC        , MVT::f80,   Expand);
00311   setOperationAction(ISD::SELECT_CC        , MVT::i8,    Expand);
00312   setOperationAction(ISD::SELECT_CC        , MVT::i16,   Expand);
00313   setOperationAction(ISD::SELECT_CC        , MVT::i32,   Expand);
00314   setOperationAction(ISD::SELECT_CC        , MVT::i64,   Expand);
00315   if (Subtarget->is64Bit())
00316     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
00317   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
00318   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
00319   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
00320   setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
00321   setOperationAction(ISD::FREM             , MVT::f32  , Expand);
00322   setOperationAction(ISD::FREM             , MVT::f64  , Expand);
00323   setOperationAction(ISD::FREM             , MVT::f80  , Expand);
00324   setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
00325 
00326   // Promote the i8 variants and force them on up to i32 which has a shorter
00327   // encoding.
00328   setOperationAction(ISD::CTTZ             , MVT::i8   , Promote);
00329   AddPromotedToType (ISD::CTTZ             , MVT::i8   , MVT::i32);
00330   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , Promote);
00331   AddPromotedToType (ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , MVT::i32);
00332   if (Subtarget->hasBMI()) {
00333     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , Expand);
00334     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Expand);
00335     if (Subtarget->is64Bit())
00336       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
00337   } else {
00338     setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
00339     setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
00340     if (Subtarget->is64Bit())
00341       setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
00342   }
00343 
00344   if (Subtarget->hasLZCNT()) {
00345     // When promoting the i8 variants, force them to i32 for a shorter
00346     // encoding.
00347     setOperationAction(ISD::CTLZ           , MVT::i8   , Promote);
00348     AddPromotedToType (ISD::CTLZ           , MVT::i8   , MVT::i32);
00349     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Promote);
00350     AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
00351     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Expand);
00352     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Expand);
00353     if (Subtarget->is64Bit())
00354       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
00355   } else {
00356     setOperationAction(ISD::CTLZ           , MVT::i8   , Custom);
00357     setOperationAction(ISD::CTLZ           , MVT::i16  , Custom);
00358     setOperationAction(ISD::CTLZ           , MVT::i32  , Custom);
00359     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Custom);
00360     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Custom);
00361     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Custom);
00362     if (Subtarget->is64Bit()) {
00363       setOperationAction(ISD::CTLZ         , MVT::i64  , Custom);
00364       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
00365     }
00366   }
00367 
00368   // Special handling for half-precision floating point conversions.
00369   // If we don't have F16C support, then lower half float conversions
00370   // into library calls.
00371   if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
00372     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
00373     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
00374   }
00375 
00376   // There's never any support for operations beyond MVT::f32.
00377   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
00378   setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
00379   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
00380   setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
00381 
00382   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
00383   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
00384   setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
00385   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
00386   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
00387   setTruncStoreAction(MVT::f80, MVT::f16, Expand);
00388 
00389   if (Subtarget->hasPOPCNT()) {
00390     setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
00391   } else {
00392     setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
00393     setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
00394     setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
00395     if (Subtarget->is64Bit())
00396       setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
00397   }
00398 
00399   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
00400 
00401   if (!Subtarget->hasMOVBE())
00402     setOperationAction(ISD::BSWAP          , MVT::i16  , Expand);
00403 
00404   // These should be promoted to a larger select which is supported.
00405   setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
00406   // X86 wants to expand cmov itself.
00407   setOperationAction(ISD::SELECT          , MVT::i8   , Custom);
00408   setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
00409   setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
00410   setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
00411   setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
00412   setOperationAction(ISD::SELECT          , MVT::f80  , Custom);
00413   setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
00414   setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
00415   setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
00416   setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
00417   setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
00418   setOperationAction(ISD::SETCC           , MVT::f80  , Custom);
00419   if (Subtarget->is64Bit()) {
00420     setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
00421     setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
00422   }
00423   setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
00424   // NOTE: The EH_SJLJ_SETJMP/_LONGJMP support here is NOT intended for SjLj
00425   // exception handling, but as a lightweight setjmp/longjmp replacement to
00426   // support continuations, user-level threading, etc. As a result, no other
00427   // SjLj exception interfaces are implemented; please don't build your own
00428   // exception handling on top of them.
00429   // LLVM/Clang supports zero-cost DWARF exception handling.
00430   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
00431   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
00432 
00433   // Darwin ABI issue.
00434   setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
00435   setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
00436   setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
00437   setOperationAction(ISD::GlobalTLSAddress, MVT::i32  , Custom);
00438   if (Subtarget->is64Bit())
00439     setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
00440   setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
00441   setOperationAction(ISD::BlockAddress    , MVT::i32  , Custom);
00442   if (Subtarget->is64Bit()) {
00443     setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
00444     setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
00445     setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
00446     setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
00447     setOperationAction(ISD::BlockAddress  , MVT::i64  , Custom);
00448   }
00449   // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
00450   setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
00451   setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
00452   setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
00453   if (Subtarget->is64Bit()) {
00454     setOperationAction(ISD::SHL_PARTS     , MVT::i64  , Custom);
00455     setOperationAction(ISD::SRA_PARTS     , MVT::i64  , Custom);
00456     setOperationAction(ISD::SRL_PARTS     , MVT::i64  , Custom);
00457   }
00458 
00459   if (Subtarget->hasSSE1())
00460     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
00461 
00462   setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);
00463 
00464   // Expand certain atomics
00465   for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
00466     MVT VT = IntVTs[i];
00467     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
00468     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
00469     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
00470   }
00471 
00472   if (Subtarget->hasCmpxchg16b()) {
00473     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
00474   }
00475 
00476   // FIXME - use subtarget debug flags
00477   if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
00478       !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
00479     setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
00480   }
00481 
00482   if (Subtarget->is64Bit()) {
00483     setExceptionPointerRegister(X86::RAX);
00484     setExceptionSelectorRegister(X86::RDX);
00485   } else {
00486     setExceptionPointerRegister(X86::EAX);
00487     setExceptionSelectorRegister(X86::EDX);
00488   }
00489   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
00490   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
00491 
00492   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
00493   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
00494 
00495   setOperationAction(ISD::TRAP, MVT::Other, Legal);
00496   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
00497 
00498   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
00499   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
00500   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
00501   if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
00502     // TargetInfo::X86_64ABIBuiltinVaList
00503     setOperationAction(ISD::VAARG           , MVT::Other, Custom);
00504     setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
00505   } else {
00506     // TargetInfo::CharPtrBuiltinVaList
00507     setOperationAction(ISD::VAARG           , MVT::Other, Expand);
00508     setOperationAction(ISD::VACOPY          , MVT::Other, Expand);
00509   }
00510 
00511   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
00512   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
00513 
00514   setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
00515 
00516   // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
00517   setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
00518   setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
00519 
00520   if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
00521     // f32 and f64 use SSE.
00522     // Set up the FP register classes.
00523     addRegisterClass(MVT::f32, &X86::FR32RegClass);
00524     addRegisterClass(MVT::f64, &X86::FR64RegClass);
00525 
00526     // Use ANDPD to simulate FABS.
00527     setOperationAction(ISD::FABS , MVT::f64, Custom);
00528     setOperationAction(ISD::FABS , MVT::f32, Custom);
00529 
00530     // Use XORP to simulate FNEG.
00531     setOperationAction(ISD::FNEG , MVT::f64, Custom);
00532     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00533 
00534     // Use ANDPD and ORPD to simulate FCOPYSIGN.
00535     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
00536     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00537 
00538     // Lower this to FGETSIGNx86 plus an AND.
00539     setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
00540     setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
00541 
00542     // We don't support sin/cos/fmod
00543     setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00544     setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00545     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00546     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00547     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00548     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00549 
00550     // Expand FP immediates into loads from the stack, except for the special
00551     // cases we handle.
00552     addLegalFPImmediate(APFloat(+0.0)); // xorpd
00553     addLegalFPImmediate(APFloat(+0.0f)); // xorps
00554   } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
00555     // Use SSE for f32, x87 for f64.
00556     // Set up the FP register classes.
00557     addRegisterClass(MVT::f32, &X86::FR32RegClass);
00558     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
00559 
00560     // Use ANDPS to simulate FABS.
00561     setOperationAction(ISD::FABS , MVT::f32, Custom);
00562 
00563     // Use XORP to simulate FNEG.
00564     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00565 
00566     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
00567 
00568     // Use ANDPS and ORPS to simulate FCOPYSIGN.
00569     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00570     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00571 
00572     // We don't support sin/cos/fmod
00573     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00574     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00575     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00576 
00577     // Special cases we handle for FP constants.
00578     addLegalFPImmediate(APFloat(+0.0f)); // xorps
00579     addLegalFPImmediate(APFloat(+0.0)); // FLD0
00580     addLegalFPImmediate(APFloat(+1.0)); // FLD1
00581     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
00582     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
00583 
00584     if (!TM.Options.UnsafeFPMath) {
00585       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00586       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00587       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00588     }
00589   } else if (!Subtarget->useSoftFloat()) {
00590     // f32 and f64 in x87.
00591     // Set up the FP register classes.
00592     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
00593     addRegisterClass(MVT::f32, &X86::RFP32RegClass);
00594 
00595     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
00596     setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
00597     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00598     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
00599 
00600     if (!TM.Options.UnsafeFPMath) {
00601       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00602       setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00603       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00604       setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00605       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00606       setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00607     }
00608     addLegalFPImmediate(APFloat(+0.0)); // FLD0
00609     addLegalFPImmediate(APFloat(+1.0)); // FLD1
00610     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
00611     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
00612     addLegalFPImmediate(APFloat(+0.0f)); // FLD0
00613     addLegalFPImmediate(APFloat(+1.0f)); // FLD1
00614     addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
00615     addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
00616   }
00617 
00618   // We don't support FMA.
00619   setOperationAction(ISD::FMA, MVT::f64, Expand);
00620   setOperationAction(ISD::FMA, MVT::f32, Expand);
00621 
00622   // Long double always uses X87.
00623   if (!Subtarget->useSoftFloat()) {
00624     addRegisterClass(MVT::f80, &X86::RFP80RegClass);
00625     setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
00626     setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
00627     {
00628       APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
00629       addLegalFPImmediate(TmpFlt);  // FLD0
00630       TmpFlt.changeSign();
00631       addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
00632 
00633       bool ignored;
00634       APFloat TmpFlt2(+1.0);
00635       TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
00636                       &ignored);
00637       addLegalFPImmediate(TmpFlt2);  // FLD1
00638       TmpFlt2.changeSign();
00639       addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
00640     }
00641 
00642     if (!TM.Options.UnsafeFPMath) {
00643       setOperationAction(ISD::FSIN   , MVT::f80, Expand);
00644       setOperationAction(ISD::FCOS   , MVT::f80, Expand);
00645       setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
00646     }
00647 
00648     setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
00649     setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
00650     setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
00651     setOperationAction(ISD::FRINT,  MVT::f80, Expand);
00652     setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
00653     setOperationAction(ISD::FMA, MVT::f80, Expand);
00654   }
00655 
00656   // Always use a library call for pow.
00657   setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
00658   setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
00659   setOperationAction(ISD::FPOW             , MVT::f80  , Expand);
00660 
00661   setOperationAction(ISD::FLOG, MVT::f80, Expand);
00662   setOperationAction(ISD::FLOG2, MVT::f80, Expand);
00663   setOperationAction(ISD::FLOG10, MVT::f80, Expand);
00664   setOperationAction(ISD::FEXP, MVT::f80, Expand);
00665   setOperationAction(ISD::FEXP2, MVT::f80, Expand);
00666   setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
00667   setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
00668 
00669   // First set operation action for all vector types to either promote
00670   // (for widening) or expand (for scalarization). Then we will selectively
00671   // turn on ones that can be effectively codegen'd.
00672   for (MVT VT : MVT::vector_valuetypes()) {
00673     setOperationAction(ISD::ADD , VT, Expand);
00674     setOperationAction(ISD::SUB , VT, Expand);
00675     setOperationAction(ISD::FADD, VT, Expand);
00676     setOperationAction(ISD::FNEG, VT, Expand);
00677     setOperationAction(ISD::FSUB, VT, Expand);
00678     setOperationAction(ISD::MUL , VT, Expand);
00679     setOperationAction(ISD::FMUL, VT, Expand);
00680     setOperationAction(ISD::SDIV, VT, Expand);
00681     setOperationAction(ISD::UDIV, VT, Expand);
00682     setOperationAction(ISD::FDIV, VT, Expand);
00683     setOperationAction(ISD::SREM, VT, Expand);
00684     setOperationAction(ISD::UREM, VT, Expand);
00685     setOperationAction(ISD::LOAD, VT, Expand);
00686     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
00687     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
00688     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
00689     setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
00690     setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
00691     setOperationAction(ISD::FABS, VT, Expand);
00692     setOperationAction(ISD::FSIN, VT, Expand);
00693     setOperationAction(ISD::FSINCOS, VT, Expand);
00694     setOperationAction(ISD::FCOS, VT, Expand);
00695     setOperationAction(ISD::FSINCOS, VT, Expand);
00696     setOperationAction(ISD::FREM, VT, Expand);
00697     setOperationAction(ISD::FMA,  VT, Expand);
00698     setOperationAction(ISD::FPOWI, VT, Expand);
00699     setOperationAction(ISD::FSQRT, VT, Expand);
00700     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
00701     setOperationAction(ISD::FFLOOR, VT, Expand);
00702     setOperationAction(ISD::FCEIL, VT, Expand);
00703     setOperationAction(ISD::FTRUNC, VT, Expand);
00704     setOperationAction(ISD::FRINT, VT, Expand);
00705     setOperationAction(ISD::FNEARBYINT, VT, Expand);
00706     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
00707     setOperationAction(ISD::MULHS, VT, Expand);
00708     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
00709     setOperationAction(ISD::MULHU, VT, Expand);
00710     setOperationAction(ISD::SDIVREM, VT, Expand);
00711     setOperationAction(ISD::UDIVREM, VT, Expand);
00712     setOperationAction(ISD::FPOW, VT, Expand);
00713     setOperationAction(ISD::CTPOP, VT, Expand);
00714     setOperationAction(ISD::CTTZ, VT, Expand);
00715     setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
00716     setOperationAction(ISD::CTLZ, VT, Expand);
00717     setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
00718     setOperationAction(ISD::SHL, VT, Expand);
00719     setOperationAction(ISD::SRA, VT, Expand);
00720     setOperationAction(ISD::SRL, VT, Expand);
00721     setOperationAction(ISD::ROTL, VT, Expand);
00722     setOperationAction(ISD::ROTR, VT, Expand);
00723     setOperationAction(ISD::BSWAP, VT, Expand);
00724     setOperationAction(ISD::SETCC, VT, Expand);
00725     setOperationAction(ISD::FLOG, VT, Expand);
00726     setOperationAction(ISD::FLOG2, VT, Expand);
00727     setOperationAction(ISD::FLOG10, VT, Expand);
00728     setOperationAction(ISD::FEXP, VT, Expand);
00729     setOperationAction(ISD::FEXP2, VT, Expand);
00730     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
00731     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
00732     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
00733     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
00734     setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
00735     setOperationAction(ISD::TRUNCATE, VT, Expand);
00736     setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
00737     setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
00738     setOperationAction(ISD::ANY_EXTEND, VT, Expand);
00739     setOperationAction(ISD::VSELECT, VT, Expand);
00740     setOperationAction(ISD::SELECT_CC, VT, Expand);
00741     for (MVT InnerVT : MVT::vector_valuetypes()) {
00742       setTruncStoreAction(InnerVT, VT, Expand);
00743 
00744       setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
00745       setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
00746 
00747       // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
00748       // types; we have to deal with them whether we ask for Expansion or not.
00749       // Setting Expand causes its own optimisation problems though, so leave
00750       // them legal.
00751       if (VT.getVectorElementType() == MVT::i1)
00752         setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
00753 
00754       // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
00755       // split/scalarized right now.
00756       if (VT.getVectorElementType() == MVT::f16)
00757         setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
00758     }
00759   }
00760 
00761   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
00762   // with -msoft-float, disable use of MMX as well.
00763   if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
00764     addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
00765     // No operations on x86mmx supported, everything uses intrinsics.
00766   }
00767 
00768   // MMX-sized vectors (other than x86mmx) are expected to be expanded
00769   // into smaller operations.
00770   for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
00771     setOperationAction(ISD::MULHS,              MMXTy,      Expand);
00772     setOperationAction(ISD::AND,                MMXTy,      Expand);
00773     setOperationAction(ISD::OR,                 MMXTy,      Expand);
00774     setOperationAction(ISD::XOR,                MMXTy,      Expand);
00775     setOperationAction(ISD::SCALAR_TO_VECTOR,   MMXTy,      Expand);
00776     setOperationAction(ISD::SELECT,             MMXTy,      Expand);
00777     setOperationAction(ISD::BITCAST,            MMXTy,      Expand);
00778   }
00779   setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v1i64, Expand);
00780 
00781   if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
00782     addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
00783 
00784     setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
00785     setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
00786     setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
00787     setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
00788     setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
00789     setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
00790     setOperationAction(ISD::FABS,               MVT::v4f32, Custom);
00791     setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
00792     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
00793     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
00794     setOperationAction(ISD::VSELECT,            MVT::v4f32, Custom);
00795     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
00796     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
00797     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Custom);
00798   }
00799 
00800   if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
00801     addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
00802 
00803     // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
00804     // registers cannot be used even for integer operations.
00805     addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
00806     addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
00807     addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
00808     addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
00809 
00810     setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
00811     setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
00812     setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
00813     setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
00814     setOperationAction(ISD::MUL,                MVT::v16i8, Custom);
00815     setOperationAction(ISD::MUL,                MVT::v4i32, Custom);
00816     setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
00817     setOperationAction(ISD::UMUL_LOHI,          MVT::v4i32, Custom);
00818     setOperationAction(ISD::SMUL_LOHI,          MVT::v4i32, Custom);
00819     setOperationAction(ISD::MULHU,              MVT::v8i16, Legal);
00820     setOperationAction(ISD::MULHS,              MVT::v8i16, Legal);
00821     setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
00822     setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
00823     setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
00824     setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
00825     setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
00826     setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
00827     setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
00828     setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
00829     setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
00830     setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
00831     setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
00832     setOperationAction(ISD::FABS,               MVT::v2f64, Custom);
00833 
00834     setOperationAction(ISD::SETCC,              MVT::v2i64, Custom);
00835     setOperationAction(ISD::SETCC,              MVT::v16i8, Custom);
00836     setOperationAction(ISD::SETCC,              MVT::v8i16, Custom);
00837     setOperationAction(ISD::SETCC,              MVT::v4i32, Custom);
00838 
00839     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
00840     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
00841     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
00842     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
00843     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
00844 
00845     // Only provide customized ctpop vector bit twiddling for vector types
00846     // where we know it performs better than using popcnt on each vector
00847     // element. If popcnt isn't supported, always provide the custom version.
00848     if (!Subtarget->hasPOPCNT()) {
00849       setOperationAction(ISD::CTPOP,            MVT::v4i32, Custom);
00850       setOperationAction(ISD::CTPOP,            MVT::v2i64, Custom);
00851     }
00852 
00853     // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
00854     for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
00855       MVT VT = (MVT::SimpleValueType)i;
00856       // Do not attempt to custom lower non-power-of-2 vectors
00857       if (!isPowerOf2_32(VT.getVectorNumElements()))
00858         continue;
00859       // Do not attempt to custom lower non-128-bit vectors
00860       if (!VT.is128BitVector())
00861         continue;
00862       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
00863       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
00864       setOperationAction(ISD::VSELECT,            VT, Custom);
00865       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
00866     }
00867 
00868     // We support custom legalizing of sext and anyext loads for specific
00869     // memory vector types which we can load as a scalar (or sequence of
00870     // scalars) and extend in-register to a legal 128-bit vector type. For sext
00871     // loads these must work with a single scalar load.
00872     for (MVT VT : MVT::integer_vector_valuetypes()) {
00873       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
00874       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
00875       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
00876       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
00877       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
00878       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
00879       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
00880       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
00881       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
00882     }
00883 
00884     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
00885     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
00886     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
00887     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
00888     setOperationAction(ISD::VSELECT,            MVT::v2f64, Custom);
00889     setOperationAction(ISD::VSELECT,            MVT::v2i64, Custom);
00890     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
00891     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
00892 
00893     if (Subtarget->is64Bit()) {
00894       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
00895       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
00896     }
00897 
00898     // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
00899     for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
00900       MVT VT = (MVT::SimpleValueType)i;
00901 
00902       // Do not attempt to promote non-128-bit vectors
00903       if (!VT.is128BitVector())
00904         continue;
00905 
00906       setOperationAction(ISD::AND,    VT, Promote);
00907       AddPromotedToType (ISD::AND,    VT, MVT::v2i64);
00908       setOperationAction(ISD::OR,     VT, Promote);
00909       AddPromotedToType (ISD::OR,     VT, MVT::v2i64);
00910       setOperationAction(ISD::XOR,    VT, Promote);
00911       AddPromotedToType (ISD::XOR,    VT, MVT::v2i64);
00912       setOperationAction(ISD::LOAD,   VT, Promote);
00913       AddPromotedToType (ISD::LOAD,   VT, MVT::v2i64);
00914       setOperationAction(ISD::SELECT, VT, Promote);
00915       AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
00916     }
00917 
00918     // Custom lower v2i64 and v2f64 selects.
00919     setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
00920     setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
00921     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
00922     setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
00923 
00924     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
00925     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
00926 
00927     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i8,  Custom);
00928     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i16, Custom);
00929     // As there is no 64-bit GPR available, we need to build a special custom
00930     // sequence to convert from v2i32 to v2f32.
00931     if (!Subtarget->is64Bit())
00932       setOperationAction(ISD::UINT_TO_FP,       MVT::v2f32, Custom);
00933 
00934     setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
00935     setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
00936 
00937     for (MVT VT : MVT::fp_vector_valuetypes())
00938       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
00939 
00940     setOperationAction(ISD::BITCAST,            MVT::v2i32, Custom);
00941     setOperationAction(ISD::BITCAST,            MVT::v4i16, Custom);
00942     setOperationAction(ISD::BITCAST,            MVT::v8i8,  Custom);
00943   }
00944 
00945   if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
00946     for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
00947       setOperationAction(ISD::FFLOOR,           RoundedTy,  Legal);
00948       setOperationAction(ISD::FCEIL,            RoundedTy,  Legal);
00949       setOperationAction(ISD::FTRUNC,           RoundedTy,  Legal);
00950       setOperationAction(ISD::FRINT,            RoundedTy,  Legal);
00951       setOperationAction(ISD::FNEARBYINT,       RoundedTy,  Legal);
00952     }
00953 
00954     // FIXME: Do we need to handle scalar-to-vector here?
00955     setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
00956 
00957     // We directly match byte blends in the backend as they match the VSELECT
00958     // condition form.
00959     setOperationAction(ISD::VSELECT,            MVT::v16i8, Legal);
00960 
00961     // SSE41 brings specific instructions for doing vector sign extend even in
00962     // cases where we don't have SRA.
00963     for (MVT VT : MVT::integer_vector_valuetypes()) {
00964       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
00965       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
00966       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
00967     }
00968 
00969     // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
00970     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
00971     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
00972     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
00973     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
00974     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
00975     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
00976 
00977     setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
00978     setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
00979     setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
00980     setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
00981     setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
00982     setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
00983 
00984     // i8 and i16 vectors are custom because the source register and source
00985     // memory operand types are not the same width.  f32 vectors are
00986     // custom since the immediate controlling the insert encodes additional
00987     // information.
00988     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
00989     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
00990     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
00991     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
00992 
00993     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
00994     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
00995     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
00996     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
00997 
00998     // FIXME: these should be Legal, but that's only for the case where
00999     // the index is constant.  For now custom expand to deal with that.
01000     if (Subtarget->is64Bit()) {
01001       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
01002       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
01003     }
01004   }
01005 
01006   if (Subtarget->hasSSE2()) {
01007     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
01008     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
01009     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
01010 
01011     setOperationAction(ISD::SRL,               MVT::v8i16, Custom);
01012     setOperationAction(ISD::SRL,               MVT::v16i8, Custom);
01013 
01014     setOperationAction(ISD::SHL,               MVT::v8i16, Custom);
01015     setOperationAction(ISD::SHL,               MVT::v16i8, Custom);
01016 
01017     setOperationAction(ISD::SRA,               MVT::v8i16, Custom);
01018     setOperationAction(ISD::SRA,               MVT::v16i8, Custom);
01019 
01020     // In the customized shift lowering, the legal cases in AVX2 will be
01021     // recognized.
01022     setOperationAction(ISD::SRL,               MVT::v2i64, Custom);
01023     setOperationAction(ISD::SRL,               MVT::v4i32, Custom);
01024 
01025     setOperationAction(ISD::SHL,               MVT::v2i64, Custom);
01026     setOperationAction(ISD::SHL,               MVT::v4i32, Custom);
01027 
01028     setOperationAction(ISD::SRA,               MVT::v4i32, Custom);
01029   }
01030 
01031   if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
01032     addRegisterClass(MVT::v32i8,  &X86::VR256RegClass);
01033     addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
01034     addRegisterClass(MVT::v8i32,  &X86::VR256RegClass);
01035     addRegisterClass(MVT::v8f32,  &X86::VR256RegClass);
01036     addRegisterClass(MVT::v4i64,  &X86::VR256RegClass);
01037     addRegisterClass(MVT::v4f64,  &X86::VR256RegClass);
01038 
01039     setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
01040     setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
01041     setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);
01042 
01043     setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
01044     setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
01045     setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
01046     setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
01047     setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
01048     setOperationAction(ISD::FFLOOR,             MVT::v8f32, Legal);
01049     setOperationAction(ISD::FCEIL,              MVT::v8f32, Legal);
01050     setOperationAction(ISD::FTRUNC,             MVT::v8f32, Legal);
01051     setOperationAction(ISD::FRINT,              MVT::v8f32, Legal);
01052     setOperationAction(ISD::FNEARBYINT,         MVT::v8f32, Legal);
01053     setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
01054     setOperationAction(ISD::FABS,               MVT::v8f32, Custom);
01055 
01056     setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
01057     setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
01058     setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
01059     setOperationAction(ISD::FDIV,               MVT::v4f64, Legal);
01060     setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
01061     setOperationAction(ISD::FFLOOR,             MVT::v4f64, Legal);
01062     setOperationAction(ISD::FCEIL,              MVT::v4f64, Legal);
01063     setOperationAction(ISD::FTRUNC,             MVT::v4f64, Legal);
01064     setOperationAction(ISD::FRINT,              MVT::v4f64, Legal);
01065     setOperationAction(ISD::FNEARBYINT,         MVT::v4f64, Legal);
01066     setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
01067     setOperationAction(ISD::FABS,               MVT::v4f64, Custom);
01068 
01069     // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
01070     // even though v8i16 is a legal type.
01071     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Promote);
01072     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i16, Promote);
01073     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);
01074 
01075     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i16, Promote);
01076     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
01077     setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);
01078 
01079     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i8,  Custom);
01080     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i16, Custom);
01081 
01082     for (MVT VT : MVT::fp_vector_valuetypes())
01083       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
01084 
01085     setOperationAction(ISD::SRL,               MVT::v16i16, Custom);
01086     setOperationAction(ISD::SRL,               MVT::v32i8, Custom);
01087 
01088     setOperationAction(ISD::SHL,               MVT::v16i16, Custom);
01089     setOperationAction(ISD::SHL,               MVT::v32i8, Custom);
01090 
01091     setOperationAction(ISD::SRA,               MVT::v16i16, Custom);
01092     setOperationAction(ISD::SRA,               MVT::v32i8, Custom);
01093 
01094     setOperationAction(ISD::SETCC,             MVT::v32i8, Custom);
01095     setOperationAction(ISD::SETCC,             MVT::v16i16, Custom);
01096     setOperationAction(ISD::SETCC,             MVT::v8i32, Custom);
01097     setOperationAction(ISD::SETCC,             MVT::v4i64, Custom);
01098 
01099     setOperationAction(ISD::SELECT,            MVT::v4f64, Custom);
01100     setOperationAction(ISD::SELECT,            MVT::v4i64, Custom);
01101     setOperationAction(ISD::SELECT,            MVT::v8f32, Custom);
01102 
01103     setOperationAction(ISD::SIGN_EXTEND,       MVT::v4i64, Custom);
01104     setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i32, Custom);
01105     setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i16, Custom);
01106     setOperationAction(ISD::ZERO_EXTEND,       MVT::v4i64, Custom);
01107     setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i32, Custom);
01108     setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i16, Custom);
01109     setOperationAction(ISD::ANY_EXTEND,        MVT::v4i64, Custom);
01110     setOperationAction(ISD::ANY_EXTEND,        MVT::v8i32, Custom);
01111     setOperationAction(ISD::ANY_EXTEND,        MVT::v16i16, Custom);
01112     setOperationAction(ISD::TRUNCATE,          MVT::v16i8, Custom);
01113     setOperationAction(ISD::TRUNCATE,          MVT::v8i16, Custom);
01114     setOperationAction(ISD::TRUNCATE,          MVT::v4i32, Custom);
01115 
01116     if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
01117       setOperationAction(ISD::FMA,             MVT::v8f32, Legal);
01118       setOperationAction(ISD::FMA,             MVT::v4f64, Legal);
01119       setOperationAction(ISD::FMA,             MVT::v4f32, Legal);
01120       setOperationAction(ISD::FMA,             MVT::v2f64, Legal);
01121       setOperationAction(ISD::FMA,             MVT::f32, Legal);
01122       setOperationAction(ISD::FMA,             MVT::f64, Legal);
01123     }
01124 
01125     if (Subtarget->hasInt256()) {
01126       setOperationAction(ISD::ADD,             MVT::v4i64, Legal);
01127       setOperationAction(ISD::ADD,             MVT::v8i32, Legal);
01128       setOperationAction(ISD::ADD,             MVT::v16i16, Legal);
01129       setOperationAction(ISD::ADD,             MVT::v32i8, Legal);
01130 
01131       setOperationAction(ISD::SUB,             MVT::v4i64, Legal);
01132       setOperationAction(ISD::SUB,             MVT::v8i32, Legal);
01133       setOperationAction(ISD::SUB,             MVT::v16i16, Legal);
01134       setOperationAction(ISD::SUB,             MVT::v32i8, Legal);
01135 
01136       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01137       setOperationAction(ISD::MUL,             MVT::v8i32, Legal);
01138       setOperationAction(ISD::MUL,             MVT::v16i16, Legal);
01139       setOperationAction(ISD::MUL,             MVT::v32i8, Custom);
01140 
01141       setOperationAction(ISD::UMUL_LOHI,       MVT::v8i32, Custom);
01142       setOperationAction(ISD::SMUL_LOHI,       MVT::v8i32, Custom);
01143       setOperationAction(ISD::MULHU,           MVT::v16i16, Legal);
01144       setOperationAction(ISD::MULHS,           MVT::v16i16, Legal);
01145 
01146       // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
01147       // when we have a 256-bit-wide blend with an immediate.
01148       setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
01149 
01150       // Only provide customized ctpop vector bit twiddling for the vector
01151       // types we know perform better than using the popcnt instruction on
01152       // each vector element. If popcnt isn't supported, always provide the
01153       // custom version.
01154       if (!Subtarget->hasPOPCNT())
01155         setOperationAction(ISD::CTPOP,           MVT::v4i64, Custom);
01156 
01157       // Custom CTPOP always performs better on natively supported v8i32.
01158       setOperationAction(ISD::CTPOP,             MVT::v8i32, Custom);
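      // As a rough sketch (assumed here for illustration; the exact DAG the
      // custom lowering emits may differ), the bit-twiddling CTPOP follows the
      // classic SWAR reduction applied lane-wise, e.g. for a 32-bit element:
      //   x = x - ((x >> 1) & 0x55555555);
      //   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
      //   x = (x + (x >> 4)) & 0x0F0F0F0F;
      //   count = (x * 0x01010101) >> 24;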
01159 
01160       // AVX2 also has wider vector sign/zero-extending loads (VPMOV[SZ]X).
01161       setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
01162       setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
01163       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
01164       setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
01165       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
01166       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
01167 
01168       setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
01169       setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
01170       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
01171       setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
01172       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
01173       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
01174     } else {
01175       setOperationAction(ISD::ADD,             MVT::v4i64, Custom);
01176       setOperationAction(ISD::ADD,             MVT::v8i32, Custom);
01177       setOperationAction(ISD::ADD,             MVT::v16i16, Custom);
01178       setOperationAction(ISD::ADD,             MVT::v32i8, Custom);
01179 
01180       setOperationAction(ISD::SUB,             MVT::v4i64, Custom);
01181       setOperationAction(ISD::SUB,             MVT::v8i32, Custom);
01182       setOperationAction(ISD::SUB,             MVT::v16i16, Custom);
01183       setOperationAction(ISD::SUB,             MVT::v32i8, Custom);
01184 
01185       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01186       setOperationAction(ISD::MUL,             MVT::v8i32, Custom);
01187       setOperationAction(ISD::MUL,             MVT::v16i16, Custom);
01188       setOperationAction(ISD::MUL,             MVT::v32i8, Custom);
01189     }
01190 
01191     // In the customized shift lowering, the legal cases in AVX2 will be
01192     // recognized.
01193     setOperationAction(ISD::SRL,               MVT::v4i64, Custom);
01194     setOperationAction(ISD::SRL,               MVT::v8i32, Custom);
01195 
01196     setOperationAction(ISD::SHL,               MVT::v4i64, Custom);
01197     setOperationAction(ISD::SHL,               MVT::v8i32, Custom);
01198 
01199     setOperationAction(ISD::SRA,               MVT::v8i32, Custom);
01200 
01201     // Custom lower several nodes for 256-bit types.
01202     for (MVT VT : MVT::vector_valuetypes()) {
01203       if (VT.getScalarSizeInBits() >= 32) {
01204         setOperationAction(ISD::MLOAD,  VT, Legal);
01205         setOperationAction(ISD::MSTORE, VT, Legal);
01206       }
01207       // Extract subvector is special because the value type
01208       // (result) is 128-bit but the source is 256-bit wide.
01209       if (VT.is128BitVector()) {
01210         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01211       }
01212       // Do not attempt to custom lower other non-256-bit vectors
01213       if (!VT.is256BitVector())
01214         continue;
01215 
01216       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
01217       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
01218       setOperationAction(ISD::VSELECT,            VT, Custom);
01219       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
01220       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
01221       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
01222       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Custom);
01223       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
01224     }
01225 
01226     if (Subtarget->hasInt256())
01227       setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
01228 
01229 
01230     // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
01231     for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
01232       MVT VT = (MVT::SimpleValueType)i;
01233 
01234       // Do not attempt to promote non-256-bit vectors
01235       if (!VT.is256BitVector())
01236         continue;
01237 
01238       setOperationAction(ISD::AND,    VT, Promote);
01239       AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
01240       setOperationAction(ISD::OR,     VT, Promote);
01241       AddPromotedToType (ISD::OR,     VT, MVT::v4i64);
01242       setOperationAction(ISD::XOR,    VT, Promote);
01243       AddPromotedToType (ISD::XOR,    VT, MVT::v4i64);
01244       setOperationAction(ISD::LOAD,   VT, Promote);
01245       AddPromotedToType (ISD::LOAD,   VT, MVT::v4i64);
01246       setOperationAction(ISD::SELECT, VT, Promote);
01247       AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
01248     }
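    // For example, under this promotion an (and v32i8 a, b) is effectively
    // rewritten as a bitcast of both operands to v4i64, a single v4i64 AND
    // over the full YMM register, and a bitcast of the result back to v32i8
    // (a sketch of the effect, not the literal node sequence).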
01249   }
01250 
01251   if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
01252     addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
01253     addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
01254     addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
01255     addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
01256 
01257     addRegisterClass(MVT::i1,     &X86::VK1RegClass);
01258     addRegisterClass(MVT::v8i1,   &X86::VK8RegClass);
01259     addRegisterClass(MVT::v16i1,  &X86::VK16RegClass);
01260 
01261     for (MVT VT : MVT::fp_vector_valuetypes())
01262       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
01263 
01264     setOperationAction(ISD::BR_CC,              MVT::i1,    Expand);
01265     setOperationAction(ISD::SETCC,              MVT::i1,    Custom);
01266     setOperationAction(ISD::XOR,                MVT::i1,    Legal);
01267     setOperationAction(ISD::OR,                 MVT::i1,    Legal);
01268     setOperationAction(ISD::AND,                MVT::i1,    Legal);
01269     setOperationAction(ISD::LOAD,               MVT::v16f32, Legal);
01270     setOperationAction(ISD::LOAD,               MVT::v8f64, Legal);
01271     setOperationAction(ISD::LOAD,               MVT::v8i64, Legal);
01272     setOperationAction(ISD::LOAD,               MVT::v16i32, Legal);
01273     setOperationAction(ISD::LOAD,               MVT::v16i1, Legal);
01274 
01275     setOperationAction(ISD::FADD,               MVT::v16f32, Legal);
01276     setOperationAction(ISD::FSUB,               MVT::v16f32, Legal);
01277     setOperationAction(ISD::FMUL,               MVT::v16f32, Legal);
01278     setOperationAction(ISD::FDIV,               MVT::v16f32, Legal);
01279     setOperationAction(ISD::FSQRT,              MVT::v16f32, Legal);
01280     setOperationAction(ISD::FNEG,               MVT::v16f32, Custom);
01281 
01282     setOperationAction(ISD::FADD,               MVT::v8f64, Legal);
01283     setOperationAction(ISD::FSUB,               MVT::v8f64, Legal);
01284     setOperationAction(ISD::FMUL,               MVT::v8f64, Legal);
01285     setOperationAction(ISD::FDIV,               MVT::v8f64, Legal);
01286     setOperationAction(ISD::FSQRT,              MVT::v8f64, Legal);
01287     setOperationAction(ISD::FNEG,               MVT::v8f64, Custom);
01288     setOperationAction(ISD::FMA,                MVT::v8f64, Legal);
01289     setOperationAction(ISD::FMA,                MVT::v16f32, Legal);
01290 
01291     setOperationAction(ISD::FP_TO_SINT,         MVT::i32, Legal);
01292     setOperationAction(ISD::FP_TO_UINT,         MVT::i32, Legal);
01293     setOperationAction(ISD::SINT_TO_FP,         MVT::i32, Legal);
01294     setOperationAction(ISD::UINT_TO_FP,         MVT::i32, Legal);
01295     if (Subtarget->is64Bit()) {
01296       setOperationAction(ISD::FP_TO_UINT,       MVT::i64, Legal);
01297       setOperationAction(ISD::FP_TO_SINT,       MVT::i64, Legal);
01298       setOperationAction(ISD::SINT_TO_FP,       MVT::i64, Legal);
01299       setOperationAction(ISD::UINT_TO_FP,       MVT::i64, Legal);
01300     }
01301     setOperationAction(ISD::FP_TO_SINT,         MVT::v16i32, Legal);
01302     setOperationAction(ISD::FP_TO_UINT,         MVT::v16i32, Legal);
01303     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i32, Legal);
01304     setOperationAction(ISD::FP_TO_UINT,         MVT::v4i32, Legal);
01305     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i32, Legal);
01306     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i1,   Custom);
01307     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i1,  Custom);
01308     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i8,  Promote);
01309     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i16, Promote);
01310     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i32, Legal);
01311     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i32, Legal);
01312     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Legal);
01313     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i8, Custom);
01314     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i16, Custom);
01315     setOperationAction(ISD::FP_ROUND,           MVT::v8f32, Legal);
01316     setOperationAction(ISD::FP_EXTEND,          MVT::v8f32, Legal);
01317 
01318     setOperationAction(ISD::TRUNCATE,           MVT::i1, Custom);
01319     setOperationAction(ISD::TRUNCATE,           MVT::v16i8, Custom);
01320     setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
01321     if (Subtarget->hasDQI()) {
01322       setOperationAction(ISD::TRUNCATE,           MVT::v2i1, Custom);
01323       setOperationAction(ISD::TRUNCATE,           MVT::v4i1, Custom);
01324     }
01325     setOperationAction(ISD::TRUNCATE,           MVT::v8i1, Custom);
01326     setOperationAction(ISD::TRUNCATE,           MVT::v16i1, Custom);
01327     setOperationAction(ISD::TRUNCATE,           MVT::v16i16, Custom);
01328     setOperationAction(ISD::ZERO_EXTEND,        MVT::v16i32, Custom);
01329     setOperationAction(ISD::ZERO_EXTEND,        MVT::v8i64, Custom);
01330     setOperationAction(ISD::ANY_EXTEND,         MVT::v16i32, Custom);
01331     setOperationAction(ISD::ANY_EXTEND,         MVT::v8i64, Custom);
01332     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i32, Custom);
01333     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i64, Custom);
01334     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i8, Custom);
01335     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i16, Custom);
01336     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i16, Custom);
01337     if (Subtarget->hasDQI()) {
01338       setOperationAction(ISD::SIGN_EXTEND,        MVT::v4i32, Custom);
01339       setOperationAction(ISD::SIGN_EXTEND,        MVT::v2i64, Custom);
01340     }
01341     setOperationAction(ISD::FFLOOR,             MVT::v16f32, Legal);
01342     setOperationAction(ISD::FFLOOR,             MVT::v8f64, Legal);
01343     setOperationAction(ISD::FCEIL,              MVT::v16f32, Legal);
01344     setOperationAction(ISD::FCEIL,              MVT::v8f64, Legal);
01345     setOperationAction(ISD::FTRUNC,             MVT::v16f32, Legal);
01346     setOperationAction(ISD::FTRUNC,             MVT::v8f64, Legal);
01347     setOperationAction(ISD::FRINT,              MVT::v16f32, Legal);
01348     setOperationAction(ISD::FRINT,              MVT::v8f64, Legal);
01349     setOperationAction(ISD::FNEARBYINT,         MVT::v16f32, Legal);
01350     setOperationAction(ISD::FNEARBYINT,         MVT::v8f64, Legal);
01351 
01352     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8f64,  Custom);
01353     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i64,  Custom);
01354     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16f32,  Custom);
01355     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i32,  Custom);
01356     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i1, Legal);
01357 
01358     setOperationAction(ISD::SETCC,              MVT::v16i1, Custom);
01359     setOperationAction(ISD::SETCC,              MVT::v8i1, Custom);
01360 
01361     setOperationAction(ISD::MUL,              MVT::v8i64, Custom);
01362 
01363     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1,  Custom);
01364     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
01365     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i1, Custom);
01366     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i1, Custom);
01367     setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i1, Custom);
01368     setOperationAction(ISD::BUILD_VECTOR,       MVT::v16i1, Custom);
01369     setOperationAction(ISD::SELECT,             MVT::v8f64, Custom);
01370     setOperationAction(ISD::SELECT,             MVT::v8i64, Custom);
01371     setOperationAction(ISD::SELECT,             MVT::v16f32, Custom);
01372     setOperationAction(ISD::SELECT,             MVT::v16i1, Custom);
01373     setOperationAction(ISD::SELECT,             MVT::v8i1,  Custom);
01374 
01375     setOperationAction(ISD::ADD,                MVT::v8i64, Legal);
01376     setOperationAction(ISD::ADD,                MVT::v16i32, Legal);
01377 
01378     setOperationAction(ISD::SUB,                MVT::v8i64, Legal);
01379     setOperationAction(ISD::SUB,                MVT::v16i32, Legal);
01380 
01381     setOperationAction(ISD::MUL,                MVT::v16i32, Legal);
01382 
01383     setOperationAction(ISD::SRL,                MVT::v8i64, Custom);
01384     setOperationAction(ISD::SRL,                MVT::v16i32, Custom);
01385 
01386     setOperationAction(ISD::SHL,                MVT::v8i64, Custom);
01387     setOperationAction(ISD::SHL,                MVT::v16i32, Custom);
01388 
01389     setOperationAction(ISD::SRA,                MVT::v8i64, Custom);
01390     setOperationAction(ISD::SRA,                MVT::v16i32, Custom);
01391 
01392     setOperationAction(ISD::AND,                MVT::v8i64, Legal);
01393     setOperationAction(ISD::OR,                 MVT::v8i64, Legal);
01394     setOperationAction(ISD::XOR,                MVT::v8i64, Legal);
01395     setOperationAction(ISD::AND,                MVT::v16i32, Legal);
01396     setOperationAction(ISD::OR,                 MVT::v16i32, Legal);
01397     setOperationAction(ISD::XOR,                MVT::v16i32, Legal);
01398 
01399     if (Subtarget->hasCDI()) {
01400       setOperationAction(ISD::CTLZ,             MVT::v8i64, Legal);
01401       setOperationAction(ISD::CTLZ,             MVT::v16i32, Legal);
01402     }
01403     if (Subtarget->hasDQI()) {
01404       setOperationAction(ISD::MUL,             MVT::v2i64, Legal);
01405       setOperationAction(ISD::MUL,             MVT::v4i64, Legal);
01406       setOperationAction(ISD::MUL,             MVT::v8i64, Legal);
01407     }
01408     // Custom lower several nodes.
01409     for (MVT VT : MVT::vector_valuetypes()) {
01410       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
01411       if (EltSize == 1) {
01412         setOperationAction(ISD::AND, VT, Legal);
01413         setOperationAction(ISD::OR,  VT, Legal);
01414         setOperationAction(ISD::XOR,  VT, Legal);
01415       }
01416       if (EltSize >= 32 && VT.getSizeInBits() <= 512) {
01417         setOperationAction(ISD::MGATHER,  VT, Custom);
01418         setOperationAction(ISD::MSCATTER, VT, Custom);
01419       }
01420       // Extract subvector is special because the value type
01421       // (result) is 256/128-bit but the source is 512-bit wide.
01422       if (VT.is128BitVector() || VT.is256BitVector()) {
01423         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01424       }
01425       if (VT.getVectorElementType() == MVT::i1)
01426         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
01427 
01428       // Do not attempt to custom lower other non-512-bit vectors
01429       if (!VT.is512BitVector())
01430         continue;
01431 
01432       if (EltSize >= 32) {
01433         setOperationAction(ISD::VECTOR_SHUFFLE,      VT, Custom);
01434         setOperationAction(ISD::INSERT_VECTOR_ELT,   VT, Custom);
01435         setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01436         setOperationAction(ISD::VSELECT,             VT, Legal);
01437         setOperationAction(ISD::EXTRACT_VECTOR_ELT,  VT, Custom);
01438         setOperationAction(ISD::SCALAR_TO_VECTOR,    VT, Custom);
01439         setOperationAction(ISD::INSERT_SUBVECTOR,    VT, Custom);
01440         setOperationAction(ISD::MLOAD,               VT, Legal);
01441         setOperationAction(ISD::MSTORE,              VT, Legal);
01442       }
01443     }
01444     for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
01445       MVT VT = (MVT::SimpleValueType)i;
01446 
01447       // Do not attempt to promote non-512-bit vectors.
01448       if (!VT.is512BitVector())
01449         continue;
01450 
01451       setOperationAction(ISD::SELECT, VT, Promote);
01452       AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
01453     }
01454   } // has AVX-512
01455 
01456   if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
01457     addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
01458     addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
01459 
01460     addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
01461     addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
01462 
01463     setOperationAction(ISD::LOAD,               MVT::v32i16, Legal);
01464     setOperationAction(ISD::LOAD,               MVT::v64i8, Legal);
01465     setOperationAction(ISD::SETCC,              MVT::v32i1, Custom);
01466     setOperationAction(ISD::SETCC,              MVT::v64i1, Custom);
01467     setOperationAction(ISD::ADD,                MVT::v32i16, Legal);
01468     setOperationAction(ISD::ADD,                MVT::v64i8, Legal);
01469     setOperationAction(ISD::SUB,                MVT::v32i16, Legal);
01470     setOperationAction(ISD::SUB,                MVT::v64i8, Legal);
01471     setOperationAction(ISD::MUL,                MVT::v32i16, Legal);
01472     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i1, Custom);
01473     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i1, Custom);
01474     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v32i1, Custom);
01475     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v64i1, Custom);
01476     setOperationAction(ISD::SELECT,             MVT::v32i1, Custom);
01477     setOperationAction(ISD::SELECT,             MVT::v64i1, Custom);
01478     setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i8, Custom);
01479     setOperationAction(ISD::SIGN_EXTEND,        MVT::v64i8, Custom);
01480     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v32i1, Custom);
01481     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v64i1, Custom);
01482 
01483     for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
01484       const MVT VT = (MVT::SimpleValueType)i;
01485 
01486       const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
01487 
01488       // Do not attempt to promote non-512-bit vectors.
01489       if (!VT.is512BitVector())
01490         continue;
01491 
01492       if (EltSize < 32) {
01493         setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01494         setOperationAction(ISD::VSELECT,             VT, Legal);
01495       }
01496     }
01497   }
01498 
01499   if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
01500     addRegisterClass(MVT::v4i1,   &X86::VK4RegClass);
01501     addRegisterClass(MVT::v2i1,   &X86::VK2RegClass);
01502 
01503     setOperationAction(ISD::SETCC,              MVT::v4i1, Custom);
01504     setOperationAction(ISD::SETCC,              MVT::v2i1, Custom);
01505     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v4i1, Custom);
01506     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i1, Custom);
01507     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v8i1, Custom);
01508     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v4i1, Custom);
01509     setOperationAction(ISD::SELECT,             MVT::v4i1, Custom);
01510     setOperationAction(ISD::SELECT,             MVT::v2i1, Custom);
01511     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i1, Custom);
01512     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i1, Custom);
01513 
01514     setOperationAction(ISD::AND,                MVT::v8i32, Legal);
01515     setOperationAction(ISD::OR,                 MVT::v8i32, Legal);
01516     setOperationAction(ISD::XOR,                MVT::v8i32, Legal);
01517     setOperationAction(ISD::AND,                MVT::v4i32, Legal);
01518     setOperationAction(ISD::OR,                 MVT::v4i32, Legal);
01519     setOperationAction(ISD::XOR,                MVT::v4i32, Legal);
01520     setOperationAction(ISD::SRA,                MVT::v2i64, Custom);
01521     setOperationAction(ISD::SRA,                MVT::v4i64, Custom);
01522   }
01523 
01524   // We want to custom lower some of our intrinsics.
01525   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
01526   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
01527   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
01528   if (!Subtarget->is64Bit())
01529     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
01530 
01531   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
01532   // handle type legalization for these operations here.
01533   //
01534   // FIXME: We really should do custom legalization for addition and
01535   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
01536   // than generic legalization for 64-bit multiplication-with-overflow, though.
01537   for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
01538     // Add/Sub/Mul with overflow operations are custom lowered.
01539     MVT VT = IntVTs[i];
01540     setOperationAction(ISD::SADDO, VT, Custom);
01541     setOperationAction(ISD::UADDO, VT, Custom);
01542     setOperationAction(ISD::SSUBO, VT, Custom);
01543     setOperationAction(ISD::USUBO, VT, Custom);
01544     setOperationAction(ISD::SMULO, VT, Custom);
01545     setOperationAction(ISD::UMULO, VT, Custom);
01546   }
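  // As an illustration (an approximation, not the exact nodes produced), an
  // @llvm.sadd.with.overflow.i32 call is custom lowered to an X86 add that
  // also defines EFLAGS, with the overflow result read back via a setcc on
  // the overflow condition (SETO/JO at the machine level).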
01547 
01548 
01549   if (!Subtarget->is64Bit()) {
01550     // These libcalls are not available in 32-bit.
01551     setLibcallName(RTLIB::SHL_I128, nullptr);
01552     setLibcallName(RTLIB::SRL_I128, nullptr);
01553     setLibcallName(RTLIB::SRA_I128, nullptr);
01554   }
01555 
01556   // Combine sin / cos into one node or libcall if possible.
01557   if (Subtarget->hasSinCos()) {
01558     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
01559     setLibcallName(RTLIB::SINCOS_F64, "sincos");
01560     if (Subtarget->isTargetDarwin()) {
01561       // For MacOSX, we don't want the normal expansion of a libcall to sincos.
01562       // We want to issue a libcall to __sincos_stret to avoid memory traffic.
01563       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
01564       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
01565     }
01566   }
01567 
01568   if (Subtarget->isTargetWin64()) {
01569     setOperationAction(ISD::SDIV, MVT::i128, Custom);
01570     setOperationAction(ISD::UDIV, MVT::i128, Custom);
01571     setOperationAction(ISD::SREM, MVT::i128, Custom);
01572     setOperationAction(ISD::UREM, MVT::i128, Custom);
01573     setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
01574     setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
01575   }
01576 
01577   // We have target-specific dag combine patterns for the following nodes:
01578   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
01579   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
01580   setTargetDAGCombine(ISD::BITCAST);
01581   setTargetDAGCombine(ISD::VSELECT);
01582   setTargetDAGCombine(ISD::SELECT);
01583   setTargetDAGCombine(ISD::SHL);
01584   setTargetDAGCombine(ISD::SRA);
01585   setTargetDAGCombine(ISD::SRL);
01586   setTargetDAGCombine(ISD::OR);
01587   setTargetDAGCombine(ISD::AND);
01588   setTargetDAGCombine(ISD::ADD);
01589   setTargetDAGCombine(ISD::FADD);
01590   setTargetDAGCombine(ISD::FSUB);
01591   setTargetDAGCombine(ISD::FMA);
01592   setTargetDAGCombine(ISD::SUB);
01593   setTargetDAGCombine(ISD::LOAD);
01594   setTargetDAGCombine(ISD::MLOAD);
01595   setTargetDAGCombine(ISD::STORE);
01596   setTargetDAGCombine(ISD::MSTORE);
01597   setTargetDAGCombine(ISD::ZERO_EXTEND);
01598   setTargetDAGCombine(ISD::ANY_EXTEND);
01599   setTargetDAGCombine(ISD::SIGN_EXTEND);
01600   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
01601   setTargetDAGCombine(ISD::SINT_TO_FP);
01602   setTargetDAGCombine(ISD::SETCC);
01603   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
01604   setTargetDAGCombine(ISD::BUILD_VECTOR);
01605   setTargetDAGCombine(ISD::MUL);
01606   setTargetDAGCombine(ISD::XOR);
01607 
01608   computeRegisterProperties(Subtarget->getRegisterInfo());
01609 
01610   // On Darwin, -Os means optimize for size without hurting performance,
01611   // so do not reduce the limit.
01612   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
01613   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
01614   MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
01615   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
01616   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
01617   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
01618   setPrefLoopAlignment(4); // 2^4 bytes.
01619 
01620   // A predictable cmov doesn't hurt on Atom because it's in-order.
01621   PredictableSelectIsExpensive = !Subtarget->isAtom();
01622   EnableExtLdPromotion = true;
01623   setPrefFunctionAlignment(4); // 2^4 bytes.
01624 
01625   verifyIntrinsicTables();
01626 }
01627 
01628 // This has so far only been implemented for 64-bit MachO.
01629 bool X86TargetLowering::useLoadStackGuardNode() const {
01630   return Subtarget->isTargetMachO() && Subtarget->is64Bit();
01631 }
01632 
01633 TargetLoweringBase::LegalizeTypeAction
01634 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
01635   if (ExperimentalVectorWideningLegalization &&
01636       VT.getVectorNumElements() != 1 &&
01637       VT.getVectorElementType().getSimpleVT() != MVT::i1)
01638     return TypeWidenVector;
01639 
01640   return TargetLoweringBase::getPreferredVectorAction(VT);
01641 }
01642 
01643 EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
01644   if (!VT.isVector())
01645     return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
01646 
01647   const unsigned NumElts = VT.getVectorNumElements();
01648   const EVT EltVT = VT.getVectorElementType();
01649   if (VT.is512BitVector()) {
01650     if (Subtarget->hasAVX512())
01651       if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
01652           EltVT == MVT::f32 || EltVT == MVT::f64)
01653         switch(NumElts) {
01654         case  8: return MVT::v8i1;
01655         case 16: return MVT::v16i1;
01656       }
01657     if (Subtarget->hasBWI())
01658       if (EltVT == MVT::i8 || EltVT == MVT::i16)
01659         switch(NumElts) {
01660         case 32: return MVT::v32i1;
01661         case 64: return MVT::v64i1;
01662       }
01663   }
01664 
01665   if (VT.is256BitVector() || VT.is128BitVector()) {
01666     if (Subtarget->hasVLX())
01667       if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
01668           EltVT == MVT::f32 || EltVT == MVT::f64)
01669         switch(NumElts) {
01670         case 2: return MVT::v2i1;
01671         case 4: return MVT::v4i1;
01672         case 8: return MVT::v8i1;
01673       }
01674     if (Subtarget->hasBWI() && Subtarget->hasVLX())
01675       if (EltVT == MVT::i8 || EltVT == MVT::i16)
01676         switch(NumElts) {
01677         case  8: return MVT::v8i1;
01678         case 16: return MVT::v16i1;
01679         case 32: return MVT::v32i1;
01680       }
01681   }
01682 
01683   return VT.changeVectorElementTypeToInteger();
01684 }
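// For example (assuming the logic above): a setcc on v16f32 with AVX-512
// yields a v16i1 mask, a setcc on v4i32 with VLX yields v4i1, and without
// those features a setcc on v4f32 falls through to v4i32 via
// changeVectorElementTypeToInteger().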
01685 
01686 /// Helper for getByValTypeAlignment to determine
01687 /// the desired ByVal argument alignment.
01688 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
01689   if (MaxAlign == 16)
01690     return;
01691   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
01692     if (VTy->getBitWidth() == 128)
01693       MaxAlign = 16;
01694   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
01695     unsigned EltAlign = 0;
01696     getMaxByValAlign(ATy->getElementType(), EltAlign);
01697     if (EltAlign > MaxAlign)
01698       MaxAlign = EltAlign;
01699   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
01700     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
01701       unsigned EltAlign = 0;
01702       getMaxByValAlign(STy->getElementType(i), EltAlign);
01703       if (EltAlign > MaxAlign)
01704         MaxAlign = EltAlign;
01705       if (MaxAlign == 16)
01706         break;
01707     }
01708   }
01709 }
01710 
01711 /// Return the desired alignment for ByVal aggregate
01712 /// function arguments in the caller parameter area. For X86, aggregates
01713 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
01714 /// are at 4-byte boundaries.
01715 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
01716   if (Subtarget->is64Bit()) {
01717     // Max of 8 and alignment of type.
01718     unsigned TyAlign = TD->getABITypeAlignment(Ty);
01719     if (TyAlign > 8)
01720       return TyAlign;
01721     return 8;
01722   }
01723 
01724   unsigned Align = 4;
01725   if (Subtarget->hasSSE1())
01726     getMaxByValAlign(Ty, Align);
01727   return Align;
01728 }
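// For example (following the rules above), on 32-bit x86 with SSE a byval
// struct containing a 128-bit vector member is aligned to 16 bytes, while a
// byval struct of plain i32 fields stays at the default 4-byte alignment; on
// x86-64 the result is max(8, the type's ABI alignment).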
01729 
01730 /// Returns the target specific optimal type for load
01731 /// and store operations as a result of memset, memcpy, and memmove
01732 /// lowering. If DstAlign is zero, that means the destination alignment can
01733 /// satisfy any constraint. Similarly, if SrcAlign is zero it means there
01734 /// isn't a need to check it against the alignment requirement,
01735 /// probably because the source does not need to be loaded. If 'IsMemset' is
01736 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
01737 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
01738 /// source is constant so it does not need to be loaded.
01739 /// It returns EVT::Other if the type should be determined using generic
01740 /// target-independent logic.
01741 EVT
01742 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
01743                                        unsigned DstAlign, unsigned SrcAlign,
01744                                        bool IsMemset, bool ZeroMemset,
01745                                        bool MemcpyStrSrc,
01746                                        MachineFunction &MF) const {
01747   const Function *F = MF.getFunction();
01748   if ((!IsMemset || ZeroMemset) &&
01749       !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
01750     if (Size >= 16 &&
01751         (Subtarget->isUnalignedMemAccessFast() ||
01752          ((DstAlign == 0 || DstAlign >= 16) &&
01753           (SrcAlign == 0 || SrcAlign >= 16)))) {
01754       if (Size >= 32) {
01755         if (Subtarget->hasInt256())
01756           return MVT::v8i32;
01757         if (Subtarget->hasFp256())
01758           return MVT::v8f32;
01759       }
01760       if (Subtarget->hasSSE2())
01761         return MVT::v4i32;
01762       if (Subtarget->hasSSE1())
01763         return MVT::v4f32;
01764     } else if (!MemcpyStrSrc && Size >= 8 &&
01765                !Subtarget->is64Bit() &&
01766                Subtarget->hasSSE2()) {
01767       // Do not use f64 to lower memcpy if the source is a string constant.
01768       // It's better to use i32 to avoid the loads.
01769       return MVT::f64;
01770     }
01771   }
01772   if (Subtarget->is64Bit() && Size >= 8)
01773     return MVT::i64;
01774   return MVT::i32;
01775 }
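// As a worked example of the selection above (illustrative only): a 64-byte
// memcpy with 16-byte aligned source and destination picks v8i32 on an AVX2
// (hasInt256) subtarget, v8f32 with AVX only, v4i32 with SSE2, and otherwise
// falls back to i64 on 64-bit targets or i32 on 32-bit targets.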
01776 
01777 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
01778   if (VT == MVT::f32)
01779     return X86ScalarSSEf32;
01780   else if (VT == MVT::f64)
01781     return X86ScalarSSEf64;
01782   return true;
01783 }
01784 
01785 bool
01786 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
01787                                                   unsigned,
01788                                                   unsigned,
01789                                                   bool *Fast) const {
01790   if (Fast)
01791     *Fast = Subtarget->isUnalignedMemAccessFast();
01792   return true;
01793 }
01794 
01795 /// Return the entry encoding for a jump table in the
01796 /// current function.  The returned value is a member of the
01797 /// MachineJumpTableInfo::JTEntryKind enum.
01798 unsigned X86TargetLowering::getJumpTableEncoding() const {
01799   // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
01800   // symbol.
01801   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
01802       Subtarget->isPICStyleGOT())
01803     return MachineJumpTableInfo::EK_Custom32;
01804 
01805   // Otherwise, use the normal jump table encoding heuristics.
01806   return TargetLowering::getJumpTableEncoding();
01807 }
01808 
01809 bool X86TargetLowering::useSoftFloat() const {
01810   return Subtarget->useSoftFloat();
01811 }
01812 
01813 const MCExpr *
01814 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
01815                                              const MachineBasicBlock *MBB,
01816                                              unsigned uid, MCContext &Ctx) const {
01817   assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
01818          Subtarget->isPICStyleGOT());
01819   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
01820   // entries.
01821   return MCSymbolRefExpr::Create(MBB->getSymbol(),
01822                                  MCSymbolRefExpr::VK_GOTOFF, Ctx);
01823 }
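// In the emitted assembly this corresponds to jump table entries of the form
//   .long .LBB0_3@GOTOFF
// (an illustrative example with a made-up block label), i.e. each entry holds
// the basic block address relative to the GOT base rather than an absolute
// address.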
01824 
01825 /// Returns relocation base for the given PIC jumptable.
01826 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
01827                                                     SelectionDAG &DAG) const {
01828   if (!Subtarget->is64Bit())
01829     // This doesn't have an SDLoc associated with it, but it is not really
01830     // the same as a Register.
01831     return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
01832   return Table;
01833 }
01834 
01835 /// This returns the relocation base for the given PIC jumptable,
01836 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
01837 const MCExpr *X86TargetLowering::
01838 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
01839                              MCContext &Ctx) const {
01840   // X86-64 uses RIP relative addressing based on the jump table label.
01841   if (Subtarget->isPICStyleRIPRel())
01842     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
01843 
01844   // Otherwise, the reference is relative to the PIC base.
01845   return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
01846 }
01847 
01848 std::pair<const TargetRegisterClass *, uint8_t>
01849 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
01850                                            MVT VT) const {
01851   const TargetRegisterClass *RRC = nullptr;
01852   uint8_t Cost = 1;
01853   switch (VT.SimpleTy) {
01854   default:
01855     return TargetLowering::findRepresentativeClass(TRI, VT);
01856   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
01857     RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
01858     break;
01859   case MVT::x86mmx:
01860     RRC = &X86::VR64RegClass;
01861     break;
01862   case MVT::f32: case MVT::f64:
01863   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
01864   case MVT::v4f32: case MVT::v2f64:
01865   case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
01866   case MVT::v4f64:
01867     RRC = &X86::VR128RegClass;
01868     break;
01869   }
01870   return std::make_pair(RRC, Cost);
01871 }
01872 
01873 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
01874                                                unsigned &Offset) const {
01875   if (!Subtarget->isTargetLinux())
01876     return false;
01877 
01878   if (Subtarget->is64Bit()) {
01879     // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
01880     Offset = 0x28;
01881     if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
01882       AddressSpace = 256;
01883     else
01884       AddressSpace = 257;
01885   } else {
01886     // %gs:0x14 on i386
01887     Offset = 0x14;
01888     AddressSpace = 256;
01889   }
01890   return true;
01891 }
01892 
01893 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
01894                                             unsigned DestAS) const {
01895   assert(SrcAS != DestAS && "Expected different address spaces!");
01896 
01897   return SrcAS < 256 && DestAS < 256;
01898 }
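// Address spaces 256 and 257 map to the x86 %gs and %fs segment overrides, so
// casts into or out of them are not no-ops, whereas casts among the ordinary
// (< 256) address spaces are.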
01899 
01900 //===----------------------------------------------------------------------===//
01901 //               Return Value Calling Convention Implementation
01902 //===----------------------------------------------------------------------===//
01903 
01904 #include "X86GenCallingConv.inc"
01905 
01906 bool
01907 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
01908                                   MachineFunction &MF, bool isVarArg,
01909                         const SmallVectorImpl<ISD::OutputArg> &Outs,
01910                         LLVMContext &Context) const {
01911   SmallVector<CCValAssign, 16> RVLocs;
01912   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
01913   return CCInfo.CheckReturn(Outs, RetCC_X86);
01914 }
01915 
01916 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
01917   static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
01918   return ScratchRegs;
01919 }
01920 
01921 SDValue
01922 X86TargetLowering::LowerReturn(SDValue Chain,
01923                                CallingConv::ID CallConv, bool isVarArg,
01924                                const SmallVectorImpl<ISD::OutputArg> &Outs,
01925                                const SmallVectorImpl<SDValue> &OutVals,
01926                                SDLoc dl, SelectionDAG &DAG) const {
01927   MachineFunction &MF = DAG.getMachineFunction();
01928   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
01929 
01930   SmallVector<CCValAssign, 16> RVLocs;
01931   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
01932   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
01933 
01934   SDValue Flag;
01935   SmallVector<SDValue, 6> RetOps;
01936   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
01937   // Operand #1 = Bytes To Pop
01938   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
01939                    MVT::i16));
01940 
01941   // Copy the result values into the output registers.
01942   for (unsigned i = 0; i != RVLocs.size(); ++i) {
01943     CCValAssign &VA = RVLocs[i];
01944     assert(VA.isRegLoc() && "Can only return in registers!");
01945     SDValue ValToCopy = OutVals[i];
01946     EVT ValVT = ValToCopy.getValueType();
01947 
01948     // Promote values to the appropriate types.
01949     if (VA.getLocInfo() == CCValAssign::SExt)
01950       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
01951     else if (VA.getLocInfo() == CCValAssign::ZExt)
01952       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
01953     else if (VA.getLocInfo() == CCValAssign::AExt) {
01954       if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1)
01955         ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
01956       else
01957         ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
01958     }
01959     else if (VA.getLocInfo() == CCValAssign::BCvt)
01960       ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
01961 
01962     assert(VA.getLocInfo() != CCValAssign::FPExt &&
01963            "Unexpected FP-extend for return value.");
01964 
01965     // If this is x86-64, and we disabled SSE, we can't return FP values,
01966     // or SSE or MMX vectors.
01967     if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
01968          VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
01969           (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
01970       report_fatal_error("SSE register return with SSE disabled");
01971     }
01972     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
01973     // llvm-gcc has never done it right and no one has noticed, so this
01974     // should be OK for now.
01975     if (ValVT == MVT::f64 &&
01976         (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
01977       report_fatal_error("SSE2 register return with SSE2 disabled");
01978 
01979     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
01980     // the RET instruction and handled by the FP Stackifier.
01981     if (VA.getLocReg() == X86::FP0 ||
01982         VA.getLocReg() == X86::FP1) {
01983       // If this is a copy from an xmm register to ST(0), use an FPExtend to
01984       // change the value to the FP stack register class.
01985       if (isScalarFPTypeInSSEReg(VA.getValVT()))
01986         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
01987       RetOps.push_back(ValToCopy);
01988       // Don't emit a copytoreg.
01989       continue;
01990     }
01991 
01992     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
01993     // which is returned in RAX / RDX.
01994     if (Subtarget->is64Bit()) {
01995       if (ValVT == MVT::x86mmx) {
01996         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
01997           ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
01998           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
01999                                   ValToCopy);
02000           // If we don't have SSE2 available, convert to v4f32 so the generated
02001           // register is legal.
02002           if (!Subtarget->hasSSE2())
02003             ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, ValToCopy);
02004         }
02005       }
02006     }
02007 
02008     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
02009     Flag = Chain.getValue(1);
02010     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02011   }
02012 
02013   // All x86 ABIs require that for returning structs by value we copy
02014   // the sret argument into %rax/%eax (depending on ABI) for the return.
02015   // We saved the argument into a virtual register in the entry block,
02016   // so now we copy the value out and into %rax/%eax.
02017   //
02018   // Checking Function.hasStructRetAttr() here is insufficient because the IR
02019   // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
02020   // false, then an sret argument may be implicitly inserted in the SelDAG. In
02021   // either case FuncInfo->setSRetReturnReg() will have been called.
02022   if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
02023     SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy());
02024 
02025     unsigned RetValReg
02026         = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
02027           X86::RAX : X86::EAX;
02028     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
02029     Flag = Chain.getValue(1);
02030 
02031     // RAX/EAX now acts like a return value.
02032     RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
02033   }
02034 
02035   RetOps[0] = Chain;  // Update chain.
02036 
02037   // Add the flag if we have it.
02038   if (Flag.getNode())
02039     RetOps.push_back(Flag);
02040 
02041   return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
02042 }
02043 
02044 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
02045   if (N->getNumValues() != 1)
02046     return false;
02047   if (!N->hasNUsesOfValue(1, 0))
02048     return false;
02049 
02050   SDValue TCChain = Chain;
02051   SDNode *Copy = *N->use_begin();
02052   if (Copy->getOpcode() == ISD::CopyToReg) {
02053     // If the copy has a glue operand, we conservatively assume it isn't safe to
02054     // perform a tail call.
02055     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02056       return false;
02057     TCChain = Copy->getOperand(0);
02058   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
02059     return false;
02060 
02061   bool HasRet = false;
02062   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
02063        UI != UE; ++UI) {
02064     if (UI->getOpcode() != X86ISD::RET_FLAG)
02065       return false;
02066     // If we are returning more than one value, we can definitely
02067     // not make a tail call; see PR19530.
02068     if (UI->getNumOperands() > 4)
02069       return false;
02070     if (UI->getNumOperands() == 4 &&
02071         UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
02072       return false;
02073     HasRet = true;
02074   }
02075 
02076   if (!HasRet)
02077     return false;
02078 
02079   Chain = TCChain;
02080   return true;
02081 }
02082 
02083 EVT
02084 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
02085                                             ISD::NodeType ExtendKind) const {
02086   MVT ReturnMVT;
02087   // TODO: Is this also valid on 32-bit?
02088   if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
02089     ReturnMVT = MVT::i8;
02090   else
02091     ReturnMVT = MVT::i32;
02092 
02093   EVT MinVT = getRegisterType(Context, ReturnMVT);
02094   return VT.bitsLT(MinVT) ? MinVT : VT;
02095 }
02096 
02097 /// Lower the result values of a call into the
02098 /// appropriate copies out of appropriate physical registers.
02099 ///
02100 SDValue
02101 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
02102                                    CallingConv::ID CallConv, bool isVarArg,
02103                                    const SmallVectorImpl<ISD::InputArg> &Ins,
02104                                    SDLoc dl, SelectionDAG &DAG,
02105                                    SmallVectorImpl<SDValue> &InVals) const {
02106 
02107   // Assign locations to each value returned by this call.
02108   SmallVector<CCValAssign, 16> RVLocs;
02109   bool Is64Bit = Subtarget->is64Bit();
02110   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
02111                  *DAG.getContext());
02112   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
02113 
02114   // Copy all of the result registers out of their specified physreg.
02115   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
02116     CCValAssign &VA = RVLocs[i];
02117     EVT CopyVT = VA.getLocVT();
02118 
02119     // If this is x86-64, and we disabled SSE, we can't return FP values
02120     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
02121         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
02122       report_fatal_error("SSE register return with SSE disabled");
02123     }
02124 
02125     // If we prefer to use the value in xmm registers, copy it out as f80 and
02126     // use a truncate to move it from fp stack reg to xmm reg.
02127     bool RoundAfterCopy = false;
02128     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
02129         isScalarFPTypeInSSEReg(VA.getValVT())) {
02130       CopyVT = MVT::f80;
02131       RoundAfterCopy = (CopyVT != VA.getLocVT());
02132     }
02133 
02134     Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
02135                                CopyVT, InFlag).getValue(1);
02136     SDValue Val = Chain.getValue(0);
02137 
02138     if (RoundAfterCopy)
02139       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
02140                         // This truncation won't change the value.
02141                         DAG.getIntPtrConstant(1, dl));
02142 
02143     if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
02144       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
02145 
02146     InFlag = Chain.getValue(2);
02147     InVals.push_back(Val);
02148   }
02149 
02150   return Chain;
02151 }
02152 
02153 //===----------------------------------------------------------------------===//
02154 //                C & StdCall & Fast Calling Convention implementation
02155 //===----------------------------------------------------------------------===//
02156 //  The StdCall calling convention is the standard for many Windows API
02157 //  routines. It differs from the C calling convention only slightly: the
02158 //  callee cleans up the stack rather than the caller, and symbols are also
02159 //  decorated in a particular way :) It doesn't support any vector arguments.
02160 //  For info on the fast calling convention, see the Fast Calling Convention
02161 //  (tail call) implementation in LowerX86_32FastCCCallTo.
02162 
02163 /// CallIsStructReturn - Determines whether a call uses struct return
02164 /// semantics.
02165 enum StructReturnType {
02166   NotStructReturn,
02167   RegStructReturn,
02168   StackStructReturn
02169 };
02170 static StructReturnType
02171 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
02172   if (Outs.empty())
02173     return NotStructReturn;
02174 
02175   const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
02176   if (!Flags.isSRet())
02177     return NotStructReturn;
02178   if (Flags.isInReg())
02179     return RegStructReturn;
02180   return StackStructReturn;
02181 }
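// Illustrative IR (hypothetical function names) for the three cases above:
//   call void @take(%struct.S* %p)                 -> NotStructReturn
//   call void @ret_mem(%struct.S* sret %out)       -> StackStructReturn
//   call void @ret_reg(%struct.S* inreg sret %out) -> RegStructReturn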
02182 
02183 /// Determines whether a function uses struct return semantics.
02184 static StructReturnType
02185 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
02186   if (Ins.empty())
02187     return NotStructReturn;
02188 
02189   const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
02190   if (!Flags.isSRet())
02191     return NotStructReturn;
02192   if (Flags.isInReg())
02193     return RegStructReturn;
02194   return StackStructReturn;
02195 }
02196 
02197 /// Make a copy of an aggregate at the address specified by "Src" to the
02198 /// address "Dst", with the size and alignment specified by the corresponding
02199 /// parameter attribute. The copy will be passed as a byval function parameter.
02200 static SDValue
02201 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
02202                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
02203                           SDLoc dl) {
02204   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
02205 
02206   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
02207                        /*isVolatile*/false, /*AlwaysInline=*/true,
02208                        /*isTailCall*/false,
02209                        MachinePointerInfo(), MachinePointerInfo());
02210 }
02211 
02212 /// Return true if the calling convention is one that
02213 /// supports tail call optimization.
02214 static bool IsTailCallConvention(CallingConv::ID CC) {
02215   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
02216           CC == CallingConv::HiPE);
02217 }
02218 
02219 /// \brief Return true if the calling convention is a C calling convention.
02220 static bool IsCCallConvention(CallingConv::ID CC) {
02221   return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
02222           CC == CallingConv::X86_64_SysV);
02223 }
02224 
02225 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
02226   if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
02227     return false;
02228 
02229   CallSite CS(CI);
02230   CallingConv::ID CalleeCC = CS.getCallingConv();
02231   if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
02232     return false;
02233 
02234   return true;
02235 }
02236 
02237 /// Return true if the function is being made into
02238 /// a tailcall target by changing its ABI.
02239 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
02240                                    bool GuaranteedTailCallOpt) {
02241   return GuaranteedTailCallOpt && IsTailCallConvention(CC);
02242 }
02243 
02244 SDValue
02245 X86TargetLowering::LowerMemArgument(SDValue Chain,
02246                                     CallingConv::ID CallConv,
02247                                     const SmallVectorImpl<ISD::InputArg> &Ins,
02248                                     SDLoc dl, SelectionDAG &DAG,
02249                                     const CCValAssign &VA,
02250                                     MachineFrameInfo *MFI,
02251                                     unsigned i) const {
02252   // Create the nodes corresponding to a load from this parameter slot.
02253   ISD::ArgFlagsTy Flags = Ins[i].Flags;
02254   bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
02255       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
02256   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
02257   EVT ValVT;
02258 
02259   // If the value is passed by pointer, we have the address passed instead of
02260   // the value itself.
02261   bool ExtendedInMem = VA.isExtInLoc() &&
02262     VA.getValVT().getScalarType() == MVT::i1;
02263 
02264   if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
02265     ValVT = VA.getLocVT();
02266   else
02267     ValVT = VA.getValVT();
02268 
02269   // FIXME: For now, all byval parameter objects are marked mutable. This can be
02270   // changed with more analysis.
02271   // In the case of tail call optimization, mark all arguments mutable, since
02272   // they could be overwritten by the lowering of arguments for a tail call.
02273   if (Flags.isByVal()) {
02274     unsigned Bytes = Flags.getByValSize();
02275     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
02276     int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
02277     return DAG.getFrameIndex(FI, getPointerTy());
02278   } else {
02279     int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
02280                                     VA.getLocMemOffset(), isImmutable);
02281     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
02282     SDValue Val =  DAG.getLoad(ValVT, dl, Chain, FIN,
02283                                MachinePointerInfo::getFixedStack(FI),
02284                                false, false, false, 0);
02285     return ExtendedInMem ?
02286       DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
02287   }
02288 }
02289 
02290 // FIXME: Get this from tablegen.
02291 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
02292                                                 const X86Subtarget *Subtarget) {
02293   assert(Subtarget->is64Bit());
02294 
02295   if (Subtarget->isCallingConvWin64(CallConv)) {
02296     static const MCPhysReg GPR64ArgRegsWin64[] = {
02297       X86::RCX, X86::RDX, X86::R8,  X86::R9
02298     };
02299     return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
02300   }
02301 
02302   static const MCPhysReg GPR64ArgRegs64Bit[] = {
02303     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
02304   };
02305   return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
02306 }
02307 
02308 // FIXME: Get this from tablegen.
02309 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
02310                                                 CallingConv::ID CallConv,
02311                                                 const X86Subtarget *Subtarget) {
02312   assert(Subtarget->is64Bit());
02313   if (Subtarget->isCallingConvWin64(CallConv)) {
02314     // The XMM registers which might contain var arg parameters are shadowed
02315     // by their paired GPRs, so we only need to save the GPRs to their home
02316     // slots.
02317     // TODO: __vectorcall will change this.
02318     return None;
02319   }
02320 
02321   const Function *Fn = MF.getFunction();
02322   bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
02323   bool isSoftFloat = Subtarget->useSoftFloat();
02324   assert(!(isSoftFloat && NoImplicitFloatOps) &&
02325          "SSE register cannot be used when SSE is disabled!");
02326   if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
02327     // With soft-float, no-implicit-float (e.g. kernel mode), or no SSE, there
02328     // are no XMM argument registers.
02329     return None;
02330 
02331   static const MCPhysReg XMMArgRegs64Bit[] = {
02332     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02333     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02334   };
02335   return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
02336 }
02337 
02338 SDValue
02339 X86TargetLowering::LowerFormalArguments(SDValue Chain,
02340                                         CallingConv::ID CallConv,
02341                                         bool isVarArg,
02342                                       const SmallVectorImpl<ISD::InputArg> &Ins,
02343                                         SDLoc dl,
02344                                         SelectionDAG &DAG,
02345                                         SmallVectorImpl<SDValue> &InVals)
02346                                           const {
02347   MachineFunction &MF = DAG.getMachineFunction();
02348   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
02349   const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
02350 
02351   const Function* Fn = MF.getFunction();
02352   if (Fn->hasExternalLinkage() &&
02353       Subtarget->isTargetCygMing() &&
02354       Fn->getName() == "main")
02355     FuncInfo->setForceFramePointer(true);
02356 
02357   MachineFrameInfo *MFI = MF.getFrameInfo();
02358   bool Is64Bit = Subtarget->is64Bit();
02359   bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
02360 
02361   assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
02362          "Var args not supported with calling convention fastcc, ghc or hipe");
02363 
02364   // Assign locations to all of the incoming arguments.
02365   SmallVector<CCValAssign, 16> ArgLocs;
02366   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02367 
02368   // Allocate shadow area for Win64
02369   if (IsWin64)
02370     CCInfo.AllocateStack(32, 8);
02371 
02372   CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
02373 
02374   unsigned LastVal = ~0U;
02375   SDValue ArgValue;
02376   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02377     CCValAssign &VA = ArgLocs[i];
02378     // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
02379     // places.
02380     assert(VA.getValNo() != LastVal &&
02381            "Don't support value assigned to multiple locs yet");
02382     (void)LastVal;
02383     LastVal = VA.getValNo();
02384 
02385     if (VA.isRegLoc()) {
02386       EVT RegVT = VA.getLocVT();
02387       const TargetRegisterClass *RC;
02388       if (RegVT == MVT::i32)
02389         RC = &X86::GR32RegClass;
02390       else if (Is64Bit && RegVT == MVT::i64)
02391         RC = &X86::GR64RegClass;
02392       else if (RegVT == MVT::f32)
02393         RC = &X86::FR32RegClass;
02394       else if (RegVT == MVT::f64)
02395         RC = &X86::FR64RegClass;
02396       else if (RegVT.is512BitVector())
02397         RC = &X86::VR512RegClass;
02398       else if (RegVT.is256BitVector())
02399         RC = &X86::VR256RegClass;
02400       else if (RegVT.is128BitVector())
02401         RC = &X86::VR128RegClass;
02402       else if (RegVT == MVT::x86mmx)
02403         RC = &X86::VR64RegClass;
02404       else if (RegVT == MVT::i1)
02405         RC = &X86::VK1RegClass;
02406       else if (RegVT == MVT::v8i1)
02407         RC = &X86::VK8RegClass;
02408       else if (RegVT == MVT::v16i1)
02409         RC = &X86::VK16RegClass;
02410       else if (RegVT == MVT::v32i1)
02411         RC = &X86::VK32RegClass;
02412       else if (RegVT == MVT::v64i1)
02413         RC = &X86::VK64RegClass;
02414       else
02415         llvm_unreachable("Unknown argument type!");
02416 
02417       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02418       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
02419 
02420       // If this is an 8 or 16-bit value, it is really passed promoted to 32
02421       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
02422       // right size.
02423       if (VA.getLocInfo() == CCValAssign::SExt)
02424         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
02425                                DAG.getValueType(VA.getValVT()));
02426       else if (VA.getLocInfo() == CCValAssign::ZExt)
02427         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
02428                                DAG.getValueType(VA.getValVT()));
02429       else if (VA.getLocInfo() == CCValAssign::BCvt)
02430         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
02431 
02432       if (VA.isExtInLoc()) {
02433         // Handle MMX values passed in XMM regs.
02434         if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
02435           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
02436         else
02437           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
02438       }
02439     } else {
02440       assert(VA.isMemLoc());
02441       ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
02442     }
02443 
02444     // If the value is passed via a pointer, do a load.
02445     if (VA.getLocInfo() == CCValAssign::Indirect)
02446       ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
02447                              MachinePointerInfo(), false, false, false, 0);
02448 
02449     InVals.push_back(ArgValue);
02450   }
02451 
02452   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02453     // All x86 ABIs require that for returning structs by value we copy the
02454     // sret argument into %rax/%eax (depending on ABI) for the return. Save
02455     // the argument into a virtual register so that we can access it from the
02456     // return points.
02457     if (Ins[i].Flags.isSRet()) {
02458       unsigned Reg = FuncInfo->getSRetReturnReg();
02459       if (!Reg) {
02460         MVT PtrTy = getPointerTy();
02461         Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
02462         FuncInfo->setSRetReturnReg(Reg);
02463       }
02464       SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
02465       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
02466       break;
02467     }
02468   }
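
  // Illustrative consequence (hedged, using SysV AMD64 as an example): for
  //   struct Big make(void);
  // the hidden sret pointer arrives in %rdi and the ABI requires the callee to
  // hand that same pointer back in %rax; the virtual register recorded above is
  // what the return lowering later copies into %rax/%eax.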
02469 
02470   unsigned StackSize = CCInfo.getNextStackOffset();
02471   // Align stack specially for tail calls.
02472   if (FuncIsMadeTailCallSafe(CallConv,
02473                              MF.getTarget().Options.GuaranteedTailCallOpt))
02474     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
02475 
02476   // If the function takes a variable number of arguments, make a frame index
02477   // for the start of the first vararg value... for expansion of llvm.va_start.
02478   // We can skip this if there are no va_start calls.
02479   if (MFI->hasVAStart() &&
02480       (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
02481                    CallConv != CallingConv::X86_ThisCall))) {
02482     FuncInfo->setVarArgsFrameIndex(
02483         MFI->CreateFixedObject(1, StackSize, true));
02484   }
02485 
02486   MachineModuleInfo &MMI = MF.getMMI();
02487   const Function *WinEHParent = nullptr;
02488   if (IsWin64 && MMI.hasWinEHFuncInfo(Fn))
02489     WinEHParent = MMI.getWinEHParent(Fn);
02490   bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn;
02491   bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
02492 
02493   // Figure out if XMM registers are in use.
02494   assert(!(Subtarget->useSoftFloat() &&
02495            Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
02496          "SSE register cannot be used when SSE is disabled!");
02497 
02498   // 64-bit calling conventions support varargs and register parameters, so we
02499   // have to do extra work to spill them in the prologue.
02500   if (Is64Bit && isVarArg && MFI->hasVAStart()) {
02501     // Find the first unallocated argument registers.
02502     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
02503     ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
02504     unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
02505     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
02506     assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
02507            "SSE register cannot be used when SSE is disabled!");
02508 
02509     // Gather all the live in physical registers.
02510     SmallVector<SDValue, 6> LiveGPRs;
02511     SmallVector<SDValue, 8> LiveXMMRegs;
02512     SDValue ALVal;
02513     for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
02514       unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
02515       LiveGPRs.push_back(
02516           DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
02517     }
02518     if (!ArgXMMs.empty()) {
02519       unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
02520       ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
02521       for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
02522         unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
02523         LiveXMMRegs.push_back(
02524             DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
02525       }
02526     }
02527 
02528     if (IsWin64) {
02529       // Get to the caller-allocated home save location.  Add 8 to account
02530       // for the return address.
02531       int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
02532       FuncInfo->setRegSaveFrameIndex(
02533           MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
02534       // Fixup to set vararg frame on shadow area (4 x i64).
02535       if (NumIntRegs < 4)
02536         FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
02537     } else {
02538       // For X86-64, if there are vararg parameters that are passed via
02539       // registers, then we must store them to their spots on the stack so
02540       // they may be loaded by dereferencing the result of va_next.
02541       FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
02542       FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
02543       FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
02544           ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
02545     }
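
    // Worked example (illustrative SysV AMD64 numbers only): for
    //   int sum(int n, ...)
    // the first GPR (RDI) carries 'n' and no XMM register is used, so
    // NumIntRegs == 1 and NumXMMRegs == 0, giving VarArgsGPOffset == 8,
    // VarArgsFPOffset == 6*8 == 48, and a 6*8 + 8*16 == 176-byte save area.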
02546 
02547     // Store the integer parameter registers.
02548     SmallVector<SDValue, 8> MemOps;
02549     SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
02550                                       getPointerTy());
02551     unsigned Offset = FuncInfo->getVarArgsGPOffset();
02552     for (SDValue Val : LiveGPRs) {
02553       SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
02554                                 DAG.getIntPtrConstant(Offset, dl));
02555       SDValue Store =
02556         DAG.getStore(Val.getValue(1), dl, Val, FIN,
02557                      MachinePointerInfo::getFixedStack(
02558                        FuncInfo->getRegSaveFrameIndex(), Offset),
02559                      false, false, 0);
02560       MemOps.push_back(Store);
02561       Offset += 8;
02562     }
02563 
02564     if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
02565       // Now store the XMM (fp + vector) parameter registers.
02566       SmallVector<SDValue, 12> SaveXMMOps;
02567       SaveXMMOps.push_back(Chain);
02568       SaveXMMOps.push_back(ALVal);
02569       SaveXMMOps.push_back(DAG.getIntPtrConstant(
02570                              FuncInfo->getRegSaveFrameIndex(), dl));
02571       SaveXMMOps.push_back(DAG.getIntPtrConstant(
02572                              FuncInfo->getVarArgsFPOffset(), dl));
02573       SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
02574                         LiveXMMRegs.end());
02575       MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
02576                                    MVT::Other, SaveXMMOps));
02577     }
02578 
02579     if (!MemOps.empty())
02580       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02581   } else if (IsWinEHOutlined) {
02582     // Get to the caller-allocated home save location.  Add 8 to account
02583     // for the return address.
02584     int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
02585     FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject(
02586         /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false));
02587 
02588     MMI.getWinEHFuncInfo(Fn)
02589         .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] =
02590         FuncInfo->getRegSaveFrameIndex();
02591 
02592     // Store the second integer parameter (rdx) into rsp+16 relative to the
02593     // stack pointer at the entry of the function.
02594     SDValue RSFIN =
02595         DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy());
02596     unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass);
02597     SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64);
02598     Chain = DAG.getStore(
02599         Val.getValue(1), dl, Val, RSFIN,
02600         MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()),
02601         /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0);
02602   }
02603 
02604   if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
02605     // Find the largest legal vector type.
02606     MVT VecVT = MVT::Other;
02607     // FIXME: Only some x86_32 calling conventions support AVX512.
02608     if (Subtarget->hasAVX512() &&
02609         (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
02610                      CallConv == CallingConv::Intel_OCL_BI)))
02611       VecVT = MVT::v16f32;
02612     else if (Subtarget->hasAVX())
02613       VecVT = MVT::v8f32;
02614     else if (Subtarget->hasSSE2())
02615       VecVT = MVT::v4f32;
02616 
02617     // We forward some GPRs and some vector types.
02618     SmallVector<MVT, 2> RegParmTypes;
02619     MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
02620     RegParmTypes.push_back(IntVT);
02621     if (VecVT != MVT::Other)
02622       RegParmTypes.push_back(VecVT);
02623 
02624     // Compute the set of forwarded registers. The rest are scratch.
02625     SmallVectorImpl<ForwardedRegister> &Forwards =
02626         FuncInfo->getForwardedMustTailRegParms();
02627     CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
02628 
02629     // Conservatively forward AL on x86_64, since it might be used for varargs.
02630     if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
02631       unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
02632       Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
02633     }
02634 
02635     // Copy all forwards from physical to virtual registers.
02636     for (ForwardedRegister &F : Forwards) {
02637       // FIXME: Can we use a less constrained schedule?
02638       SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
02639       F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
02640       Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
02641     }
02642   }
02643 
02644   // Some CCs need callee pop.
02645   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
02646                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
02647     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
02648   } else {
02649     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
02650     // If this is an sret function, the return should pop the hidden pointer.
02651     if (!Is64Bit && !IsTailCallConvention(CallConv) &&
02652         !Subtarget->getTargetTriple().isOSMSVCRT() &&
02653         argsAreStructReturn(Ins) == StackStructReturn)
02654       FuncInfo->setBytesToPopOnReturn(4);
02655   }
02656 
02657   if (!Is64Bit) {
02658     // RegSaveFrameIndex is X86-64 only.
02659     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
02660     if (CallConv == CallingConv::X86_FastCall ||
02661         CallConv == CallingConv::X86_ThisCall)
02662       // fastcall and thiscall functions can't have varargs.
02663       FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
02664   }
02665 
02666   FuncInfo->setArgumentStackSize(StackSize);
02667 
02668   if (IsWinEHParent) {
02669     int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
02670     SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
02671     MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
02672     SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64);
02673     Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
02674                          MachinePointerInfo::getFixedStack(UnwindHelpFI),
02675                          /*isVolatile=*/true,
02676                          /*isNonTemporal=*/false, /*Alignment=*/0);
02677   }
02678 
02679   return Chain;
02680 }
02681 
02682 SDValue
02683 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
02684                                     SDValue StackPtr, SDValue Arg,
02685                                     SDLoc dl, SelectionDAG &DAG,
02686                                     const CCValAssign &VA,
02687                                     ISD::ArgFlagsTy Flags) const {
02688   unsigned LocMemOffset = VA.getLocMemOffset();
02689   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
02690   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
02691   if (Flags.isByVal())
02692     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
02693 
02694   return DAG.getStore(Chain, dl, Arg, PtrOff,
02695                       MachinePointerInfo::getStack(LocMemOffset),
02696                       false, false, 0);
02697 }
02698 
02699 /// Emit a load of the return address if tail call
02700 /// optimization is performed and it is required.
02701 SDValue
02702 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
02703                                            SDValue &OutRetAddr, SDValue Chain,
02704                                            bool IsTailCall, bool Is64Bit,
02705                                            int FPDiff, SDLoc dl) const {
02706   // Adjust the Return address stack slot.
02707   EVT VT = getPointerTy();
02708   OutRetAddr = getReturnAddressFrameIndex(DAG);
02709 
02710   // Load the "old" Return address.
02711   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
02712                            false, false, false, 0);
02713   return SDValue(OutRetAddr.getNode(), 1);
02714 }
02715 
02716 /// Emit a store of the return address if tail call
02717 /// optimization is performed and it is required (FPDiff!=0).
02718 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
02719                                         SDValue Chain, SDValue RetAddrFrIdx,
02720                                         EVT PtrVT, unsigned SlotSize,
02721                                         int FPDiff, SDLoc dl) {
02722   // Store the return address to the appropriate stack slot.
02723   if (!FPDiff) return Chain;
02724   // Calculate the new stack slot for the return address.
02725   int NewReturnAddrFI =
02726     MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
02727                                          false);
02728   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
02729   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
02730                        MachinePointerInfo::getFixedStack(NewReturnAddrFI),
02731                        false, false, 0);
02732   return Chain;
02733 }
02734 
02735 SDValue
02736 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
02737                              SmallVectorImpl<SDValue> &InVals) const {
02738   SelectionDAG &DAG                     = CLI.DAG;
02739   SDLoc &dl                             = CLI.DL;
02740   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
02741   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
02742   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
02743   SDValue Chain                         = CLI.Chain;
02744   SDValue Callee                        = CLI.Callee;
02745   CallingConv::ID CallConv              = CLI.CallConv;
02746   bool &isTailCall                      = CLI.IsTailCall;
02747   bool isVarArg                         = CLI.IsVarArg;
02748 
02749   MachineFunction &MF = DAG.getMachineFunction();
02750   bool Is64Bit        = Subtarget->is64Bit();
02751   bool IsWin64        = Subtarget->isCallingConvWin64(CallConv);
02752   StructReturnType SR = callIsStructReturn(Outs);
02753   bool IsSibcall      = false;
02754   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
02755 
02756   if (MF.getTarget().Options.DisableTailCalls)
02757     isTailCall = false;
02758 
02759   bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
02760   if (IsMustTail) {
02761     // Force this to be a tail call.  The verifier rules are enough to ensure
02762     // that we can lower this successfully without moving the return address
02763     // around.
02764     isTailCall = true;
02765   } else if (isTailCall) {
02766     // Check if it's really possible to do a tail call.
02767     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
02768                     isVarArg, SR != NotStructReturn,
02769                     MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
02770                     Outs, OutVals, Ins, DAG);
02771 
02772     // Sibcalls are automatically detected tailcalls which do not require
02773     // ABI changes.
02774     if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
02775       IsSibcall = true;
02776 
02777     if (isTailCall)
02778       ++NumTailCalls;
02779   }
02780 
02781   assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
02782          "Var args not supported with calling convention fastcc, ghc or hipe");
02783 
02784   // Analyze operands of the call, assigning locations to each operand.
02785   SmallVector<CCValAssign, 16> ArgLocs;
02786   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02787 
02788   // Allocate shadow area for Win64
02789   if (IsWin64)
02790     CCInfo.AllocateStack(32, 8);
02791 
02792   CCInfo.AnalyzeCallOperands(Outs, CC_X86);
02793 
02794   // Get a count of how many bytes are to be pushed on the stack.
02795   unsigned NumBytes = CCInfo.getNextStackOffset();
02796   if (IsSibcall)
02797     // This is a sibcall. The memory operands are already in place in the
02798     // caller's own caller's stack frame.
02799     NumBytes = 0;
02800   else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
02801            IsTailCallConvention(CallConv))
02802     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
02803 
02804   int FPDiff = 0;
02805   if (isTailCall && !IsSibcall && !IsMustTail) {
02806     // Lower arguments at fp - stackoffset + fpdiff.
02807     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
02808 
02809     FPDiff = NumBytesCallerPushed - NumBytes;
02810 
02811     // Record the delta by which the return address stack slot moves, but only
02812     // if this delta is larger (more negative) than the previously recorded one.
02813     if (FPDiff < X86Info->getTCReturnAddrDelta())
02814       X86Info->setTCReturnAddrDelta(FPDiff);
02815   }
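
  // Worked example (illustrative numbers): if the caller was entered with
  // 8 bytes of its own stack arguments (NumBytesCallerPushed == 8) and the
  // tail callee needs 24 bytes (NumBytes == 24), then FPDiff == -16 and the
  // return address slot must move 16 bytes to make room for the extra
  // arguments.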
02816 
02817   unsigned NumBytesToPush = NumBytes;
02818   unsigned NumBytesToPop = NumBytes;
02819 
02820   // If we have an inalloca argument, all stack space has already been allocated
02821   // for us and is right at the top of the stack.  We don't support multiple
02822   // arguments passed in memory when using inalloca.
02823   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
02824     NumBytesToPush = 0;
02825     if (!ArgLocs.back().isMemLoc())
02826       report_fatal_error("cannot use inalloca attribute on a register "
02827                          "parameter");
02828     if (ArgLocs.back().getLocMemOffset() != 0)
02829       report_fatal_error("any parameter with the inalloca attribute must be "
02830                          "the only memory argument");
02831   }
02832 
02833   if (!IsSibcall)
02834     Chain = DAG.getCALLSEQ_START(
02835         Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
02836 
02837   SDValue RetAddrFrIdx;
02838   // Load return address for tail calls.
02839   if (isTailCall && FPDiff)
02840     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
02841                                     Is64Bit, FPDiff, dl);
02842 
02843   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
02844   SmallVector<SDValue, 8> MemOpChains;
02845   SDValue StackPtr;
02846 
02847   // Walk the register/memloc assignments, inserting copies/loads.  In the case
02848   // of tail call optimization, arguments are handled later.
02849   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
02850   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02851     // Skip inalloca arguments, they have already been written.
02852     ISD::ArgFlagsTy Flags = Outs[i].Flags;
02853     if (Flags.isInAlloca())
02854       continue;
02855 
02856     CCValAssign &VA = ArgLocs[i];
02857     EVT RegVT = VA.getLocVT();
02858     SDValue Arg = OutVals[i];
02859     bool isByVal = Flags.isByVal();
02860 
02861     // Promote the value if needed.
02862     switch (VA.getLocInfo()) {
02863     default: llvm_unreachable("Unknown loc info!");
02864     case CCValAssign::Full: break;
02865     case CCValAssign::SExt:
02866       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
02867       break;
02868     case CCValAssign::ZExt:
02869       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
02870       break;
02871     case CCValAssign::AExt:
02872       if (Arg.getValueType().isVector() &&
02873           Arg.getValueType().getScalarType() == MVT::i1)
02874         Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
02875       else if (RegVT.is128BitVector()) {
02876         // Special case: passing MMX values in XMM registers.
02877         Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
02878         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
02879         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
02880       } else
02881         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
02882       break;
02883     case CCValAssign::BCvt:
02884       Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
02885       break;
02886     case CCValAssign::Indirect: {
02887       // Store the argument.
02888       SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
02889       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
02890       Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
02891                            MachinePointerInfo::getFixedStack(FI),
02892                            false, false, 0);
02893       Arg = SpillSlot;
02894       break;
02895     }
02896     }
02897 
02898     if (VA.isRegLoc()) {
02899       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
02900       if (isVarArg && IsWin64) {
02901         // The Win64 ABI requires an argument XMM reg to be copied to the
02902         // corresponding shadow GPR if the callee is a varargs function.
02903         unsigned ShadowReg = 0;
02904         switch (VA.getLocReg()) {
02905         case X86::XMM0: ShadowReg = X86::RCX; break;
02906         case X86::XMM1: ShadowReg = X86::RDX; break;
02907         case X86::XMM2: ShadowReg = X86::R8; break;
02908         case X86::XMM3: ShadowReg = X86::R9; break;
02909         }
02910         if (ShadowReg)
02911           RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
02912       }
02913     } else if (!IsSibcall && (!isTailCall || isByVal)) {
02914       assert(VA.isMemLoc());
02915       if (!StackPtr.getNode())
02916         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
02917                                       getPointerTy());
02918       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
02919                                              dl, DAG, VA, Flags));
02920     }
02921   }
02922 
02923   if (!MemOpChains.empty())
02924     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
02925 
02926   if (Subtarget->isPICStyleGOT()) {
02927     // ELF / PIC requires the GOT pointer to be in the EBX register before
02928     // making function calls via the PLT.
02929     if (!isTailCall) {
02930       RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
02931                DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
02932     } else {
02933       // If we are tail calling and generating PIC/GOT style code load the
02934       // address of the callee into ECX. The value in ecx is used as target of
02935       // the tail jump. This is done to circumvent the ebx/callee-saved problem
02936       // for tail calls on PIC/GOT architectures. Normally we would just put the
02937       // address of GOT into ebx and then call target@PLT. But for tail calls
02938       // ebx would be restored (since ebx is callee saved) before jumping to the
02939       // target@PLT.
02940 
02941       // Note: The actual moving to ECX is done further down.
02942       GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
02943       if (G && !G->getGlobal()->hasHiddenVisibility() &&
02944           !G->getGlobal()->hasProtectedVisibility())
02945         Callee = LowerGlobalAddress(Callee, DAG);
02946       else if (isa<ExternalSymbolSDNode>(Callee))
02947         Callee = LowerExternalSymbol(Callee, DAG);
02948     }
02949   }
02950 
02951   if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
02952     // From AMD64 ABI document:
02953     // For calls that may call functions that use varargs or stdargs
02954     // (prototype-less calls or calls to functions containing ellipsis (...) in
02955     // the declaration) %al is used as a hidden argument to specify the number
02956     // of SSE registers used. The contents of %al do not need to match exactly
02957     // the number of registers, but must be an upper bound on the number of SSE
02958     // registers used and must be in the range 0 - 8 inclusive.
02959 
02960     // Count the number of XMM registers allocated.
02961     static const MCPhysReg XMMArgRegs[] = {
02962       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02963       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02964     };
02965     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
02966     assert((Subtarget->hasSSE1() || !NumXMMRegs)
02967            && "SSE registers cannot be used when SSE is disabled");
02968 
02969     RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
02970                                         DAG.getConstant(NumXMMRegs, dl,
02971                                                         MVT::i8)));
02972   }
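
  // Illustrative example (not from this file): for a call such as
  //   printf("%f\n", 3.14)
  // the double travels in XMM0, so NumXMMRegs == 1 and the code above
  // materializes the equivalent of "mov al, 1" before the call.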
02973 
02974   if (isVarArg && IsMustTail) {
02975     const auto &Forwards = X86Info->getForwardedMustTailRegParms();
02976     for (const auto &F : Forwards) {
02977       SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
02978       RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
02979     }
02980   }
02981 
02982   // For tail calls, lower the arguments to the 'real' stack slots.  Sibcalls
02983   // don't need this because the eligibility check rejects calls that require
02984   // shuffling arguments passed in memory.
02985   if (!IsSibcall && isTailCall) {
02986     // Force all the incoming stack arguments to be loaded from the stack
02987     // before any new outgoing arguments are stored to the stack, because the
02988     // outgoing stack slots may alias the incoming argument stack slots, and
02989     // the alias isn't otherwise explicit. This is slightly more conservative
02990     // than necessary, because it means that each store effectively depends
02991     // on every argument instead of just those arguments it would clobber.
02992     SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
02993 
02994     SmallVector<SDValue, 8> MemOpChains2;
02995     SDValue FIN;
02996     int FI = 0;
02997     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02998       CCValAssign &VA = ArgLocs[i];
02999       if (VA.isRegLoc())
03000         continue;
03001       assert(VA.isMemLoc());
03002       SDValue Arg = OutVals[i];
03003       ISD::ArgFlagsTy Flags = Outs[i].Flags;
03004       // Skip inalloca arguments.  They don't require any work.
03005       if (Flags.isInAlloca())
03006         continue;
03007       // Create frame index.
03008       int32_t Offset = VA.getLocMemOffset()+FPDiff;
03009       uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
03010       FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
03011       FIN = DAG.getFrameIndex(FI, getPointerTy());
03012 
03013       if (Flags.isByVal()) {
03014         // Copy relative to framepointer.
03015         SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
03016         if (!StackPtr.getNode())
03017           StackPtr = DAG.getCopyFromReg(Chain, dl,
03018                                         RegInfo->getStackRegister(),
03019                                         getPointerTy());
03020         Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
03021 
03022         MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
03023                                                          ArgChain,
03024                                                          Flags, DAG, dl));
03025       } else {
03026         // Store relative to framepointer.
03027         MemOpChains2.push_back(
03028           DAG.getStore(ArgChain, dl, Arg, FIN,
03029                        MachinePointerInfo::getFixedStack(FI),
03030                        false, false, 0));
03031       }
03032     }
03033 
03034     if (!MemOpChains2.empty())
03035       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
03036 
03037     // Store the return address to the appropriate stack slot.
03038     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
03039                                      getPointerTy(), RegInfo->getSlotSize(),
03040                                      FPDiff, dl);
03041   }
03042 
03043   // Build a sequence of copy-to-reg nodes chained together with token chain
03044   // and flag operands which copy the outgoing args into registers.
03045   SDValue InFlag;
03046   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
03047     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
03048                              RegsToPass[i].second, InFlag);
03049     InFlag = Chain.getValue(1);
03050   }
03051 
03052   if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
03053     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
03054     // In the 64-bit large code model, we have to make all calls
03055     // through a register, since the call instruction's 32-bit
03056     // pc-relative offset may not be large enough to hold the whole
03057     // address.
03058   } else if (Callee->getOpcode() == ISD::GlobalAddress) {
03059     // If the callee is a GlobalAddress node (quite common, every direct call
03060     // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
03061     // it.
03062     GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
03063 
03064     // We should use extra load for direct calls to dllimported functions in
03065     // non-JIT mode.
03066     const GlobalValue *GV = G->getGlobal();
03067     if (!GV->hasDLLImportStorageClass()) {
03068       unsigned char OpFlags = 0;
03069       bool ExtraLoad = false;
03070       unsigned WrapperKind = ISD::DELETED_NODE;
03071 
03072       // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
03073       // external symbols must go through the PLT in PIC mode.  If the symbol
03074       // has hidden or protected visibility, or if it is static or local, then
03075       // we don't need to use the PLT - we can directly call it.
03076       if (Subtarget->isTargetELF() &&
03077           DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
03078           GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
03079         OpFlags = X86II::MO_PLT;
03080       } else if (Subtarget->isPICStyleStubAny() &&
03081                  (GV->isDeclaration() || GV->isWeakForLinker()) &&
03082                  (!Subtarget->getTargetTriple().isMacOSX() ||
03083                   Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
03084         // PC-relative references to external symbols should go through $stub,
03085         // unless we're building with the leopard linker or later, which
03086         // automatically synthesizes these stubs.
03087         OpFlags = X86II::MO_DARWIN_STUB;
03088       } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) &&
03089                  cast<Function>(GV)->hasFnAttribute(Attribute::NonLazyBind)) {
03090         // If the function is marked as non-lazy, generate an indirect call
03091         // which loads from the GOT directly. This avoids runtime overhead
03092         // at the cost of eager binding (and one extra byte of encoding).
03093         OpFlags = X86II::MO_GOTPCREL;
03094         WrapperKind = X86ISD::WrapperRIP;
03095         ExtraLoad = true;
03096       }
03097 
03098       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
03099                                           G->getOffset(), OpFlags);
03100 
03101       // Add a wrapper if needed.
03102       if (WrapperKind != ISD::DELETED_NODE)
03103         Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
03104       // Add extra indirection if needed.
03105       if (ExtraLoad)
03106         Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
03107                              MachinePointerInfo::getGOT(),
03108                              false, false, false, 0);
03109     }
03110   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
03111     unsigned char OpFlags = 0;
03112 
03113     // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
03114     // external symbols should go through the PLT.
03115     if (Subtarget->isTargetELF() &&
03116         DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
03117       OpFlags = X86II::MO_PLT;
03118     } else if (Subtarget->isPICStyleStubAny() &&
03119                (!Subtarget->getTargetTriple().isMacOSX() ||
03120                 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
03121       // PC-relative references to external symbols should go through $stub,
03122       // unless we're building with the leopard linker or later, which
03123       // automatically synthesizes these stubs.
03124       OpFlags = X86II::MO_DARWIN_STUB;
03125     }
03126 
03127     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
03128                                          OpFlags);
03129   } else if (Subtarget->isTarget64BitILP32() &&
03130              Callee->getValueType(0) == MVT::i32) {
03131     // Zero-extend the 32-bit Callee address to 64 bits, as required by the x32 ABI.
03132     Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
03133   }
03134 
03135   // Returns a chain & a flag for retval copy to use.
03136   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
03137   SmallVector<SDValue, 8> Ops;
03138 
03139   if (!IsSibcall && isTailCall) {
03140     Chain = DAG.getCALLSEQ_END(Chain,
03141                                DAG.getIntPtrConstant(NumBytesToPop, dl, true),
03142                                DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
03143     InFlag = Chain.getValue(1);
03144   }
03145 
03146   Ops.push_back(Chain);
03147   Ops.push_back(Callee);
03148 
03149   if (isTailCall)
03150     Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
03151 
03152   // Add argument registers to the end of the list so that they are known live
03153   // into the call.
03154   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
03155     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
03156                                   RegsToPass[i].second.getValueType()));
03157 
03158   // Add a register mask operand representing the call-preserved registers.
03159   const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
03160   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
03161   assert(Mask && "Missing call preserved mask for calling convention");
03162   Ops.push_back(DAG.getRegisterMask(Mask));
03163 
03164   if (InFlag.getNode())
03165     Ops.push_back(InFlag);
03166 
03167   if (isTailCall) {
03168     // We used to do:
03169     //// If this is the first return lowered for this function, add the regs
03170     //// to the liveout set for the function.
03171     // This isn't right, although it's probably harmless on x86; liveouts
03172     // should be computed from returns not tail calls.  Consider a void
03173     // function making a tail call to a function returning int.
03174     MF.getFrameInfo()->setHasTailCall();
03175     return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
03176   }
03177 
03178   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
03179   InFlag = Chain.getValue(1);
03180 
03181   // Create the CALLSEQ_END node.
03182   unsigned NumBytesForCalleeToPop;
03183   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
03184                        DAG.getTarget().Options.GuaranteedTailCallOpt))
03185     NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
03186   else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
03187            !Subtarget->getTargetTriple().isOSMSVCRT() &&
03188            SR == StackStructReturn)
03189     // If this is a call to a struct-return function, the callee
03190     // pops the hidden struct pointer, so we have to push it back.
03191     // This is common for Darwin/X86, Linux & Mingw32 targets.
03192     // For MSVC Win32 targets, the caller pops the hidden struct pointer.
03193     NumBytesForCalleeToPop = 4;
03194   else
03195     NumBytesForCalleeToPop = 0;  // Callee pops nothing.
03196 
03197   // Returns a flag for retval copy to use.
03198   if (!IsSibcall) {
03199     Chain = DAG.getCALLSEQ_END(Chain,
03200                                DAG.getIntPtrConstant(NumBytesToPop, dl, true),
03201                                DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
03202                                                      true),
03203                                InFlag, dl);
03204     InFlag = Chain.getValue(1);
03205   }
03206 
03207   // Handle result values, copying them out of physregs into vregs that we
03208   // return.
03209   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
03210                          Ins, dl, DAG, InVals);
03211 }
03212 
03213 //===----------------------------------------------------------------------===//
03214 //                Fast Calling Convention (tail call) implementation
03215 //===----------------------------------------------------------------------===//
03216 
03217 //  Like stdcall, the callee cleans up the arguments, except that ECX is
03218 //  reserved for storing the address of the tail-called function. Only 2
03219 //  registers are free for argument passing (inreg). Tail call optimization is
03220 //  performed provided that:
03221 //                * tailcallopt is enabled
03222 //                * caller/callee are fastcc
03223 //  On X86_64 architecture with GOT-style position independent code only local
03224 //  (within module) calls are supported at the moment.
03225 //  To keep the stack aligned according to the platform ABI, the function
03226 //  GetAlignedArgumentStackSize ensures that the argument delta is always a
03227 //  multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld for example)
03228 //  If a tail-called function (callee) has more arguments than the caller, the
03229 //  caller needs to make sure that there is room to move the RETADDR to. This is
03230 //  achieved by reserving an area the size of the argument delta right after the
03231 //  original RETADDR, but before the saved framepointer or the spilled registers
03232 //  e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
03233 //  stack layout:
03234 //    arg1
03235 //    arg2
03236 //    RETADDR
03237 //    [ new RETADDR
03238 //      move area ]
03239 //    (possible EBP)
03240 //    ESI
03241 //    EDI
03242 //    local1 ..
03243 
03244 /// GetAlignedArgumentStackSize - Round the stack size up so that it is, e.g.,
03245 /// of the form 16n + 12 for a 16-byte alignment requirement.
03246 unsigned
03247 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
03248                                                SelectionDAG& DAG) const {
03249   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
03250   const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
03251   unsigned StackAlignment = TFI.getStackAlignment();
03252   uint64_t AlignMask = StackAlignment - 1;
03253   int64_t Offset = StackSize;
03254   unsigned SlotSize = RegInfo->getSlotSize();
03255   if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
03256     // The low bits are at most (StackAlignment - SlotSize), so just add the difference.
03257     Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
03258   } else {
03259     // Mask out the low bits, then add the stack alignment plus (StackAlignment - SlotSize) bytes.
03260     Offset = ((~AlignMask) & Offset) + StackAlignment +
03261       (StackAlignment-SlotSize);
03262   }
03263   return Offset;
03264 }
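
// Worked example (assuming a 16-byte stack alignment and a 4-byte slot, i.e.
// 32-bit mode): StackSize == 20 has (20 & 15) == 4 <= 12, so the first branch
// yields 20 + (12 - 4) == 28 == 16*1 + 12; StackSize == 30 has (30 & 15) == 14
// > 12, so the second branch yields (30 & ~15) + 16 + 12 == 44 == 16*2 + 12.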
03265 
03266 /// MatchingStackOffset - Return true if the given stack call argument is
03267 /// already available at the same (relative) position in the caller's
03268 /// incoming argument stack.
03269 static
03270 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
03271                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
03272                          const X86InstrInfo *TII) {
03273   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
03274   int FI = INT_MAX;
03275   if (Arg.getOpcode() == ISD::CopyFromReg) {
03276     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
03277     if (!TargetRegisterInfo::isVirtualRegister(VR))
03278       return false;
03279     MachineInstr *Def = MRI->getVRegDef(VR);
03280     if (!Def)
03281       return false;
03282     if (!Flags.isByVal()) {
03283       if (!TII->isLoadFromStackSlot(Def, FI))
03284         return false;
03285     } else {
03286       unsigned Opcode = Def->getOpcode();
03287       if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
03288            Opcode == X86::LEA64_32r) &&
03289           Def->getOperand(1).isFI()) {
03290         FI = Def->getOperand(1).getIndex();
03291         Bytes = Flags.getByValSize();
03292       } else
03293         return false;
03294     }
03295   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
03296     if (Flags.isByVal())
03297       // ByVal argument is passed in as a pointer but it's now being
03298       // dereferenced. e.g.
03299       // define @foo(%struct.X* %A) {
03300       //   tail call @bar(%struct.X* byval %A)
03301       // }
03302       return false;
03303     SDValue Ptr = Ld->getBasePtr();
03304     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
03305     if (!FINode)
03306       return false;
03307     FI = FINode->getIndex();
03308   } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
03309     FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
03310     FI = FINode->getIndex();
03311     Bytes = Flags.getByValSize();
03312   } else
03313     return false;
03314 
03315   assert(FI != INT_MAX);
03316   if (!MFI->isFixedObjectIndex(FI))
03317     return false;
03318   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
03319 }
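
// For illustration only (hypothetical IR): a stack argument that does match is
// the common "forward my own incoming arguments" pattern, e.g. on SysV AMD64
//
//   define i32 @f(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) {
//     %r = tail call i32 @g(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
//                           i32 %g)
//     ret i32 %r
//   }
//
// where %g is already in the caller's incoming stack slot at the same offset,
// so no stack stores are needed for the sibcall.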
03320 
03321 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
03322 /// for tail call optimization. Targets which want to do tail call
03323 /// optimization should implement this function.
03324 bool
03325 X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
03326                                                      CallingConv::ID CalleeCC,
03327                                                      bool isVarArg,
03328                                                      bool isCalleeStructRet,
03329                                                      bool isCallerStructRet,
03330                                                      Type *RetTy,
03331                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
03332                                     const SmallVectorImpl<SDValue> &OutVals,
03333                                     const SmallVectorImpl<ISD::InputArg> &Ins,
03334                                                      SelectionDAG &DAG) const {
03335   if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
03336     return false;
03337 
03338   // If -tailcallopt is specified, make fastcc functions tail-callable.
03339   const MachineFunction &MF = DAG.getMachineFunction();
03340   const Function *CallerF = MF.getFunction();
03341 
03342   // If the function return type is x86_fp80 and the callee return type is not,
03343   // then the FP_EXTEND of the call result is not a nop. It's not safe to
03344   // perform a tailcall optimization here.
03345   if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
03346     return false;
03347 
03348   CallingConv::ID CallerCC = CallerF->getCallingConv();
03349   bool CCMatch = CallerCC == CalleeCC;
03350   bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
03351   bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
03352 
03353   // Win64 functions have extra shadow space for argument homing. Don't do the
03354   // sibcall if the caller and callee have mismatched expectations for this
03355   // space.
03356   if (IsCalleeWin64 != IsCallerWin64)
03357     return false;
03358 
03359   if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
03360     if (IsTailCallConvention(CalleeCC) && CCMatch)
03361       return true;
03362     return false;
03363   }
03364 
03365   // Look for obvious safe cases to perform tail call optimization that do not
03366   // require ABI changes. This is what gcc calls sibcall.
03367 
03368   // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
03369   // emit a special epilogue.
03370   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
03371   if (RegInfo->needsStackRealignment(MF))
03372     return false;
03373 
03374   // Also avoid sibcall optimization if either caller or callee uses struct
03375   // return semantics.
03376   if (isCalleeStructRet || isCallerStructRet)
03377     return false;
03378 
03379   // An stdcall/thiscall caller is expected to clean up its arguments; the
03380   // callee isn't going to do that.
03381   // FIXME: this is more restrictive than needed. We could produce a tailcall
03382   // when the stack adjustment matches. For example, with a thiscall that takes
03383   // only one argument.
03384   if (!CCMatch && (CallerCC == CallingConv::X86_StdCall ||
03385                    CallerCC == CallingConv::X86_ThisCall))
03386     return false;
03387 
03388   // Do not sibcall optimize vararg calls unless all arguments are passed via
03389   // registers.
03390   if (isVarArg && !Outs.empty()) {
03391 
03392     // Optimizing for varargs on Win64 is unlikely to be safe without
03393     // additional testing.
03394     if (IsCalleeWin64 || IsCallerWin64)
03395       return false;
03396 
03397     SmallVector<CCValAssign, 16> ArgLocs;
03398     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03399                    *DAG.getContext());
03400 
03401     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03402     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
03403       if (!ArgLocs[i].isRegLoc())
03404         return false;
03405   }
03406 
03407   // If the call result is in ST0 / ST1, it needs to be popped off the x87
03408   // stack.  Therefore, if the result is not used by the caller, it is not safe
03409   // to optimize this into a sibcall.
03410   bool Unused = false;
03411   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
03412     if (!Ins[i].Used) {
03413       Unused = true;
03414       break;
03415     }
03416   }
03417   if (Unused) {
03418     SmallVector<CCValAssign, 16> RVLocs;
03419     CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
03420                    *DAG.getContext());
03421     CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
03422     for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
03423       CCValAssign &VA = RVLocs[i];
03424       if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
03425         return false;
03426     }
03427   }
03428 
03429   // If the calling conventions do not match, then we'd better make sure the
03430   // results are returned in the same way as what the caller expects.
03431   if (!CCMatch) {
03432     SmallVector<CCValAssign, 16> RVLocs1;
03433     CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
03434                     *DAG.getContext());
03435     CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
03436 
03437     SmallVector<CCValAssign, 16> RVLocs2;
03438     CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
03439                     *DAG.getContext());
03440     CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
03441 
03442     if (RVLocs1.size() != RVLocs2.size())
03443       return false;
03444     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
03445       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
03446         return false;
03447       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
03448         return false;
03449       if (RVLocs1[i].isRegLoc()) {
03450         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
03451           return false;
03452       } else {
03453         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
03454           return false;
03455       }
03456     }
03457   }
03458 
03459   // If the callee takes no arguments then go on to check the results of the
03460   // call.
03461   if (!Outs.empty()) {
03462     // Check if stack adjustment is needed. For now, do not do this if any
03463     // argument is passed on the stack.
03464     SmallVector<CCValAssign, 16> ArgLocs;
03465     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03466                    *DAG.getContext());
03467 
03468     // Allocate shadow area for Win64
03469     if (IsCalleeWin64)
03470       CCInfo.AllocateStack(32, 8);
03471 
03472     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03473     if (CCInfo.getNextStackOffset()) {
03474       MachineFunction &MF = DAG.getMachineFunction();
03475       if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
03476         return false;
03477 
03478       // Check if the arguments are already laid out in the right way as
03479       // the caller's fixed stack objects.
03480       MachineFrameInfo *MFI = MF.getFrameInfo();
03481       const MachineRegisterInfo *MRI = &MF.getRegInfo();
03482       const X86InstrInfo *TII = Subtarget->getInstrInfo();
03483       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03484         CCValAssign &VA = ArgLocs[i];
03485         SDValue Arg = OutVals[i];
03486         ISD::ArgFlagsTy Flags = Outs[i].Flags;
03487         if (VA.getLocInfo() == CCValAssign::Indirect)
03488           return false;
03489         if (!VA.isRegLoc()) {
03490           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
03491                                    MFI, MRI, TII))
03492             return false;
03493         }
03494       }
03495     }
03496 
03497     // If the tailcall address may be in a register, then make sure it's
03498     // possible to register allocate for it. In 32-bit, the call address can
03499     // only target EAX, EDX, or ECX since the tail call must be scheduled after
03500     // callee-saved registers are restored. These happen to be the same
03501     // registers used to pass 'inreg' arguments so watch out for those.
03502     if (!Subtarget->is64Bit() &&
03503         ((!isa<GlobalAddressSDNode>(Callee) &&
03504           !isa<ExternalSymbolSDNode>(Callee)) ||
03505          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03506       unsigned NumInRegs = 0;
03507       // In PIC we need an extra register to formulate the address computation
03508       // for the callee.
03509       unsigned MaxInRegs =
03510         (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
03511 
03512       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03513         CCValAssign &VA = ArgLocs[i];
03514         if (!VA.isRegLoc())
03515           continue;
03516         unsigned Reg = VA.getLocReg();
03517         switch (Reg) {
03518         default: break;
03519         case X86::EAX: case X86::EDX: case X86::ECX:
03520           if (++NumInRegs == MaxInRegs)
03521             return false;
03522           break;
03523         }
03524       }
03525     }
03526   }
03527 
03528   return true;
03529 }
03530 
03531 FastISel *
03532 X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
03533                                   const TargetLibraryInfo *libInfo) const {
03534   return X86::createFastISel(funcInfo, libInfo);
03535 }
03536 
03537 //===----------------------------------------------------------------------===//
03538 //                           Other Lowering Hooks
03539 //===----------------------------------------------------------------------===//
03540 
03541 static bool MayFoldLoad(SDValue Op) {
03542   return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
03543 }
03544 
03545 static bool MayFoldIntoStore(SDValue Op) {
03546   return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
03547 }
03548 
03549 static bool isTargetShuffle(unsigned Opcode) {
03550   switch(Opcode) {
03551   default: return false;
03552   case X86ISD::BLENDI:
03553   case X86ISD::PSHUFB:
03554   case X86ISD::PSHUFD:
03555   case X86ISD::PSHUFHW:
03556   case X86ISD::PSHUFLW:
03557   case X86ISD::SHUFP:
03558   case X86ISD::PALIGNR:
03559   case X86ISD::MOVLHPS:
03560   case X86ISD::MOVLHPD:
03561   case X86ISD::MOVHLPS:
03562   case X86ISD::MOVLPS:
03563   case X86ISD::MOVLPD:
03564   case X86ISD::MOVSHDUP:
03565   case X86ISD::MOVSLDUP:
03566   case X86ISD::MOVDDUP:
03567   case X86ISD::MOVSS:
03568   case X86ISD::MOVSD:
03569   case X86ISD::UNPCKL:
03570   case X86ISD::UNPCKH:
03571   case X86ISD::VPERMILPI:
03572   case X86ISD::VPERM2X128:
03573   case X86ISD::VPERMI:
03574     return true;
03575   }
03576 }
03577 
03578 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03579                                     SDValue V1, unsigned TargetMask,
03580                                     SelectionDAG &DAG) {
03581   switch(Opc) {
03582   default: llvm_unreachable("Unknown x86 shuffle node");
03583   case X86ISD::PSHUFD:
03584   case X86ISD::PSHUFHW:
03585   case X86ISD::PSHUFLW:
03586   case X86ISD::VPERMILPI:
03587   case X86ISD::VPERMI:
03588     return DAG.getNode(Opc, dl, VT, V1,
03589                        DAG.getConstant(TargetMask, dl, MVT::i8));
03590   }
03591 }
03592 
03593 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
03594                                     SDValue V1, SDValue V2, SelectionDAG &DAG) {
03595   switch(Opc) {
03596   default: llvm_unreachable("Unknown x86 shuffle node");
03597   case X86ISD::MOVLHPS:
03598   case X86ISD::MOVLHPD:
03599   case X86ISD::MOVHLPS:
03600   case X86ISD::MOVLPS:
03601   case X86ISD::MOVLPD:
03602   case X86ISD::MOVSS:
03603   case X86ISD::MOVSD:
03604   case X86ISD::UNPCKL:
03605   case X86ISD::UNPCKH:
03606     return DAG.getNode(Opc, dl, VT, V1, V2);
03607   }
03608 }
03609 
03610 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
03611   MachineFunction &MF = DAG.getMachineFunction();
03612   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
03613   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
03614   int ReturnAddrIndex = FuncInfo->getRAIndex();
03615 
03616   if (ReturnAddrIndex == 0) {
03617     // Set up a frame object for the return address.
03618     unsigned SlotSize = RegInfo->getSlotSize();
03619     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
03620                                                            -(int64_t)SlotSize,
03621                                                            false);
03622     FuncInfo->setRAIndex(ReturnAddrIndex);
03623   }
03624 
03625   return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
03626 }
03627 
03628 bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
03629                                        bool hasSymbolicDisplacement) {
03630   // Offset should fit into a 32-bit immediate field.
03631   if (!isInt<32>(Offset))
03632     return false;
03633 
03634   // If we don't have a symbolic displacement, we don't have any extra
03635   // restrictions.
03636   if (!hasSymbolicDisplacement)
03637     return true;
03638 
03639   // FIXME: Some tweaks might be needed for medium code model.
03640   if (M != CodeModel::Small && M != CodeModel::Kernel)
03641     return false;
03642 
03643   // For the small code model we assume that the last object is 16MB before the
03644   // end of the 31-bit boundary. We may also accept pretty large negative
03645   // constants, since all objects are in the positive half of the address space.
03646   if (M == CodeModel::Small && Offset < 16*1024*1024)
03647     return true;
03648 
03649   // For the kernel code model we know that all objects reside in the negative
03650   // half of the 32-bit address space. We must not accept negative offsets, since
03651   // they may be just off, but we may accept pretty large positive ones.
03652   if (M == CodeModel::Kernel && Offset >= 0)
03653     return true;
03654 
03655   return false;
03656 }
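
As a rough stand-alone sketch of the policy above (offsetSuitable and Model are illustrative stand-ins, not LLVM API): the offset must fit an imm32, and with a symbolic displacement the small code model additionally requires it to stay below 16MB while the kernel code model requires it to be non-negative.

#include <cassert>
#include <cstdint>

enum class Model { Small, Kernel, Large };   // illustrative subset of code models

static bool offsetSuitable(int64_t Offset, Model M, bool HasSymbolicDisp) {
  if (Offset < INT32_MIN || Offset > INT32_MAX)   // must fit in an imm32
    return false;
  if (!HasSymbolicDisp)                           // no symbol: only the imm32 check
    return true;
  if (M == Model::Small)  return Offset < 16 * 1024 * 1024;
  if (M == Model::Kernel) return Offset >= 0;
  return false;
}

int main() {
  assert(offsetSuitable(1 << 20, Model::Small, true));   // 1MB: accepted
  assert(!offsetSuitable(-8, Model::Kernel, true));      // negative: rejected in kernel model
  assert(offsetSuitable(-8, Model::Small, false));       // no symbolic displacement: fine
  return 0;
}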
03657 
03658 /// isCalleePop - Determines whether the callee is required to pop its
03659 /// own arguments. Callee pop is necessary to support tail calls.
03660 bool X86::isCalleePop(CallingConv::ID CallingConv,
03661                       bool is64Bit, bool IsVarArg, bool TailCallOpt) {
03662   switch (CallingConv) {
03663   default:
03664     return false;
03665   case CallingConv::X86_StdCall:
03666   case CallingConv::X86_FastCall:
03667   case CallingConv::X86_ThisCall:
03668     return !is64Bit;
03669   case CallingConv::Fast:
03670   case CallingConv::GHC:
03671   case CallingConv::HiPE:
03672     if (IsVarArg)
03673       return false;
03674     return TailCallOpt;
03675   }
03676 }
03677 
03678 /// \brief Return true if the condition is an unsigned comparison operation.
03679 static bool isX86CCUnsigned(unsigned X86CC) {
03680   switch (X86CC) {
03681   default: llvm_unreachable("Invalid integer condition!");
03682   case X86::COND_E:     return true;
03683   case X86::COND_G:     return false;
03684   case X86::COND_GE:    return false;
03685   case X86::COND_L:     return false;
03686   case X86::COND_LE:    return false;
03687   case X86::COND_NE:    return true;
03688   case X86::COND_B:     return true;
03689   case X86::COND_A:     return true;
03690   case X86::COND_BE:    return true;
03691   case X86::COND_AE:    return true;
03692   }
03693   llvm_unreachable("covered switch fell through?!");
03694 }
03695 
03696 /// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the
03697 /// X86-specific condition code, returning the condition code and the LHS/RHS
03698 /// of the comparison to make.
03699 static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP,
03700                                SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
03701   if (!isFP) {
03702     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
03703       if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
03704         // X > -1   -> X == 0, jump !sign.
03705         RHS = DAG.getConstant(0, DL, RHS.getValueType());
03706         return X86::COND_NS;
03707       }
03708       if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
03709         // X < 0   -> X == 0, jump on sign.
03710         return X86::COND_S;
03711       }
03712       if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
03713         // X < 1   -> X <= 0
03714         RHS = DAG.getConstant(0, DL, RHS.getValueType());
03715         return X86::COND_LE;
03716       }
03717     }
03718 
03719     switch (SetCCOpcode) {
03720     default: llvm_unreachable("Invalid integer condition!");
03721     case ISD::SETEQ:  return X86::COND_E;
03722     case ISD::SETGT:  return X86::COND_G;
03723     case ISD::SETGE:  return X86::COND_GE;
03724     case ISD::SETLT:  return X86::COND_L;
03725     case ISD::SETLE:  return X86::COND_LE;
03726     case ISD::SETNE:  return X86::COND_NE;
03727     case ISD::SETULT: return X86::COND_B;
03728     case ISD::SETUGT: return X86::COND_A;
03729     case ISD::SETULE: return X86::COND_BE;
03730     case ISD::SETUGE: return X86::COND_AE;
03731     }
03732   }
03733 
03734   // First determine if it is required or is profitable to flip the operands.
03735 
03736   // If LHS is a foldable load, but RHS is not, flip the condition.
03737   if (ISD::isNON_EXTLoad(LHS.getNode()) &&
03738       !ISD::isNON_EXTLoad(RHS.getNode())) {
03739     SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
03740     std::swap(LHS, RHS);
03741   }
03742 
03743   switch (SetCCOpcode) {
03744   default: break;
03745   case ISD::SETOLT:
03746   case ISD::SETOLE:
03747   case ISD::SETUGT:
03748   case ISD::SETUGE:
03749     std::swap(LHS, RHS);
03750     break;
03751   }
03752 
03753   // On a floating point condition, the flags are set as follows:
03754   // ZF  PF  CF   op
03755   //  0 | 0 | 0 | X > Y
03756   //  0 | 0 | 1 | X < Y
03757   //  1 | 0 | 0 | X == Y
03758   //  1 | 1 | 1 | unordered
03759   switch (SetCCOpcode) {
03760   default: llvm_unreachable("Condcode should be pre-legalized away");
03761   case ISD::SETUEQ:
03762   case ISD::SETEQ:   return X86::COND_E;
03763   case ISD::SETOLT:              // flipped
03764   case ISD::SETOGT:
03765   case ISD::SETGT:   return X86::COND_A;
03766   case ISD::SETOLE:              // flipped
03767   case ISD::SETOGE:
03768   case ISD::SETGE:   return X86::COND_AE;
03769   case ISD::SETUGT:              // flipped
03770   case ISD::SETULT:
03771   case ISD::SETLT:   return X86::COND_B;
03772   case ISD::SETUGE:              // flipped
03773   case ISD::SETULE:
03774   case ISD::SETLE:   return X86::COND_BE;
03775   case ISD::SETONE:
03776   case ISD::SETNE:   return X86::COND_NE;
03777   case ISD::SETUO:   return X86::COND_P;
03778   case ISD::SETO:    return X86::COND_NP;
03779   case ISD::SETOEQ:
03780   case ISD::SETUNE:  return X86::COND_INVALID;
03781   }
03782 }
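
To make the flag table above concrete, here is a minimal stand-alone model (the ucomis helper is an illustrative approximation of UCOMISS/UCOMISD flag behavior, not code from this file) showing why SETOLT is handled by swapping the operands and then testing the unsigned condition COND_A (CF==0 and ZF==0).

#include <cassert>
#include <cmath>

struct Flags { bool ZF, PF, CF; };

// Approximate the ZF/PF/CF outcome of ucomis(a, b), per the table above.
static Flags ucomis(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) return {true, true, true};   // unordered
  if (a == b)                         return {true, false, false}; // a == b
  if (a < b)                          return {false, false, true}; // a < b
  return {false, false, false};                                    // a > b
}

int main() {
  double x = 1.0, y = 2.0;
  // x < y (SETOLT): operands are swapped, then COND_A (CF==0 && ZF==0) is checked.
  Flags F = ucomis(y, x);
  assert(!F.CF && !F.ZF);
  return 0;
}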
03783 
03784 /// hasFPCMov - Is there a floating point cmov for the specific X86 condition
03785 /// code? The current x86 ISA includes the following FP cmov instructions:
03786 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
03787 static bool hasFPCMov(unsigned X86CC) {
03788   switch (X86CC) {
03789   default:
03790     return false;
03791   case X86::COND_B:
03792   case X86::COND_BE:
03793   case X86::COND_E:
03794   case X86::COND_P:
03795   case X86::COND_A:
03796   case X86::COND_AE:
03797   case X86::COND_NE:
03798   case X86::COND_NP:
03799     return true;
03800   }
03801 }
03802 
03803 /// isFPImmLegal - Returns true if the target can instruction select the
03804 /// specified FP immediate natively. If false, the legalizer will
03805 /// materialize the FP immediate as a load from a constant pool.
03806 bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
03807   for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
03808     if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
03809       return true;
03810   }
03811   return false;
03812 }
03813 
03814 bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
03815                                               ISD::LoadExtType ExtTy,
03816                                               EVT NewVT) const {
03817   // "ELF Handling for Thread-Local Storage" specifies that an R_X86_64_GOTTPOFF
03818   // relocation must target a movq or addq instruction: don't let the load shrink.
03819   SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
03820   if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
03821     if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
03822       return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
03823   return true;
03824 }
03825 
03826 /// \brief Returns true if it is beneficial to convert a load of a constant
03827 /// to just the constant itself.
03828 bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
03829                                                           Type *Ty) const {
03830   assert(Ty->isIntegerTy());
03831 
03832   unsigned BitSize = Ty->getPrimitiveSizeInBits();
03833   if (BitSize == 0 || BitSize > 64)
03834     return false;
03835   return true;
03836 }
03837 
03838 bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
03839                                                 unsigned Index) const {
03840   if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
03841     return false;
03842 
03843   return (Index == 0 || Index == ResVT.getVectorNumElements());
03844 }
03845 
03846 bool X86TargetLowering::isCheapToSpeculateCttz() const {
03847   // Speculate cttz only if we can directly use TZCNT.
03848   return Subtarget->hasBMI();
03849 }
03850 
03851 bool X86TargetLowering::isCheapToSpeculateCtlz() const {
03852   // Speculate ctlz only if we can directly use LZCNT.
03853   return Subtarget->hasLZCNT();
03854 }
03855 
03856 /// isUndefOrInRange - Return true if Val is undef or if its value falls within
03857 /// the specified range [Low, Hi).
03858 static bool isUndefOrInRange(int Val, int Low, int Hi) {
03859   return (Val < 0) || (Val >= Low && Val < Hi);
03860 }
03861 
03862 /// isUndefOrEqual - Val is either less than zero (undef) or equal to the
03863 /// specified value.
03864 static bool isUndefOrEqual(int Val, int CmpVal) {
03865   return (Val < 0 || Val == CmpVal);
03866 }
03867 
03868 /// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
03869 /// at position Pos and ending at Pos+Size, falls within the specified
03870 /// sequential range [Low, Low+Size) or is undef.
03871 static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
03872                                        unsigned Pos, unsigned Size, int Low) {
03873   for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
03874     if (!isUndefOrEqual(Mask[i], Low))
03875       return false;
03876   return true;
03877 }
03878 
03879 /// isVEXTRACTIndex - Return true if the specified
03880 /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
03881 /// suitable for instructions that extract 128- or 256-bit vectors.
03882 static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
03883   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
03884   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
03885     return false;
03886 
03887   // The index should be aligned on a vecWidth-bit boundary.
03888   uint64_t Index =
03889     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
03890 
03891   MVT VT = N->getSimpleValueType(0);
03892   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
03893   bool Result = (Index * ElSize) % vecWidth == 0;
03894 
03895   return Result;
03896 }
03897 
03898 /// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR
03899 /// operand specifies a subvector insert that is suitable for input to
03900 /// insertion of 128- or 256-bit subvectors.
03901 static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
03902   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
03903   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
03904     return false;
03905   // The index should be aligned on a vecWidth-bit boundary.
03906   uint64_t Index =
03907     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
03908 
03909   MVT VT = N->getSimpleValueType(0);
03910   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
03911   bool Result = (Index * ElSize) % vecWidth == 0;
03912 
03913   return Result;
03914 }
03915 
03916 bool X86::isVINSERT128Index(SDNode *N) {
03917   return isVINSERTIndex(N, 128);
03918 }
03919 
03920 bool X86::isVINSERT256Index(SDNode *N) {
03921   return isVINSERTIndex(N, 256);
03922 }
03923 
03924 bool X86::isVEXTRACT128Index(SDNode *N) {
03925   return isVEXTRACTIndex(N, 128);
03926 }
03927 
03928 bool X86::isVEXTRACT256Index(SDNode *N) {
03929   return isVEXTRACTIndex(N, 256);
03930 }
03931 
03932 static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
03933   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
03934   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
03935     llvm_unreachable("Illegal extract subvector for VEXTRACT");
03936 
03937   uint64_t Index =
03938     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
03939 
03940   MVT VecVT = N->getOperand(0).getSimpleValueType();
03941   MVT ElVT = VecVT.getVectorElementType();
03942 
03943   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
03944   return Index / NumElemsPerChunk;
03945 }
03946 
03947 static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
03948   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
03949   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
03950     llvm_unreachable("Illegal insert subvector for VINSERT");
03951 
03952   uint64_t Index =
03953     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
03954 
03955   MVT VecVT = N->getSimpleValueType(0);
03956   MVT ElVT = VecVT.getVectorElementType();
03957 
03958   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
03959   return Index / NumElemsPerChunk;
03960 }
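
For concreteness, a tiny stand-alone restatement of the immediate arithmetic above (extractImmediate is a hypothetical helper, not an LLVM function): a 128-bit chunk of 32-bit elements holds four elements, so element index 4 of a v8f32 maps to VEXTRACTF128 immediate 1.

#include <cassert>

// Same arithmetic as above: how many elements fit in one chunk, then which chunk.
static unsigned extractImmediate(unsigned EltIndex, unsigned EltBits,
                                 unsigned ChunkBits) {
  unsigned ElemsPerChunk = ChunkBits / EltBits;
  return EltIndex / ElemsPerChunk;
}

int main() {
  assert(extractImmediate(4, 32, 128) == 1); // upper 128-bit lane of a v8f32 -> imm 1
  assert(extractImmediate(8, 32, 256) == 1); // upper 256-bit half of a v16f32 -> imm 1
  assert(extractImmediate(2, 64, 128) == 1); // elements 2-3 of a v4i64 -> imm 1
  return 0;
}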
03961 
03962 /// getExtractVEXTRACT128Immediate - Return the appropriate immediate
03963 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
03964 /// and VEXTRACTI128 instructions.
03965 unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
03966   return getExtractVEXTRACTImmediate(N, 128);
03967 }
03968 
03969 /// getExtractVEXTRACT256Immediate - Return the appropriate immediate
03970 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4
03971 /// and VEXTRACTI64x4 instructions.
03972 unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
03973   return getExtractVEXTRACTImmediate(N, 256);
03974 }
03975 
03976 /// getInsertVINSERT128Immediate - Return the appropriate immediate
03977 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
03978 /// and VINSERTI128 instructions.
03979 unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
03980   return getInsertVINSERTImmediate(N, 128);
03981 }
03982 
03983 /// getInsertVINSERT256Immediate - Return the appropriate immediate
03984 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF64x4
03985 /// and VINSERTI64x4 instructions.
03986 unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
03987   return getInsertVINSERTImmediate(N, 256);
03988 }
03989 
03990 /// isZero - Returns true if V is a constant integer zero.
03991 static bool isZero(SDValue V) {
03992   ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
03993   return C && C->isNullValue();
03994 }
03995 
03996 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
03997 /// constant +0.0.
03998 bool X86::isZeroNode(SDValue Elt) {
03999   if (isZero(Elt))
04000     return true;
04001   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
04002     return CFP->getValueAPF().isPosZero();
04003   return false;
04004 }
04005 
04006 /// getZeroVector - Returns a vector of specified type with all zero elements.
04007 ///
04008 static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
04009                              SelectionDAG &DAG, SDLoc dl) {
04010   assert(VT.isVector() && "Expected a vector type");
04011 
04012   // Always build SSE zero vectors as <4 x i32> bitcasted
04013   // to their dest type. This ensures they get CSE'd.
04014   SDValue Vec;
04015   if (VT.is128BitVector()) {  // SSE
04016     if (Subtarget->hasSSE2()) {  // SSE2
04017       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
04018       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
04019     } else { // SSE1
04020       SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32);
04021       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
04022     }
04023   } else if (VT.is256BitVector()) { // AVX
04024     if (Subtarget->hasInt256()) { // AVX2
04025       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
04026       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04027       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
04028     } else {
04029       // 256-bit logic and arithmetic instructions in AVX are all
04030       // floating-point, no support for integer ops. Emit fp zeroed vectors.
04031       SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32);
04032       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04033       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
04034     }
04035   } else if (VT.is512BitVector()) { // AVX-512
04036       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
04037       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
04038                         Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04039       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
04040   } else if (VT.getScalarType() == MVT::i1) {
04041 
04042     assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16)
04043             && "Unexpected vector type");
04044     assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8)
04045             && "Unexpected vector type");
04046     SDValue Cst = DAG.getConstant(0, dl, MVT::i1);
04047     SmallVector<SDValue, 64> Ops(VT.getVectorNumElements(), Cst);
04048     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
04049   } else
04050     llvm_unreachable("Unexpected vector type");
04051 
04052   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
04053 }
04054 
04055 static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
04056                                 SelectionDAG &DAG, SDLoc dl,
04057                                 unsigned vectorWidth) {
04058   assert((vectorWidth == 128 || vectorWidth == 256) &&
04059          "Unsupported vector width");
04060   EVT VT = Vec.getValueType();
04061   EVT ElVT = VT.getVectorElementType();
04062   unsigned Factor = VT.getSizeInBits()/vectorWidth;
04063   EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
04064                                   VT.getVectorNumElements()/Factor);
04065 
04066   // Extract from UNDEF is UNDEF.
04067   if (Vec.getOpcode() == ISD::UNDEF)
04068     return DAG.getUNDEF(ResultVT);
04069 
04070   // Extract the relevant vectorWidth bits.  Generate an EXTRACT_SUBVECTOR
04071   unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
04072 
04073   // This is the index of the first element of the vectorWidth-bit chunk
04074   // we want.
04075   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
04076                                * ElemsPerChunk);
04077 
04078   // If the input is a buildvector just emit a smaller one.
04079   if (Vec.getOpcode() == ISD::BUILD_VECTOR)
04080     return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
04081                        makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
04082                                     ElemsPerChunk));
04083 
04084   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
04085   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
04086 }
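
A small sketch of the index normalization above (normalizeIdx is an illustrative stand-in, not LLVM API): the element index is rounded down to the first element of the vectorWidth-bit chunk that contains it.

#include <cassert>

static unsigned normalizeIdx(unsigned IdxVal, unsigned EltBits, unsigned ChunkBits) {
  unsigned ElemsPerChunk = ChunkBits / EltBits;
  // Round the element index down to a multiple of ElemsPerChunk.
  return ((IdxVal * EltBits) / ChunkBits) * ElemsPerChunk;
}

int main() {
  assert(normalizeIdx(5, 32, 128) == 4); // element 5 of a v8f32 lives in lane 1 (elts 4-7)
  assert(normalizeIdx(3, 32, 128) == 0); // element 3 still lives in lane 0 (elts 0-3)
  return 0;
}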
04087 
04088 /// Generate a DAG to grab 128-bits from a vector > 128 bits.  This
04089 /// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
04090 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
04091 /// instructions or a simple subregister reference. Idx is an index in the
04092 /// 128 bits we want.  It need not be aligned to a 128-bit boundary.  That makes
04093 /// lowering EXTRACT_VECTOR_ELT operations easier.
04094 static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
04095                                    SelectionDAG &DAG, SDLoc dl) {
04096   assert((Vec.getValueType().is256BitVector() ||
04097           Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
04098   return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
04099 }
04100 
04101 /// Generate a DAG to grab 256-bits from a 512-bit vector.
04102 static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
04103                                    SelectionDAG &DAG, SDLoc dl) {
04104   assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
04105   return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
04106 }
04107 
04108 static SDValue InsertSubVector(SDValue Result, SDValue Vec,
04109                                unsigned IdxVal, SelectionDAG &DAG,
04110                                SDLoc dl, unsigned vectorWidth) {
04111   assert((vectorWidth == 128 || vectorWidth == 256) &&
04112          "Unsupported vector width");
04113   // Inserting an UNDEF subvector is a no-op; just return Result.
04114   if (Vec.getOpcode() == ISD::UNDEF)
04115     return Result;
04116   EVT VT = Vec.getValueType();
04117   EVT ElVT = VT.getVectorElementType();
04118   EVT ResultVT = Result.getValueType();
04119 
04120   // Insert the relevant vectorWidth bits.
04121   unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
04122 
04123   // This is the index of the first element of the vectorWidth-bit chunk
04124   // we want.
04125   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
04126                                * ElemsPerChunk);
04127 
04128   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
04129   return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
04130 }
04131 
04132 /// Generate a DAG to put 128-bits into a vector > 128 bits.  This
04133 /// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
04134 /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
04135 /// simple superregister reference.  Idx is an index in the 128 bits
04136 /// we want.  It need not be aligned to a 128-bit boundary.  That makes
04137 /// lowering INSERT_VECTOR_ELT operations easier.
04138 static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
04139                                   SelectionDAG &DAG, SDLoc dl) {
04140   assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
04141 
04142   // For insertion into the zero index (low half) of a 256-bit vector, it is
04143   // more efficient to generate a blend with immediate instead of an insert*128.
04144   // We are still creating an INSERT_SUBVECTOR below with an undef node to
04145   // extend the subvector to the size of the result vector. Make sure that
04146   // we are not recursing on that node by checking for undef here.
04147   if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
04148       Result.getOpcode() != ISD::UNDEF) {
04149     EVT ResultVT = Result.getValueType();
04150     SDValue ZeroIndex = DAG.getIntPtrConstant(0, dl);
04151     SDValue Undef = DAG.getUNDEF(ResultVT);
04152     SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
04153                                  Vec, ZeroIndex);
04154 
04155     // The blend instruction, and therefore its mask, depend on the data type.
04156     MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
04157     if (ScalarType.isFloatingPoint()) {
04158       // Choose either vblendps (float) or vblendpd (double).
04159       unsigned ScalarSize = ScalarType.getSizeInBits();
04160       assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
04161       unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
04162       SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8);
04163       return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
04164     }
04165 
04166     const X86Subtarget &Subtarget =
04167     static_cast<const X86Subtarget &>(DAG.getSubtarget());
04168 
04169     // AVX2 is needed for 256-bit integer blend support.
04170     // Integers must be cast to 32-bit because there is only vpblendd;
04171     // vpblendw can't be used for this because it has a handicapped mask.
04172 
04173     // If we don't have AVX2, then cast to float. Using a wrong domain blend
04174     // is still more efficient than using the wrong domain vinsertf128 that
04175     // will be created by InsertSubVector().
04176     MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
04177 
04178     SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
04179     Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
04180     Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
04181     return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
04182   }
04183 
04184   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
04185 }
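
The choice of blend immediates above can be sanity-checked with a stand-alone sketch (lowHalfBlendMask is illustrative only): each set bit picks an element from the second source, so selecting the low 128-bit half of a 256-bit vector needs two bits for f64 and four bits for f32/i32.

#include <cassert>
#include <cstdint>

static uint8_t lowHalfBlendMask(unsigned ScalarBits) {
  unsigned EltsIn256 = 256 / ScalarBits;              // 4 for f64, 8 for f32/i32
  return (uint8_t)((1u << (EltsIn256 / 2)) - 1);      // set one bit per low-half element
}

int main() {
  assert(lowHalfBlendMask(64) == 0x03);               // vblendpd immediate
  assert(lowHalfBlendMask(32) == 0x0f);               // vblendps / vpblendd immediate
  return 0;
}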
04186 
04187 static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
04188                                   SelectionDAG &DAG, SDLoc dl) {
04189   assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
04190   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
04191 }
04192 
04193 /// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
04194 /// instructions. This is used because creating CONCAT_VECTOR nodes of
04195 /// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
04196 /// large BUILD_VECTORS.
04197 static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
04198                                    unsigned NumElems, SelectionDAG &DAG,
04199                                    SDLoc dl) {
04200   SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
04201   return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
04202 }
04203 
04204 static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
04205                                    unsigned NumElems, SelectionDAG &DAG,
04206                                    SDLoc dl) {
04207   SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
04208   return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
04209 }
04210 
04211 /// getOnesVector - Returns a vector of specified type with all bits set.
04212 /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
04213 /// no AVX2 support, use two <4 x i32> inserted into an <8 x i32> appropriately.
04214 /// Then bitcast to their original type, ensuring they get CSE'd.
04215 static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
04216                              SDLoc dl) {
04217   assert(VT.isVector() && "Expected a vector type");
04218 
04219   SDValue Cst = DAG.getConstant(~0U, dl, MVT::i32);
04220   SDValue Vec;
04221   if (VT.is256BitVector()) {
04222     if (HasInt256) { // AVX2
04223       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04224       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
04225     } else { // AVX
04226       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
04227       Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
04228     }
04229   } else if (VT.is128BitVector()) {
04230     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
04231   } else
04232     llvm_unreachable("Unexpected vector type");
04233 
04234   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
04235 }
04236 
04237 /// getMOVL - Returns a vector_shuffle node for a movs{s|d}, movd
04238 /// operation of the specified width.
04239 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
04240                        SDValue V2) {
04241   unsigned NumElems = VT.getVectorNumElements();
04242   SmallVector<int, 8> Mask;
04243   Mask.push_back(NumElems);
04244   for (unsigned i = 1; i != NumElems; ++i)
04245     Mask.push_back(i);
04246   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
04247 }
04248 
04249 /// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
04250 static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
04251                           SDValue V2) {
04252   unsigned NumElems = VT.getVectorNumElements();
04253   SmallVector<int, 8> Mask;
04254   for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
04255     Mask.push_back(i);
04256     Mask.push_back(i + NumElems);
04257   }
04258   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
04259 }
04260 
04261 /// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
04262 static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
04263                           SDValue V2) {
04264   unsigned NumElems = VT.getVectorNumElements();
04265   SmallVector<int, 8> Mask;
04266   for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
04267     Mask.push_back(i + Half);
04268     Mask.push_back(i + NumElems + Half);
04269   }
04270   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
04271 }
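
For a v4i32 the two helpers above produce interleaving masks; a minimal stand-alone sketch (buildUnpackLo/buildUnpackHi are illustrative stand-ins, not LLVM API) shows the expected <0,4,1,5> and <2,6,3,7> patterns.

#include <cassert>
#include <vector>

static std::vector<int> buildUnpackLo(unsigned NumElems) {
  std::vector<int> M;
  for (unsigned i = 0; i != NumElems / 2; ++i) { M.push_back(i); M.push_back(i + NumElems); }
  return M;
}

static std::vector<int> buildUnpackHi(unsigned NumElems) {
  std::vector<int> M;
  for (unsigned i = 0, Half = NumElems / 2; i != Half; ++i) {
    M.push_back(i + Half);
    M.push_back(i + NumElems + Half);
  }
  return M;
}

int main() {
  assert((buildUnpackLo(4) == std::vector<int>{0, 4, 1, 5}));
  assert((buildUnpackHi(4) == std::vector<int>{2, 6, 3, 7}));
  return 0;
}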
04272 
04273 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
04274 /// vector with a zero or undef vector.  This produces a shuffle where the low
04275 /// element of V2 is swizzled into the zero/undef vector, landing at element
04276 /// Idx.  This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
04277 static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
04278                                            bool IsZero,
04279                                            const X86Subtarget *Subtarget,
04280                                            SelectionDAG &DAG) {
04281   MVT VT = V2.getSimpleValueType();
04282   SDValue V1 = IsZero
04283     ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
04284   unsigned NumElems = VT.getVectorNumElements();
04285   SmallVector<int, 16> MaskVec;
04286   for (unsigned i = 0; i != NumElems; ++i)
04287     // If this is the insertion idx, put the low elt of V2 here.
04288     MaskVec.push_back(i == Idx ? NumElems : i);
04289   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
04290 }
04291 
04292 /// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
04293 /// target specific opcode. Returns true if the Mask could be calculated. Sets
04294 /// IsUnary to true if only uses one source. Note that this will set IsUnary for
04295 /// shuffles which use a single input multiple times, and in those cases it will
04296 /// adjust the mask to only have indices within that single input.
04297 static bool getTargetShuffleMask(SDNode *N, MVT VT,
04298                                  SmallVectorImpl<int> &Mask, bool &IsUnary) {
04299   unsigned NumElems = VT.getVectorNumElements();
04300   SDValue ImmN;
04301 
04302   IsUnary = false;
04303   bool IsFakeUnary = false;
04304   switch(N->getOpcode()) {
04305   case X86ISD::BLENDI:
04306     ImmN = N->getOperand(N->getNumOperands()-1);
04307     DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04308     break;
04309   case X86ISD::SHUFP:
04310     ImmN = N->getOperand(N->getNumOperands()-1);
04311     DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04312     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04313     break;
04314   case X86ISD::UNPCKH:
04315     DecodeUNPCKHMask(VT, Mask);
04316     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04317     break;
04318   case X86ISD::UNPCKL:
04319     DecodeUNPCKLMask(VT, Mask);
04320     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04321     break;
04322   case X86ISD::MOVHLPS:
04323     DecodeMOVHLPSMask(NumElems, Mask);
04324     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04325     break;
04326   case X86ISD::MOVLHPS:
04327     DecodeMOVLHPSMask(NumElems, Mask);
04328     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04329     break;
04330   case X86ISD::PALIGNR:
04331     ImmN = N->getOperand(N->getNumOperands()-1);
04332     DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04333     break;
04334   case X86ISD::PSHUFD:
04335   case X86ISD::VPERMILPI:
04336     ImmN = N->getOperand(N->getNumOperands()-1);
04337     DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04338     IsUnary = true;
04339     break;
04340   case X86ISD::PSHUFHW:
04341     ImmN = N->getOperand(N->getNumOperands()-1);
04342     DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04343     IsUnary = true;
04344     break;
04345   case X86ISD::PSHUFLW:
04346     ImmN = N->getOperand(N->getNumOperands()-1);
04347     DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04348     IsUnary = true;
04349     break;
04350   case X86ISD::PSHUFB: {
04351     IsUnary = true;
04352     SDValue MaskNode = N->getOperand(1);
04353     while (MaskNode->getOpcode() == ISD::BITCAST)
04354       MaskNode = MaskNode->getOperand(0);
04355 
04356     if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
04357       // If we have a build-vector, then things are easy.
04358       EVT VT = MaskNode.getValueType();
04359       assert(VT.isVector() &&
04360              "Can't produce a non-vector with a build_vector!");
04361       if (!VT.isInteger())
04362         return false;
04363 
04364       int NumBytesPerElement = VT.getVectorElementType().getSizeInBits() / 8;
04365 
04366       SmallVector<uint64_t, 32> RawMask;
04367       for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
04368         SDValue Op = MaskNode->getOperand(i);
04369         if (Op->getOpcode() == ISD::UNDEF) {
04370           RawMask.push_back((uint64_t)SM_SentinelUndef);
04371           continue;
04372         }
04373         auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
04374         if (!CN)
04375           return false;
04376         APInt MaskElement = CN->getAPIntValue();
04377 
04378         // We now have to decode the element which could be any integer size and
04379         // extract each byte of it.
04380         for (int j = 0; j < NumBytesPerElement; ++j) {
04381           // Note that this is x86 and so always little endian: the low byte is
04382           // the first byte of the mask.
04383           RawMask.push_back(MaskElement.getLoBits(8).getZExtValue());
04384           MaskElement = MaskElement.lshr(8);
04385         }
04386       }
04387       DecodePSHUFBMask(RawMask, Mask);
04388       break;
04389     }
04390 
04391     auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
04392     if (!MaskLoad)
04393       return false;
04394 
04395     SDValue Ptr = MaskLoad->getBasePtr();
04396     if (Ptr->getOpcode() == X86ISD::Wrapper ||
04397         Ptr->getOpcode() == X86ISD::WrapperRIP)
04398       Ptr = Ptr->getOperand(0);
04399 
04400     auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
04401     if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
04402       return false;
04403 
04404     if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
04405       DecodePSHUFBMask(C, Mask);
04406       if (Mask.empty())
04407         return false;
04408       break;
04409     }
04410 
04411     return false;
04412   }
04413   case X86ISD::VPERMI:
04414     ImmN = N->getOperand(N->getNumOperands()-1);
04415     DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04416     IsUnary = true;
04417     break;
04418   case X86ISD::MOVSS:
04419   case X86ISD::MOVSD:
04420     DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
04421     break;
04422   case X86ISD::VPERM2X128:
04423     ImmN = N->getOperand(N->getNumOperands()-1);
04424     DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04425     if (Mask.empty()) return false;
04426     break;
04427   case X86ISD::MOVSLDUP:
04428     DecodeMOVSLDUPMask(VT, Mask);
04429     IsUnary = true;
04430     break;
04431   case X86ISD::MOVSHDUP:
04432     DecodeMOVSHDUPMask(VT, Mask);
04433     IsUnary = true;
04434     break;
04435   case X86ISD::MOVDDUP:
04436     DecodeMOVDDUPMask(VT, Mask);
04437     IsUnary = true;
04438     break;
04439   case X86ISD::MOVLHPD:
04440   case X86ISD::MOVLPD:
04441   case X86ISD::MOVLPS:
04442     // Not yet implemented
04443     return false;
04444   default: llvm_unreachable("unknown target shuffle node");
04445   }
04446 
04447   // If we have a fake unary shuffle, the shuffle mask is spread across two
04448   // inputs that are actually the same node. Re-map the mask to always point
04449   // into the first input.
04450   if (IsFakeUnary)
04451     for (int &M : Mask)
04452       if (M >= (int)Mask.size())
04453         M -= Mask.size();
04454 
04455   return true;
04456 }
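
The PSHUFB build_vector case above splits each constant mask element into bytes, low byte first (x86 is little endian). A stand-alone sketch of that byte extraction (splitToBytes is illustrative only, not LLVM API):

#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint8_t> splitToBytes(uint64_t Elt, unsigned EltBytes) {
  std::vector<uint8_t> Bytes;
  for (unsigned i = 0; i != EltBytes; ++i) {
    Bytes.push_back((uint8_t)(Elt & 0xff));   // low byte first
    Elt >>= 8;
  }
  return Bytes;
}

int main() {
  // A 16-bit mask element 0x0201 contributes byte indices 0x01 then 0x02.
  assert((splitToBytes(0x0201, 2) == std::vector<uint8_t>{0x01, 0x02}));
  return 0;
}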
04457 
04458 /// getShuffleScalarElt - Returns the scalar element that will make up the ith
04459 /// element of the result of the vector shuffle.
04460 static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
04461                                    unsigned Depth) {
04462   if (Depth == 6)
04463     return SDValue();  // Limit search depth.
04464 
04465   SDValue V = SDValue(N, 0);
04466   EVT VT = V.getValueType();
04467   unsigned Opcode = V.getOpcode();
04468 
04469   // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
04470   if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
04471     int Elt = SV->getMaskElt(Index);
04472 
04473     if (Elt < 0)
04474       return DAG.getUNDEF(VT.getVectorElementType());
04475 
04476     unsigned NumElems = VT.getVectorNumElements();
04477     SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
04478                                          : SV->getOperand(1);
04479     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
04480   }
04481 
04482   // Recurse into target specific vector shuffles to find scalars.
04483   if (isTargetShuffle(Opcode)) {
04484     MVT ShufVT = V.getSimpleValueType();
04485     unsigned NumElems = ShufVT.getVectorNumElements();
04486     SmallVector<int, 16> ShuffleMask;
04487     bool IsUnary;
04488 
04489     if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
04490       return SDValue();
04491 
04492     int Elt = ShuffleMask[Index];
04493     if (Elt < 0)
04494       return DAG.getUNDEF(ShufVT.getVectorElementType());
04495 
04496     SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
04497                                          : N->getOperand(1);
04498     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
04499                                Depth+1);
04500   }
04501 
04502   // Actual nodes that may contain scalar elements
04503   if (Opcode == ISD::BITCAST) {
04504     V = V.getOperand(0);
04505     EVT SrcVT = V.getValueType();
04506     unsigned NumElems = VT.getVectorNumElements();
04507 
04508     if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
04509       return SDValue();
04510   }
04511 
04512   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
04513     return (Index == 0) ? V.getOperand(0)
04514                         : DAG.getUNDEF(VT.getVectorElementType());
04515 
04516   if (V.getOpcode() == ISD::BUILD_VECTOR)
04517     return V.getOperand(Index);
04518 
04519   return SDValue();
04520 }
04521 
04522 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
04523 ///
04524 static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
04525                                        unsigned NumNonZero, unsigned NumZero,
04526                                        SelectionDAG &DAG,
04527                                        const X86Subtarget* Subtarget,
04528                                        const TargetLowering &TLI) {
04529   if (NumNonZero > 8)
04530     return SDValue();
04531 
04532   SDLoc dl(Op);
04533   SDValue V;
04534   bool First = true;
04535 
04536   // SSE4.1 - use PINSRB to insert each byte directly.
04537   if (Subtarget->hasSSE41()) {
04538     for (unsigned i = 0; i < 16; ++i) {
04539       bool isNonZero = (NonZeros & (1 << i)) != 0;
04540       if (isNonZero) {
04541         if (First) {
04542           if (NumZero)
04543             V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
04544           else
04545             V = DAG.getUNDEF(MVT::v16i8);
04546           First = false;
04547         }
04548         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
04549                         MVT::v16i8, V, Op.getOperand(i),
04550                         DAG.getIntPtrConstant(i, dl));
04551       }
04552     }
04553 
04554     return V;
04555   }
04556 
04557   // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
04558   for (unsigned i = 0; i < 16; ++i) {
04559     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
04560     if (ThisIsNonZero && First) {
04561       if (NumZero)
04562         V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
04563       else
04564         V = DAG.getUNDEF(MVT::v8i16);
04565       First = false;
04566     }
04567 
04568     if ((i & 1) != 0) {
04569       SDValue ThisElt, LastElt;
04570       bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
04571       if (LastIsNonZero) {
04572         LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
04573                               MVT::i16, Op.getOperand(i-1));
04574       }
04575       if (ThisIsNonZero) {
04576         ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
04577         ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
04578                               ThisElt, DAG.getConstant(8, dl, MVT::i8));
04579         if (LastIsNonZero)
04580           ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
04581       } else
04582         ThisElt = LastElt;
04583 
04584       if (ThisElt.getNode())
04585         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
04586                         DAG.getIntPtrConstant(i/2, dl));
04587     }
04588   }
04589 
04590   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
04591 }
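
In the pre-SSE4.1 path above, two adjacent byte elements are packed into one i16 so a single PINSRW can insert them at word position i/2. A minimal stand-alone sketch of that packing (mergeBytePair is illustrative only):

#include <cassert>
#include <cstdint>

static uint16_t mergeBytePair(uint8_t Lo /*element i-1*/, uint8_t Hi /*element i*/) {
  // Element i becomes the high byte, element i-1 the low byte of the word.
  return (uint16_t)((uint16_t)Hi << 8 | Lo);
}

int main() {
  assert(mergeBytePair(0x34, 0x12) == 0x1234);   // bytes 0x34, 0x12 -> word 0x1234
  return 0;
}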
04592 
04593 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
04594 ///
04595 static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
04596                                      unsigned NumNonZero, unsigned NumZero,
04597                                      SelectionDAG &DAG,
04598                                      const X86Subtarget* Subtarget,
04599                                      const TargetLowering &TLI) {
04600   if (NumNonZero > 4)
04601     return SDValue();
04602 
04603   SDLoc dl(Op);
04604   SDValue V;
04605   bool First = true;
04606   for (unsigned i = 0; i < 8; ++i) {
04607     bool isNonZero = (NonZeros & (1 << i)) != 0;
04608     if (isNonZero) {
04609       if (First) {
04610         if (NumZero)
04611           V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
04612         else
04613           V = DAG.getUNDEF(MVT::v8i16);
04614         First = false;
04615       }
04616       V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
04617                       MVT::v8i16, V, Op.getOperand(i),
04618                       DAG.getIntPtrConstant(i, dl));
04619     }
04620   }
04621 
04622   return V;
04623 }
04624 
04625 /// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
04626 static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
04627                                      const X86Subtarget *Subtarget,
04628                                      const TargetLowering &TLI) {
04629   // Find all zeroable elements.
04630   std::bitset<4> Zeroable;
04631   for (int i=0; i < 4; ++i) {
04632     SDValue Elt = Op->getOperand(i);
04633     Zeroable[i] = (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt));
04634   }
04635   assert(Zeroable.size() - Zeroable.count() > 1 &&
04636          "We expect at least two non-zero elements!");
04637 
04638   // We only know how to deal with build_vector nodes where elements are either
04639   // zeroable or extract_vector_elt with constant index.
04640   SDValue FirstNonZero;
04641   unsigned FirstNonZeroIdx;
04642   for (unsigned i=0; i < 4; ++i) {
04643     if (Zeroable[i])
04644       continue;
04645     SDValue Elt = Op->getOperand(i);
04646     if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
04647         !isa<ConstantSDNode>(Elt.getOperand(1)))
04648       return SDValue();
04649     // Make sure that this node is extracting from a 128-bit vector.
04650     MVT VT = Elt.getOperand(0).getSimpleValueType();
04651     if (!VT.is128BitVector())
04652       return SDValue();
04653     if (!FirstNonZero.getNode()) {
04654       FirstNonZero = Elt;
04655       FirstNonZeroIdx = i;
04656     }
04657   }
04658 
04659   assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
04660   SDValue V1 = FirstNonZero.getOperand(0);
04661   MVT VT = V1.getSimpleValueType();
04662 
04663   // See if this build_vector can be lowered as a blend with zero.
04664   SDValue Elt;
04665   unsigned EltMaskIdx, EltIdx;
04666   int Mask[4];
04667   for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
04668     if (Zeroable[EltIdx]) {
04669       // The zero vector will be on the right hand side.
04670       Mask[EltIdx] = EltIdx+4;
04671       continue;
04672     }
04673 
04674     Elt = Op->getOperand(EltIdx);
04675     // By construction, Elt is a EXTRACT_VECTOR_ELT with constant index.
04676     EltMaskIdx = cast<ConstantSDNode>(Elt.getOperand(1))->getZExtValue();
04677     if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
04678       break;
04679     Mask[EltIdx] = EltIdx;
04680   }
04681 
04682   if (EltIdx == 4) {
04683     // Let the shuffle legalizer deal with blend operations.
04684     SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
04685     if (V1.getSimpleValueType() != VT)
04686       V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), VT, V1);
04687     return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, &Mask[0]);
04688   }
04689 
04690   // See if we can lower this build_vector to a INSERTPS.
04691   if (!Subtarget->hasSSE41())
04692     return SDValue();
04693 
04694   SDValue V2 = Elt.getOperand(0);
04695   if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
04696     V1 = SDValue();
04697 
04698   bool CanFold = true;
04699   for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
04700     if (Zeroable[i])
04701       continue;
04702 
04703     SDValue Current = Op->getOperand(i);
04704     SDValue SrcVector = Current->getOperand(0);
04705     if (!V1.getNode())
04706       V1 = SrcVector;
04707     CanFold = SrcVector == V1 &&
04708       cast<ConstantSDNode>(Current.getOperand(1))->getZExtValue() == i;
04709   }
04710 
04711   if (!CanFold)
04712     return SDValue();
04713 
04714   assert(V1.getNode() && "Expected at least two non-zero elements!");
04715   if (V1.getSimpleValueType() != MVT::v4f32)
04716     V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), MVT::v4f32, V1);
04717   if (V2.getSimpleValueType() != MVT::v4f32)
04718     V2 = DAG.getNode(ISD::BITCAST, SDLoc(V2), MVT::v4f32, V2);
04719 
04720   // Ok, we can emit an INSERTPS instruction.
04721   unsigned ZMask = Zeroable.to_ulong();
04722 
04723   unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
04724   assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
04725   SDLoc DL(Op);
04726   SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
04727                                DAG.getIntPtrConstant(InsertPSMask, DL));
04728   return DAG.getNode(ISD::BITCAST, DL, VT, Result);
04729 }
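
The INSERTPS immediate built above packs the source element into bits [7:6], the destination slot into bits [5:4], and the zero mask into bits [3:0]. A stand-alone sketch of the encoding (insertPSImm is illustrative only, not LLVM API):

#include <cassert>
#include <cstdint>

static uint8_t insertPSImm(unsigned SrcElt, unsigned DstElt, unsigned ZeroMask) {
  return (uint8_t)(SrcElt << 6 | DstElt << 4 | ZeroMask);
}

int main() {
  // Take element 2 of the source, place it in slot 1, zero slot 3: 0x98.
  assert(insertPSImm(2, 1, 0x8) == 0x98);
  return 0;
}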
04730 
04731 /// Return a vector logical shift node.
04732 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
04733                          unsigned NumBits, SelectionDAG &DAG,
04734                          const TargetLowering &TLI, SDLoc dl) {
04735   assert(VT.is128BitVector() && "Unknown type for VShift");
04736   MVT ShVT = MVT::v2i64;
04737   unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
04738   SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
04739   MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType());
04740   assert(NumBits % 8 == 0 && "Only support byte sized shifts");
04741   SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
04742   return DAG.getNode(ISD::BITCAST, dl, VT,
04743                      DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
04744 }
04745 
04746 static SDValue
04747 LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
04748 
04749   // Check if the scalar load can be widened into a vector load. And if
04750   // the address is "base + cst", see if the cst can be "absorbed" into
04751   // the shuffle mask.
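        // For example, splatting a (load i32 FI+8) into a v4i32 can become a
        // (load v4i32 FI) followed by a splat shuffle with mask <2,2,2,2>, provided
        // the stack slot can be given the required 16-byte alignment.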
04752   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
04753     SDValue Ptr = LD->getBasePtr();
04754     if (!ISD::isNormalLoad(LD) || LD->isVolatile())
04755       return SDValue();
04756     EVT PVT = LD->getValueType(0);
04757     if (PVT != MVT::i32 && PVT != MVT::f32)
04758       return SDValue();
04759 
04760     int FI = -1;
04761     int64_t Offset = 0;
04762     if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
04763       FI = FINode->getIndex();
04764       Offset = 0;
04765     } else if (DAG.isBaseWithConstantOffset(Ptr) &&
04766                isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
04767       FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
04768       Offset = Ptr.getConstantOperandVal(1);
04769       Ptr = Ptr.getOperand(0);
04770     } else {
04771       return SDValue();
04772     }
04773 
04774     // FIXME: 256-bit vector instructions don't require a strict alignment,
04775     // improve this code to support it better.
04776     unsigned RequiredAlign = VT.getSizeInBits()/8;
04777     SDValue Chain = LD->getChain();
04778     // Make sure the stack object alignment is at least 16 or 32.
04779     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
04780     if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
04781       if (MFI->isFixedObjectIndex(FI)) {
04782         // Can't change the alignment. FIXME: It's possible to compute
04783         // the exact stack offset and reference FI + adjusted offset instead,
04784         // if someone *really* cares about this; that's the way to implement it.
04785         return SDValue();
04786       } else {
04787         MFI->setObjectAlignment(FI, RequiredAlign);
04788       }
04789     }
04790 
04791     // (Offset % 16 or 32) must be a multiple of 4. The address is then
04792     // Ptr + (Offset & ~(RequiredAlign-1)).
04793     if (Offset < 0)
04794       return SDValue();
04795     if ((Offset % RequiredAlign) & 3)
04796       return SDValue();
04797     int64_t StartOffset = Offset & ~(RequiredAlign-1);
04798     if (StartOffset) {
04799       SDLoc DL(Ptr);
04800       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
04801                         DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
04802     }
04803 
04804     int EltNo = (Offset - StartOffset) >> 2;
04805     unsigned NumElems = VT.getVectorNumElements();
04806 
04807     EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
04808     SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
04809                              LD->getPointerInfo().getWithOffset(StartOffset),
04810                              false, false, false, 0);
04811 
04812     SmallVector<int, 8> Mask(NumElems, EltNo);
04813 
04814     return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
04815   }
04816 
04817   return SDValue();
04818 }
04819 
04820 /// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
04821 /// elements can be replaced by a single large load which has the same value as
04822 /// a build_vector or insert_subvector whose loaded operands are 'Elts'.
04823 ///
04824 /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
04825 ///
04826 /// FIXME: we'd also like to handle the case where the last elements are zero
04827 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
04828 /// There's even a handy isZeroNode for that purpose.
04829 static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
04830                                         SDLoc &DL, SelectionDAG &DAG,
04831                                         bool isAfterLegalize) {
04832   unsigned NumElems = Elts.size();
04833 
04834   LoadSDNode *LDBase = nullptr;
04835   unsigned LastLoadedElt = -1U;
04836 
04837   // For each element in the initializer, see if we've found a load or an undef.
04838   // If we don't find an initial load element, or later load elements are
04839   // non-consecutive, bail out.
04840   for (unsigned i = 0; i < NumElems; ++i) {
04841     SDValue Elt = Elts[i];
04842     // Look through a bitcast.
04843     if (Elt.getNode() && Elt.getOpcode() == ISD::BITCAST)
04844       Elt = Elt.getOperand(0);
04845     if (!Elt.getNode() ||
04846         (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
04847       return SDValue();
04848     if (!LDBase) {
04849       if (Elt.getNode()->getOpcode() == ISD::UNDEF)
04850         return SDValue();
04851       LDBase = cast<LoadSDNode>(Elt.getNode());
04852       LastLoadedElt = i;
04853       continue;
04854     }
04855     if (Elt.getOpcode() == ISD::UNDEF)
04856       continue;
04857 
04858     LoadSDNode *LD = cast<LoadSDNode>(Elt);
04859     EVT LdVT = Elt.getValueType();
04860     // Each loaded element must be the correct fractional portion of the
04861     // requested vector load.
04862     if (LdVT.getSizeInBits() != VT.getSizeInBits() / NumElems)
04863       return SDValue();
04864     if (!DAG.isConsecutiveLoad(LD, LDBase, LdVT.getSizeInBits() / 8, i))
04865       return SDValue();
04866     LastLoadedElt = i;
04867   }
04868 
04869   // If we have found an entire vector of loads and undefs, then return a large
04870   // load of the entire vector width starting at the base pointer.  If we found
04871   // consecutive loads for the low half, generate a vzext_load node.
04872   if (LastLoadedElt == NumElems - 1) {
04873     assert(LDBase && "Did not find base load for merging consecutive loads");
04874     EVT EltVT = LDBase->getValueType(0);
04875     // Ensure that the input vector size for the merged loads matches the
04876     // cumulative size of the input elements.
04877     if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
04878       return SDValue();
04879 
04880     if (isAfterLegalize &&
04881         !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
04882       return SDValue();
04883 
04884     SDValue NewLd = SDValue();
04885 
04886     NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
04887                         LDBase->getPointerInfo(), LDBase->isVolatile(),
04888                         LDBase->isNonTemporal(), LDBase->isInvariant(),
04889                         LDBase->getAlignment());
04890 
04891     if (LDBase->hasAnyUseOfValue(1)) {
04892       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
04893                                      SDValue(LDBase, 1),
04894                                      SDValue(NewLd.getNode(), 1));
04895       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
04896       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
04897                              SDValue(NewLd.getNode(), 1));
04898     }
04899 
04900     return NewLd;
04901   }
04902 
04903   // TODO: The code below fires only for loading the low v2i32 / v2f32
04904   // of a v4i32 / v4f32. It's probably worth generalizing.
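        // For example, (v4f32 (build_vector (load a), (load a+4), undef, undef))
        // becomes (bitcast (v2i64 (X86ISD::VZEXT_LOAD a))), a 64-bit load that
        // zero-fills the upper half of the vector.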
04905   EVT EltVT = VT.getVectorElementType();
04906   if (NumElems == 4 && LastLoadedElt == 1 && (EltVT.getSizeInBits() == 32) &&
04907       DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
04908     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
04909     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
04910     SDValue ResNode =
04911         DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64,
04912                                 LDBase->getPointerInfo(),
04913                                 LDBase->getAlignment(),
04914                                 false/*isVolatile*/, true/*ReadMem*/,
04915                                 false/*WriteMem*/);
04916 
04917     // Make sure the newly-created LOAD is in the same position as LDBase in
04918     // terms of dependency. We create a TokenFactor for LDBase and ResNode, and
04919     // update uses of LDBase's output chain to use the TokenFactor.
04920     if (LDBase->hasAnyUseOfValue(1)) {
04921       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
04922                              SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
04923       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
04924       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
04925                              SDValue(ResNode.getNode(), 1));
04926     }
04927 
04928     return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
04929   }
04930   return SDValue();
04931 }
04932 
04933 /// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
04934 /// to generate a splat value for the following cases:
04935 /// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
04936 /// 2. A splat shuffle which uses a scalar_to_vector node which comes from
04937 /// a scalar load, or a constant.
04938 /// The VBROADCAST node is returned when a pattern is found,
04939 /// or SDValue() otherwise.
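      /// For example, (v8f32 (build_vector (load x), (load x), ..., (load x)))
      /// becomes (v8f32 (X86ISD::VBROADCAST (load x))) when the build_vector is
      /// the only user of the load.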
04940 static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
04941                                     SelectionDAG &DAG) {
04942   // VBROADCAST requires AVX.
04943   // TODO: Splats could be generated for non-AVX CPUs using SSE
04944   // instructions, but there's less potential gain for only 128-bit vectors.
04945   if (!Subtarget->hasAVX())
04946     return SDValue();
04947 
04948   MVT VT = Op.getSimpleValueType();
04949   SDLoc dl(Op);
04950 
04951   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
04952          "Unsupported vector type for broadcast.");
04953 
04954   SDValue Ld;
04955   bool ConstSplatVal;
04956 
04957   switch (Op.getOpcode()) {
04958     default:
04959       // Unknown pattern found.
04960       return SDValue();
04961 
04962     case ISD::BUILD_VECTOR: {
04963       auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
04964       BitVector UndefElements;
04965       SDValue Splat = BVOp->getSplatValue(&UndefElements);
04966 
04967       // We need a splat of a single value to use broadcast, and it doesn't
04968       // make any sense if the value is only in one element of the vector.
04969       if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
04970         return SDValue();
04971 
04972       Ld = Splat;
04973       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
04974                        Ld.getOpcode() == ISD::ConstantFP);
04975 
04976       // Make sure that all of the users of a non-constant load are from the
04977       // BUILD_VECTOR node.
04978       if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
04979         return SDValue();
04980       break;
04981     }
04982 
04983     case ISD::VECTOR_SHUFFLE: {
04984       ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
04985 
04986       // Shuffles must have a splat mask where the first element is
04987       // broadcasted.
04988       if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
04989         return SDValue();
04990 
04991       SDValue Sc = Op.getOperand(0);
04992       if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
04993           Sc.getOpcode() != ISD::BUILD_VECTOR) {
04994 
04995         if (!Subtarget->hasInt256())
04996           return SDValue();
04997 
04998         // Use the register form of the broadcast instruction available on AVX2.
04999         if (VT.getSizeInBits() >= 256)
05000           Sc = Extract128BitVector(Sc, 0, DAG, dl);
05001         return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
05002       }
05003 
05004       Ld = Sc.getOperand(0);
05005       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
05006                        Ld.getOpcode() == ISD::ConstantFP);
05007 
05008       // The scalar_to_vector node and the suspected
05009       // load node must have exactly one user.
05010       // Constants may have multiple users.
05011 
05012       // AVX-512 has a register version of the broadcast.
05013       bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
05014         Ld.getValueType().getSizeInBits() >= 32;
05015       if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
05016           !hasRegVer))
05017         return SDValue();
05018       break;
05019     }
05020   }
05021 
05022   unsigned ScalarSize = Ld.getValueType().getSizeInBits();
05023   bool IsGE256 = (VT.getSizeInBits() >= 256);
05024 
05025   // When optimizing for size, generate up to 5 extra bytes for a broadcast
05026   // instruction to save 8 or more bytes of constant pool data.
05027   // TODO: If multiple splats are generated to load the same constant,
05028   // it may be detrimental to overall size. There needs to be a way to detect
05029   // that condition to know if this is truly a size win.
05030   const Function *F = DAG.getMachineFunction().getFunction();
05031   bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize);
05032 
05033   // Handle broadcasting a single constant scalar from the constant pool
05034   // into a vector.
05035   // On Sandybridge (no AVX2), it is still better to load a constant vector
05036   // from the constant pool and not to broadcast it from a scalar.
05037   // But override that restriction when optimizing for size.
05038   // TODO: Check if splatting is recommended for other AVX-capable CPUs.
05039   if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) {
05040     EVT CVT = Ld.getValueType();
05041     assert(!CVT.isVector() && "Must not broadcast a vector type");
05042 
05043     // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
05044     // For size optimization, also splat v2f64 and v2i64, and for size opt
05045     // with AVX2, also splat i8 and i16.
05046     // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
05047     if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
05048         (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) {
05049       const Constant *C = nullptr;
05050       if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
05051         C = CI->getConstantIntValue();
05052       else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
05053         C = CF->getConstantFPValue();
05054 
05055       assert(C && "Invalid constant type");
05056 
05057       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
05058       SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
05059       unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
05060       Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
05061                        MachinePointerInfo::getConstantPool(),
05062                        false, false, false, Alignment);
05063 
05064       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
05065     }
05066   }
05067 
05068   bool IsLoad = ISD::isNormalLoad(Ld.getNode());
05069 
05070   // Handle AVX2 in-register broadcasts.
05071   if (!IsLoad && Subtarget->hasInt256() &&
05072       (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
05073     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
05074 
05075   // The scalar source must be a normal load.
05076   if (!IsLoad)
05077     return SDValue();
05078 
05079   if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
05080       (Subtarget->hasVLX() && ScalarSize == 64))
05081     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
05082 
05083   // The integer check is needed for the 64-bit into 128-bit case, so that it
05084   // doesn't match f64, since there is no vbroadcastsd for xmm registers.
05085   if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
05086     if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
05087       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
05088   }
05089 
05090   // Unsupported broadcast.
05091   return SDValue();
05092 }
05093 
05094 /// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
05095 /// underlying vector and index.
05096 ///
05097 /// Modifies \p ExtractedFromVec to the real vector and returns the real
05098 /// index.
05099 static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
05100                                          SDValue ExtIdx) {
05101   int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
05102   if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
05103     return Idx;
05104 
05105   // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
05106   // lowered this:
05107   //   (extract_vector_elt (v8f32 %vreg1), Constant<6>)
05108   // to:
05109   //   (extract_vector_elt (vector_shuffle<2,u,u,u>
05110   //                           (extract_subvector (v8f32 %vreg0), Constant<4>),
05111   //                           undef)
05112   //                       Constant<0>)
05113   // In this case the vector is the extract_subvector expression and the index
05114   // is 2, as specified by the shuffle.
05115   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
05116   SDValue ShuffleVec = SVOp->getOperand(0);
05117   MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
05118   assert(ShuffleVecVT.getVectorElementType() ==
05119          ExtractedFromVec.getSimpleValueType().getVectorElementType());
05120 
05121   int ShuffleIdx = SVOp->getMaskElt(Idx);
05122   if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
05123     ExtractedFromVec = ShuffleVec;
05124     return ShuffleIdx;
05125   }
05126   return Idx;
05127 }
05128 
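      /// Lower a BUILD_VECTOR whose elements are mostly EXTRACT_VECTOR_ELTs taken
      /// from at most two source vectors: emit a vector_shuffle of those sources
      /// and then re-insert the few remaining non-extracted elements.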
05129 static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
05130   MVT VT = Op.getSimpleValueType();
05131 
05132   // Skip if insert_vec_elt is not supported.
05133   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
05134   if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
05135     return SDValue();
05136 
05137   SDLoc DL(Op);
05138   unsigned NumElems = Op.getNumOperands();
05139 
05140   SDValue VecIn1;
05141   SDValue VecIn2;
05142   SmallVector<unsigned, 4> InsertIndices;
05143   SmallVector<int, 8> Mask(NumElems, -1);
05144 
05145   for (unsigned i = 0; i != NumElems; ++i) {
05146     unsigned Opc = Op.getOperand(i).getOpcode();
05147 
05148     if (Opc == ISD::UNDEF)
05149       continue;
05150 
05151     if (Opc != ISD::EXTRACT_VECTOR_ELT) {
05152       // Quit if more than 1 element needs inserting.
05153       if (InsertIndices.size() > 1)
05154         return SDValue();
05155 
05156       InsertIndices.push_back(i);
05157       continue;
05158     }
05159 
05160     SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
05161     SDValue ExtIdx = Op.getOperand(i).getOperand(1);
05162     // Quit if non-constant index.
05163     if (!isa<ConstantSDNode>(ExtIdx))
05164       return SDValue();
05165     int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
05166 
05167     // Quit if extracted from vector of different type.
05168     if (ExtractedFromVec.getValueType() != VT)
05169       return SDValue();
05170 
05171     if (!VecIn1.getNode())
05172       VecIn1 = ExtractedFromVec;
05173     else if (VecIn1 != ExtractedFromVec) {
05174       if (!VecIn2.getNode())
05175         VecIn2 = ExtractedFromVec;
05176       else if (VecIn2 != ExtractedFromVec)
05177         // Quit if there are more than 2 vectors to shuffle.
05178         return SDValue();
05179     }
05180 
05181     if (ExtractedFromVec == VecIn1)
05182       Mask[i] = Idx;
05183     else if (ExtractedFromVec == VecIn2)
05184       Mask[i] = Idx + NumElems;
05185   }
05186 
05187   if (!VecIn1.getNode())
05188     return SDValue();
05189 
05190   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
05191   SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
05192   for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
05193     unsigned Idx = InsertIndices[i];
05194     NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
05195                      DAG.getIntPtrConstant(Idx, DL));
05196   }
05197 
05198   return NV;
05199 }
05200 
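      /// Pack the constant i1 elements of a build_vector into an integer immediate,
      /// one bit per element; e.g. <i1 1, i1 0, i1 1, i1 1> becomes the i8 value
      /// 0b00001101.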
05201 static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
05202   assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
05203          Op.getScalarValueSizeInBits() == 1 &&
05204          "Can not convert non-constant vector");
05205   uint64_t Immediate = 0;
05206   for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
05207     SDValue In = Op.getOperand(idx);
05208     if (In.getOpcode() != ISD::UNDEF)
05209       Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
05210   }
05211   SDLoc dl(Op);
05212   MVT VT =
05213    MVT::getIntegerVT(std::max((int)Op.getValueType().getSizeInBits(), 8));
05214   return DAG.getConstant(Immediate, dl, VT);
05215 }
05216 // Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
05217 SDValue
05218 X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
05219 
05220   MVT VT = Op.getSimpleValueType();
05221   assert((VT.getVectorElementType() == MVT::i1) &&
05222          "Unexpected type in LowerBUILD_VECTORvXi1!");
05223 
05224   SDLoc dl(Op);
05225   if (ISD::isBuildVectorAllZeros(Op.getNode())) {
05226     SDValue Cst = DAG.getTargetConstant(0, dl, MVT::i1);
05227     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
05228     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
05229   }
05230 
05231   if (ISD::isBuildVectorAllOnes(Op.getNode())) {
05232     SDValue Cst = DAG.getTargetConstant(1, dl, MVT::i1);
05233     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
05234     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
05235   }
05236 
05237   if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
05238     SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
05239     if (Imm.getValueSizeInBits() == VT.getSizeInBits())
05240       return DAG.getNode(ISD::BITCAST, dl, VT, Imm);
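          // For v2i1/v4i1 the immediate is wider (i8) than the vector; bitcast it
          // to v8i1 and extract the low subvector of the requested type.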
05241     SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
05242     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
05243                         DAG.getIntPtrConstant(0, dl));
05244   }
05245 
05246   // Vector has one or more non-const elements
05247   uint64_t Immediate = 0;
05248   SmallVector<unsigned, 16> NonConstIdx;
05249   bool IsSplat = true;
05250   bool HasConstElts = false;
05251   int SplatIdx = -1;
05252   for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
05253     SDValue In = Op.getOperand(idx);
05254     if (In.getOpcode() == ISD::UNDEF)
05255       continue;
05256     if (!isa<ConstantSDNode>(In)) 
05257       NonConstIdx.push_back(idx);
05258     else {
05259       Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
05260       HasConstElts = true;
05261     }
05262     if (SplatIdx == -1)
05263       SplatIdx = idx;
05264     else if (In != Op.getOperand(SplatIdx))
05265       IsSplat = false;
05266   }
05267 
05268   // For a splat, use "(select i1 splat_elt, all-ones, all-zeroes)".
05269   if (IsSplat)
05270     return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx),
05271                        DAG.getConstant(1, dl, VT),
05272                        DAG.getConstant(0, dl, VT));
05273 
05274   // insert elements one by one
05275   SDValue DstVec;
05276   SDValue Imm;
05277   if (Immediate) {
05278     MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
05279     Imm = DAG.getConstant(Immediate, dl, ImmVT);
05280   }
05281   else if (HasConstElts)
05282     Imm = DAG.getConstant(0, dl, VT);
05283   else 
05284     Imm = DAG.getUNDEF(VT);
05285   if (Imm.getValueSizeInBits() == VT.getSizeInBits())
05286     DstVec = DAG.getNode(ISD::BITCAST, dl, VT, Imm);
05287   else {
05288     SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
05289     DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
05290                          DAG.getIntPtrConstant(0, dl));
05291   }
05292 
05293   for (unsigned i = 0; i < NonConstIdx.size(); ++i) {
05294     unsigned InsertIdx = NonConstIdx[i];
05295     DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
05296                          Op.getOperand(InsertIdx),
05297                          DAG.getIntPtrConstant(InsertIdx, dl));
05298   }
05299   return DstVec;
05300 }
05301 
05302 /// \brief Return true if \p N implements a horizontal binop and return the
05303 /// operands for the horizontal binop into V0 and V1.
05304 ///
05305 /// This is a helper function of LowerToHorizontalOp().
05306 /// This function checks that the build_vector \p N in input implements a
05307 /// horizontal operation. Parameter \p Opcode defines the kind of horizontal
05308 /// operation to match.
05309 /// For example, if \p Opcode is equal to ISD::ADD, then this function
05310 /// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
05311 /// is equal to ISD::SUB, then this function checks if this is a horizontal
05312 /// arithmetic sub.
05313 ///
05314 /// This function only analyzes elements of \p N whose indices are
05315 /// in range [BaseIdx, LastIdx).
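      /// For example, with \p Opcode == ISD::ADD and indices [0, 4), the node
      ///   (v4i32 (build_vector (add (extract A, 0), (extract A, 1)),
      ///                        (add (extract A, 2), (extract A, 3)),
      ///                        (add (extract B, 0), (extract B, 1)),
      ///                        (add (extract B, 2), (extract B, 3))))
      /// matches, with V0 = A and V1 = B.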
05316 static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
05317                               SelectionDAG &DAG,
05318                               unsigned BaseIdx, unsigned LastIdx,
05319                               SDValue &V0, SDValue &V1) {
05320   EVT VT = N->getValueType(0);
05321 
05322   assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
05323   assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
05324          "Invalid Vector in input!");
05325 
05326   bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
05327   bool CanFold = true;
05328   unsigned ExpectedVExtractIdx = BaseIdx;
05329   unsigned NumElts = LastIdx - BaseIdx;
05330   V0 = DAG.getUNDEF(VT);
05331   V1 = DAG.getUNDEF(VT);
05332 
05333   // Check if N implements a horizontal binop.
05334   for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
05335     SDValue Op = N->getOperand(i + BaseIdx);
05336 
05337     // Skip UNDEFs.
05338     if (Op->getOpcode() == ISD::UNDEF) {
05339       // Update the expected vector extract index.
05340       if (i * 2 == NumElts)
05341         ExpectedVExtractIdx = BaseIdx;
05342       ExpectedVExtractIdx += 2;
05343       continue;
05344     }
05345 
05346     CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
05347 
05348     if (!CanFold)
05349       break;
05350 
05351     SDValue Op0 = Op.getOperand(0);
05352     SDValue Op1 = Op.getOperand(1);
05353 
05354     // Try to match the following pattern:
05355     // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
05356     CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
05357         Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
05358         Op0.getOperand(0) == Op1.getOperand(0) &&
05359         isa<ConstantSDNode>(Op0.getOperand(1)) &&
05360         isa<ConstantSDNode>(Op1.getOperand(1)));
05361     if (!CanFold)
05362       break;
05363 
05364     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
05365     unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
05366 
05367     if (i * 2 < NumElts) {
05368       if (V0.getOpcode() == ISD::UNDEF) {
05369         V0 = Op0.getOperand(0);
05370         if (V0.getValueType() != VT)
05371           return false;
05372       }
05373     } else {
05374       if (V1.getOpcode() == ISD::UNDEF) {
05375         V1 = Op0.getOperand(0);
05376         if (V1.getValueType() != VT)
05377           return false;
05378       }
05379       if (i * 2 == NumElts)
05380         ExpectedVExtractIdx = BaseIdx;
05381     }
05382 
05383     SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
05384     if (I0 == ExpectedVExtractIdx)
05385       CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
05386     else if (IsCommutable && I1 == ExpectedVExtractIdx) {
05387       // Try to match the following dag sequence:
05388       // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
05389       CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
05390     } else
05391       CanFold = false;
05392 
05393     ExpectedVExtractIdx += 2;
05394   }
05395 
05396   return CanFold;
05397 }
05398 
05399 /// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
05400 /// a concat_vector.
05401 ///
05402 /// This is a helper function of LowerToHorizontalOp().
05403 /// This function expects two 256-bit vectors called V0 and V1.
05404 /// At first, each vector is split into two separate 128-bit vectors.
05405 /// Then, the resulting 128-bit vectors are used to implement two
05406 /// horizontal binary operations.
05407 ///
05408 /// The kind of horizontal binary operation is defined by \p X86Opcode.
05409 ///
05410 /// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to
05411 /// the two new horizontal binop.
05412 /// When Mode is set, the first horizontal binop dag node would take as input
05413 /// the lower 128-bit of V0 and the upper 128-bit of V0. The second
05414 /// horizontal binop dag node would take as input the lower 128-bit of V1
05415 /// and the upper 128-bit of V1.
05416 ///   Example:
05417 ///     HADD V0_LO, V0_HI
05418 ///     HADD V1_LO, V1_HI
05419 ///
05420 /// Otherwise, the first horizontal binop dag node takes as input the lower
05421 /// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
05422 /// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
05423 ///   Example:
05424 ///     HADD V0_LO, V1_LO
05425 ///     HADD V0_HI, V1_HI
05426 ///
05427 /// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
05428 /// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
05429 /// the upper 128-bits of the result.
05430 static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
05431                                      SDLoc DL, SelectionDAG &DAG,
05432                                      unsigned X86Opcode, bool Mode,
05433                                      bool isUndefLO, bool isUndefHI) {
05434   EVT VT = V0.getValueType();
05435   assert(VT.is256BitVector() && VT == V1.getValueType() &&
05436          "Invalid nodes in input!");
05437 
05438   unsigned NumElts = VT.getVectorNumElements();
05439   SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL);
05440   SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL);
05441   SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL);
05442   SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL);
05443   EVT NewVT = V0_LO.getValueType();
05444 
05445   SDValue LO = DAG.getUNDEF(NewVT);
05446   SDValue HI = DAG.getUNDEF(NewVT);
05447 
05448   if (Mode) {
05449     // Don't emit a horizontal binop if the result is expected to be UNDEF.
05450     if (!isUndefLO && V0->getOpcode() != ISD::UNDEF)
05451       LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
05452     if (!isUndefHI && V1->getOpcode() != ISD::UNDEF)
05453       HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
05454   } else {
05455     // Don't emit a horizontal binop if the result is expected to be UNDEF.
05456     if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF ||
05457                        V1_LO->getOpcode() != ISD::UNDEF))
05458       LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
05459 
05460     if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF ||
05461                        V1_HI->getOpcode() != ISD::UNDEF))
05462       HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
05463   }
05464 
05465   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
05466 }
05467 
05468 /// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB
05469 /// node.
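      /// For example, (v4f32 (build_vector (fsub (extract A, 0), (extract B, 0)),
      ///                                   (fadd (extract A, 1), (extract B, 1)),
      ///                                   (fsub (extract A, 2), (extract B, 2)),
      ///                                   (fadd (extract A, 3), (extract B, 3))))
      /// is folded into (X86ISD::ADDSUB A, B).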
05470 static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
05471                              const X86Subtarget *Subtarget, SelectionDAG &DAG) {
05472   EVT VT = BV->getValueType(0);
05473   if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
05474       (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
05475     return SDValue();
05476 
05477   SDLoc DL(BV);
05478   unsigned NumElts = VT.getVectorNumElements();
05479   SDValue InVec0 = DAG.getUNDEF(VT);
05480   SDValue InVec1 = DAG.getUNDEF(VT);
05481 
05482   assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
05483           VT == MVT::v2f64) && "build_vector with an invalid type found!");
05484 
05485   // Odd-numbered elements in the input build vector are obtained from
05486   // adding two integer/float elements.
05487   // Even-numbered elements in the input build vector are obtained from
05488   // subtracting two integer/float elements.
05489   unsigned ExpectedOpcode = ISD::FSUB;
05490   unsigned NextExpectedOpcode = ISD::FADD;
05491   bool AddFound = false;
05492   bool SubFound = false;
05493 
05494   for (unsigned i = 0, e = NumElts; i != e; ++i) {
05495     SDValue Op = BV->getOperand(i);
05496 
05497     // Skip 'undef' values.
05498     unsigned Opcode = Op.getOpcode();
05499     if (Opcode == ISD::UNDEF) {
05500       std::swap(ExpectedOpcode, NextExpectedOpcode);
05501       continue;
05502     }
05503 
05504     // Early exit if we found an unexpected opcode.
05505     if (Opcode != ExpectedOpcode)
05506       return SDValue();
05507 
05508     SDValue Op0 = Op.getOperand(0);
05509     SDValue Op1 = Op.getOperand(1);
05510 
05511     // Try to match the following pattern:
05512     // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
05513     // Early exit if we cannot match that sequence.
05514     if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
05515         Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
05516         !isa<ConstantSDNode>(Op0.getOperand(1)) ||
05517         !isa<ConstantSDNode>(Op1.getOperand(1)) ||
05518         Op0.getOperand(1) != Op1.getOperand(1))
05519       return SDValue();
05520 
05521     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
05522     if (I0 != i)
05523       return SDValue();
05524 
05525     // We found a valid add/sub node. Update the information accordingly.
05526     if (i & 1)
05527       AddFound = true;
05528     else
05529       SubFound = true;
05530 
05531     // Update InVec0 and InVec1.
05532     if (InVec0.getOpcode() == ISD::UNDEF) {
05533       InVec0 = Op0.getOperand(0);
05534       if (InVec0.getValueType() != VT)
05535         return SDValue();
05536     }
05537     if (InVec1.getOpcode() == ISD::UNDEF) {
05538       InVec1 = Op1.getOperand(0);
05539       if (InVec1.getValueType() != VT)
05540         return SDValue();
05541     }
05542 
05543     // Make sure that the operands of each add/sub node always
05544     // come from the same pair of vectors.
05545     if (InVec0 != Op0.getOperand(0)) {
05546       if (ExpectedOpcode == ISD::FSUB)
05547         return SDValue();
05548 
05549       // FADD is commutable. Try to commute the operands
05550       // and then test again.
05551       std::swap(Op0, Op1);
05552       if (InVec0 != Op0.getOperand(0))
05553         return SDValue();
05554     }
05555 
05556     if (InVec1 != Op1.getOperand(0))
05557       return SDValue();
05558 
05559     // Update the pair of expected opcodes.
05560     std::swap(ExpectedOpcode, NextExpectedOpcode);
05561   }
05562 
05563   // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
05564   if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
05565       InVec1.getOpcode() != ISD::UNDEF)
05566     return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
05567 
05568   return SDValue();
05569 }
05570 
05571 /// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
05572 static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
05573                                    const X86Subtarget *Subtarget,
05574                                    SelectionDAG &DAG) {
05575   EVT VT = BV->getValueType(0);
05576   unsigned NumElts = VT.getVectorNumElements();
05577   unsigned NumUndefsLO = 0;
05578   unsigned NumUndefsHI = 0;
05579   unsigned Half = NumElts/2;
05580 
05581   // Count the number of UNDEF operands in the build_vector in input.
05582   for (unsigned i = 0, e = Half; i != e; ++i)
05583     if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
05584       NumUndefsLO++;
05585 
05586   for (unsigned i = Half, e = NumElts; i != e; ++i)
05587     if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
05588       NumUndefsHI++;
05589 
05590   // Early exit if this is either a build_vector of all UNDEFs or if all the
05591   // operands but one are UNDEF.
05592   if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
05593     return SDValue();
05594 
05595   SDLoc DL(BV);
05596   SDValue InVec0, InVec1;
05597   if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
05598     // Try to match an SSE3 float HADD/HSUB.
05599     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
05600       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
05601 
05602     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
05603       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
05604   } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
05605     // Try to match an SSSE3 integer HADD/HSUB.
05606     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
05607       return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
05608 
05609     if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
05610       return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
05611   }
05612 
05613   if (!Subtarget->hasAVX())
05614     return SDValue();
05615 
05616   if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
05617     // Try to match an AVX horizontal add/sub of packed single/double
05618     // precision floating point values from 256-bit vectors.
05619     SDValue InVec2, InVec3;
05620     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
05621         isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
05622         ((InVec0.getOpcode() == ISD::UNDEF ||
05623           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
05624         ((InVec1.getOpcode() == ISD::UNDEF ||
05625           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
05626       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
05627 
05628     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
05629         isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
05630         ((InVec0.getOpcode() == ISD::UNDEF ||
05631           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
05632         ((InVec1.getOpcode() == ISD::UNDEF ||
05633           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
05634       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
05635   } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
05636     // Try to match an AVX2 horizontal add/sub of signed integers.
05637     SDValue InVec2, InVec3;
05638     unsigned X86Opcode;
05639     bool CanFold = true;
05640 
05641     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
05642         isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
05643         ((InVec0.getOpcode() == ISD::UNDEF ||
05644           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
05645         ((InVec1.getOpcode() == ISD::UNDEF ||
05646           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
05647       X86Opcode = X86ISD::HADD;
05648     else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
05649         isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
05650         ((InVec0.getOpcode() == ISD::UNDEF ||
05651           InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
05652         ((InVec1.getOpcode() == ISD::UNDEF ||
05653           InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
05654       X86Opcode = X86ISD::HSUB;
05655     else
05656       CanFold = false;
05657 
05658     if (CanFold) {
05659       // Fold this build_vector into a single horizontal add/sub.
05660       // Do this only if the target has AVX2.
05661       if (Subtarget->hasAVX2())
05662         return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
05663 
05664       // Do not try to expand this build_vector into a pair of horizontal
05665       // add/sub if we can emit a pair of scalar add/sub.
05666       if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
05667         return SDValue();
05668 
05669       // Convert this build_vector into a pair of horizontal binop followed by
05670       // a concat vector.
05671       bool isUndefLO = NumUndefsLO == Half;
05672       bool isUndefHI = NumUndefsHI == Half;
05673       return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
05674                                    isUndefLO, isUndefHI);
05675     }
05676   }
05677 
05678   if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
05679        VT == MVT::v16i16) && Subtarget->hasAVX()) {
05680     unsigned X86Opcode;
05681     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
05682       X86Opcode = X86ISD::HADD;
05683     else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
05684       X86Opcode = X86ISD::HSUB;
05685     else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
05686       X86Opcode = X86ISD::FHADD;
05687     else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
05688       X86Opcode = X86ISD::FHSUB;
05689     else
05690       return SDValue();
05691 
05692     // Don't try to expand this build_vector into a pair of horizontal add/sub
05693     // if we can simply emit a pair of scalar add/sub.
05694     if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
05695       return SDValue();
05696 
05697     // Convert this build_vector into two horizontal add/sub followed by
05698     // a concat vector.
05699     bool isUndefLO = NumUndefsLO == Half;
05700     bool isUndefHI = NumUndefsHI == Half;
05701     return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
05702                                  isUndefLO, isUndefHI);
05703   }
05704 
05705   return SDValue();
05706 }
05707 
05708 SDValue
05709 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
05710   SDLoc dl(Op);
05711 
05712   MVT VT = Op.getSimpleValueType();
05713   MVT ExtVT = VT.getVectorElementType();
05714   unsigned NumElems = Op.getNumOperands();
05715 
05716   // Generate vectors for predicate vectors.
05717   if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512())
05718     return LowerBUILD_VECTORvXi1(Op, DAG);
05719 
05720   // Vectors containing all zeros can be matched by pxor and xorps later
05721   if (ISD::isBuildVectorAllZeros(Op.getNode())) {
05722     // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
05723     // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
05724     if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
05725       return Op;
05726 
05727     return