00001 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines the interfaces that X86 uses to lower LLVM code into a
00011 // selection DAG.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "X86ISelLowering.h"
00016 #include "Utils/X86ShuffleDecode.h"
00017 #include "X86CallingConv.h"
00018 #include "X86FrameLowering.h"
00019 #include "X86InstrBuilder.h"
00020 #include "X86MachineFunctionInfo.h"
00021 #include "X86ShuffleDecodeConstantPool.h"
00022 #include "X86TargetMachine.h"
00023 #include "X86TargetObjectFile.h"
00024 #include "llvm/ADT/SmallBitVector.h"
00025 #include "llvm/ADT/SmallSet.h"
00026 #include "llvm/ADT/Statistic.h"
00027 #include "llvm/ADT/StringExtras.h"
00028 #include "llvm/ADT/StringSwitch.h"
00029 #include "llvm/Analysis/EHPersonalities.h"
00030 #include "llvm/CodeGen/IntrinsicLowering.h"
00031 #include "llvm/CodeGen/MachineFrameInfo.h"
00032 #include "llvm/CodeGen/MachineFunction.h"
00033 #include "llvm/CodeGen/MachineInstrBuilder.h"
00034 #include "llvm/CodeGen/MachineJumpTableInfo.h"
00035 #include "llvm/CodeGen/MachineModuleInfo.h"
00036 #include "llvm/CodeGen/MachineRegisterInfo.h"
00037 #include "llvm/CodeGen/WinEHFuncInfo.h"
00038 #include "llvm/IR/CallSite.h"
00039 #include "llvm/IR/CallingConv.h"
00040 #include "llvm/IR/Constants.h"
00041 #include "llvm/IR/DerivedTypes.h"
00042 #include "llvm/IR/Function.h"
00043 #include "llvm/IR/GlobalAlias.h"
00044 #include "llvm/IR/GlobalVariable.h"
00045 #include "llvm/IR/Instructions.h"
00046 #include "llvm/IR/Intrinsics.h"
00047 #include "llvm/MC/MCAsmInfo.h"
00048 #include "llvm/MC/MCContext.h"
00049 #include "llvm/MC/MCExpr.h"
00050 #include "llvm/MC/MCSymbol.h"
00051 #include "llvm/Support/CommandLine.h"
00052 #include "llvm/Support/Debug.h"
00053 #include "llvm/Support/ErrorHandling.h"
00054 #include "llvm/Support/MathExtras.h"
00055 #include "llvm/Target/TargetOptions.h"
00056 #include "X86IntrinsicsInfo.h"
00057 #include <bitset>
00058 #include <numeric>
00059 #include <cctype>
00060 using namespace llvm;
00061 
00062 #define DEBUG_TYPE "x86-isel"
00063 
00064 STATISTIC(NumTailCalls, "Number of tail calls");
00065 
00066 static cl::opt<bool> ExperimentalVectorWideningLegalization(
00067     "x86-experimental-vector-widening-legalization", cl::init(false),
00068     cl::desc("Enable an experimental vector type legalization through widening "
00069              "rather than promotion."),
00070     cl::Hidden);
00071 
00072 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
00073                                      const X86Subtarget &STI)
00074     : TargetLowering(TM), Subtarget(&STI) {
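        // Cache whether scalar f32/f64 arithmetic can use the SSE register file;
        // without SSE1/SSE2 those types are handled on the x87 FP stack. PtrVT is
        // the pointer-sized integer type for this target (i32 or i64).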
00075   X86ScalarSSEf64 = Subtarget->hasSSE2();
00076   X86ScalarSSEf32 = Subtarget->hasSSE1();
00077   MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
00078 
00079   // Set up the TargetLowering object.
00080 
00081   // X86 is weird. It always uses i8 for shift amounts and setcc results.
00082   setBooleanContents(ZeroOrOneBooleanContent);
00083   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
00084   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
00085 
00086   // For 64-bit, since we have so many registers, use the ILP scheduler.
00087   // For 32-bit, use the register pressure specific scheduling.
00088   // For Atom, always use ILP scheduling.
00089   if (Subtarget->isAtom())
00090     setSchedulingPreference(Sched::ILP);
00091   else if (Subtarget->is64Bit())
00092     setSchedulingPreference(Sched::ILP);
00093   else
00094     setSchedulingPreference(Sched::RegPressure);
00095   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
00096   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
00097 
00098   // Bypass expensive divides on Atom when compiling with O2.
00099   if (TM.getOptLevel() >= CodeGenOpt::Default) {
00100     if (Subtarget->hasSlowDivide32())
00101       addBypassSlowDiv(32, 8);
00102     if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
00103       addBypassSlowDiv(64, 16);
00104   }
00105 
00106   if (Subtarget->isTargetKnownWindowsMSVC()) {
00107     // Setup Windows compiler runtime calls.
00108     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
00109     setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
00110     setLibcallName(RTLIB::SREM_I64, "_allrem");
00111     setLibcallName(RTLIB::UREM_I64, "_aullrem");
00112     setLibcallName(RTLIB::MUL_I64, "_allmul");
00113     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
00114     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
00115     setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
00116     setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
00117     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
00118   }
00119 
00120   if (Subtarget->isTargetDarwin()) {
00121     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
00122     setUseUnderscoreSetJmp(false);
00123     setUseUnderscoreLongJmp(false);
00124   } else if (Subtarget->isTargetWindowsGNU()) {
00125     // MS runtime is weird: it exports _setjmp, but plain longjmp (no underscore)!
00126     setUseUnderscoreSetJmp(true);
00127     setUseUnderscoreLongJmp(false);
00128   } else {
00129     setUseUnderscoreSetJmp(true);
00130     setUseUnderscoreLongJmp(true);
00131   }
00132 
00133   // Set up the register classes.
00134   addRegisterClass(MVT::i8, &X86::GR8RegClass);
00135   addRegisterClass(MVT::i16, &X86::GR16RegClass);
00136   addRegisterClass(MVT::i32, &X86::GR32RegClass);
00137   if (Subtarget->is64Bit())
00138     addRegisterClass(MVT::i64, &X86::GR64RegClass);
00139 
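        // There is no sign-extending load from i1; promote it so the legalizer
        // widens the loaded memory type before extending.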
00140   for (MVT VT : MVT::integer_valuetypes())
00141     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
00142 
00143   // We don't accept any truncstore of integer registers.
00144   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
00145   setTruncStoreAction(MVT::i64, MVT::i16, Expand);
00146   setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
00147   setTruncStoreAction(MVT::i32, MVT::i16, Expand);
00148   setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
00149   setTruncStoreAction(MVT::i16, MVT::i8,  Expand);
00150 
00151   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
00152 
00153   // SETOEQ and SETUNE require checking two conditions.
00154   setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
00155   setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
00156   setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
00157   setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
00158   setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
00159   setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
00160 
00161   // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
00162   // operation.
00163   setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
00164   setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
00165   setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
00166 
00167   if (Subtarget->is64Bit()) {
00168     if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512())
00169       // f32/f64 are legal, f80 is custom.
00170       setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Custom);
00171     else
00172       setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
00173     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
00174   } else if (!Subtarget->useSoftFloat()) {
00175     // We have an algorithm for SSE2->double, and we turn this into a
00176     // 64-bit FILD followed by conditional FADD for other targets.
00177     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
00178     // We have an algorithm for SSE2, and we turn this into a 64-bit
00179     // FILD or VCVTUSI2SS/SD for other targets.
00180     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
00181   }
00182 
00183   // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
00184   // this operation.
00185   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
00186   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
00187 
00188   if (!Subtarget->useSoftFloat()) {
00189     // SSE has no i16 to fp conversion, only i32
00190     if (X86ScalarSSEf32) {
00191       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00192       // f32 and f64 cases are Legal, f80 case is not
00193       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00194     } else {
00195       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
00196       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00197     }
00198   } else {
00199     setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00200     setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Promote);
00201   }
00202 
00203   // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
00204   // this operation.
00205   setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
00206   setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
00207 
00208   if (!Subtarget->useSoftFloat()) {
00209     // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
00210     // are Legal, f80 is custom lowered.
00211     setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
00212     setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
00213 
00214     if (X86ScalarSSEf32) {
00215       setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
00216       // f32 and f64 cases are Legal, f80 case is not
00217       setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00218     } else {
00219       setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
00220       setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00221     }
00222   } else {
00223     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
00224     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Expand);
00225     setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Expand);
00226   }
00227 
00228   // Handle FP_TO_UINT by promoting the destination to a larger signed
00229   // conversion.
00230   setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
00231   setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
00232   setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
00233 
00234   if (Subtarget->is64Bit()) {
00235     if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
00236       // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
00237       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
00238       setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Custom);
00239     } else {
00240       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
00241       setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Expand);
00242     }
00243   } else if (!Subtarget->useSoftFloat()) {
00244     // Since AVX is a superset of SSE3, only check for SSE here.
00245     if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
00246       // Expand FP_TO_UINT into a select.
00247       // FIXME: We would like to use a Custom expander here eventually to do
00248       // the optimal thing for SSE vs. the default expansion in the legalizer.
00249       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
00250     else
00251       // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
00252       // With SSE3 we can use fisttpll to convert to a signed i64; without
00253       // SSE, we're stuck with a fistpll.
00254       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
00255 
00256     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Custom);
00257   }
00258 
00259   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
00260   if (!X86ScalarSSEf64) {
00261     setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
00262     setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
00263     if (Subtarget->is64Bit()) {
00264       setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
00265       // Without SSE, i64->f64 goes through memory.
00266       setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
00267     }
00268   } else if (!Subtarget->is64Bit())
00269     setOperationAction(ISD::BITCAST      , MVT::i64  , Custom);
00270 
00271   // Scalar integer divide and remainder are lowered to use operations that
00272   // produce two results, to match the available instructions. This exposes
00273   // the two-result form to trivial CSE, which is able to combine x/y and x%y
00274   // into a single instruction.
00275   //
00276   // Scalar integer multiply-high is also lowered to use two-result
00277   // operations, to match the available instructions. However, plain multiply
00278   // (low) operations are left as Legal, as there are single-result
00279   // instructions for this in x86. Using the two-result multiply instructions
00280   // when both high and low results are needed must be arranged by dagcombine.
00281   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
00282     setOperationAction(ISD::MULHS, VT, Expand);
00283     setOperationAction(ISD::MULHU, VT, Expand);
00284     setOperationAction(ISD::SDIV, VT, Expand);
00285     setOperationAction(ISD::UDIV, VT, Expand);
00286     setOperationAction(ISD::SREM, VT, Expand);
00287     setOperationAction(ISD::UREM, VT, Expand);
00288 
00289     // Add/Sub overflow ops with MVT::Glue are lowered to EFLAGS dependences.
00290     setOperationAction(ISD::ADDC, VT, Custom);
00291     setOperationAction(ISD::ADDE, VT, Custom);
00292     setOperationAction(ISD::SUBC, VT, Custom);
00293     setOperationAction(ISD::SUBE, VT, Custom);
00294   }
00295 
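        // There is no single compare-and-branch or compare-and-select instruction,
        // so expand BR_CC and SELECT_CC into a separate SETCC plus BRCOND/SELECT,
        // and lower jump tables (BR_JT) through an indirect branch.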
00296   setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
00297   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
00298   setOperationAction(ISD::BR_CC            , MVT::f32,   Expand);
00299   setOperationAction(ISD::BR_CC            , MVT::f64,   Expand);
00300   setOperationAction(ISD::BR_CC            , MVT::f80,   Expand);
00301   setOperationAction(ISD::BR_CC            , MVT::f128,  Expand);
00302   setOperationAction(ISD::BR_CC            , MVT::i8,    Expand);
00303   setOperationAction(ISD::BR_CC            , MVT::i16,   Expand);
00304   setOperationAction(ISD::BR_CC            , MVT::i32,   Expand);
00305   setOperationAction(ISD::BR_CC            , MVT::i64,   Expand);
00306   setOperationAction(ISD::SELECT_CC        , MVT::f32,   Expand);
00307   setOperationAction(ISD::SELECT_CC        , MVT::f64,   Expand);
00308   setOperationAction(ISD::SELECT_CC        , MVT::f80,   Expand);
00309   setOperationAction(ISD::SELECT_CC        , MVT::f128,  Expand);
00310   setOperationAction(ISD::SELECT_CC        , MVT::i8,    Expand);
00311   setOperationAction(ISD::SELECT_CC        , MVT::i16,   Expand);
00312   setOperationAction(ISD::SELECT_CC        , MVT::i32,   Expand);
00313   setOperationAction(ISD::SELECT_CC        , MVT::i64,   Expand);
00314   if (Subtarget->is64Bit())
00315     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
00316   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
00317   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
00318   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
00319   setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
00320 
00321   if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) {
00322     // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
00323     // is. We should promote the value to 64-bits to solve this.
00324     // This is what the CRT headers do - `fmodf` is an inline header
00325     // function casting to f64 and calling `fmod`.
00326     setOperationAction(ISD::FREM           , MVT::f32  , Promote);
00327   } else {
00328     setOperationAction(ISD::FREM           , MVT::f32  , Expand);
00329   }
00330 
00331   setOperationAction(ISD::FREM             , MVT::f64  , Expand);
00332   setOperationAction(ISD::FREM             , MVT::f80  , Expand);
00333   setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
00334 
00335   // Promote the i8 variants and force them up to i32, which has a shorter
00336   // encoding.
00337   setOperationAction(ISD::CTTZ             , MVT::i8   , Promote);
00338   AddPromotedToType (ISD::CTTZ             , MVT::i8   , MVT::i32);
00339   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , Promote);
00340   AddPromotedToType (ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , MVT::i32);
00341   if (Subtarget->hasBMI()) {
00342     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , Expand);
00343     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Expand);
00344     if (Subtarget->is64Bit())
00345       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
00346   } else {
00347     setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
00348     setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
00349     if (Subtarget->is64Bit())
00350       setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
00351   }
00352 
00353   if (Subtarget->hasLZCNT()) {
00354     // When promoting the i8 variants, force them to i32 for a shorter
00355     // encoding.
00356     setOperationAction(ISD::CTLZ           , MVT::i8   , Promote);
00357     AddPromotedToType (ISD::CTLZ           , MVT::i8   , MVT::i32);
00358     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Promote);
00359     AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
00360     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Expand);
00361     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Expand);
00362     if (Subtarget->is64Bit())
00363       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
00364   } else {
00365     setOperationAction(ISD::CTLZ           , MVT::i8   , Custom);
00366     setOperationAction(ISD::CTLZ           , MVT::i16  , Custom);
00367     setOperationAction(ISD::CTLZ           , MVT::i32  , Custom);
00368     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Custom);
00369     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Custom);
00370     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Custom);
00371     if (Subtarget->is64Bit()) {
00372       setOperationAction(ISD::CTLZ         , MVT::i64  , Custom);
00373       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
00374     }
00375   }
00376 
00377   // Special handling for half-precision floating point conversions.
00378   // If we don't have F16C support, then lower half float conversions
00379   // into library calls.
00380   if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
00381     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
00382     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
00383   }
00384 
00385   // There's never any support for operations beyond MVT::f32.
00386   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
00387   setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
00388   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
00389   setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
00390 
00391   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
00392   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
00393   setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
00394   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
00395   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
00396   setTruncStoreAction(MVT::f80, MVT::f16, Expand);
00397 
00398   if (Subtarget->hasPOPCNT()) {
00399     setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
00400   } else {
00401     setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
00402     setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
00403     setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
00404     if (Subtarget->is64Bit())
00405       setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
00406   }
00407 
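        // READCYCLECOUNTER is custom lowered to the RDTSC instruction sequence.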
00408   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
00409 
00410   if (!Subtarget->hasMOVBE())
00411     setOperationAction(ISD::BSWAP          , MVT::i16  , Expand);
00412 
00413   // These should be promoted to a larger select which is supported.
00414   setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
00415   // X86 wants to expand cmov itself.
00416   setOperationAction(ISD::SELECT          , MVT::i8   , Custom);
00417   setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
00418   setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
00419   setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
00420   setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
00421   setOperationAction(ISD::SELECT          , MVT::f80  , Custom);
00422   setOperationAction(ISD::SELECT          , MVT::f128 , Custom);
00423   setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
00424   setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
00425   setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
00426   setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
00427   setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
00428   setOperationAction(ISD::SETCC           , MVT::f80  , Custom);
00429   setOperationAction(ISD::SETCC           , MVT::f128 , Custom);
00430   setOperationAction(ISD::SETCCE          , MVT::i8   , Custom);
00431   setOperationAction(ISD::SETCCE          , MVT::i16  , Custom);
00432   setOperationAction(ISD::SETCCE          , MVT::i32  , Custom);
00433   if (Subtarget->is64Bit()) {
00434     setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
00435     setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
00436     setOperationAction(ISD::SETCCE        , MVT::i64  , Custom);
00437   }
00438   setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
00439   // NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
00440   // handling; they are a light-weight setjmp/longjmp replacement used to
00441   // support continuations, user-level threading, and the like. No other SjLj
00442   // exception interfaces are implemented, so please don't build your own
00443   // exception handling on top of them.
00444   // LLVM/Clang supports zero-cost DWARF exception handling.
00445   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
00446   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
00447 
00448   // Darwin ABI issue.
00449   setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
00450   setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
00451   setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
00452   setOperationAction(ISD::GlobalTLSAddress, MVT::i32  , Custom);
00453   if (Subtarget->is64Bit())
00454     setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
00455   setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
00456   setOperationAction(ISD::BlockAddress    , MVT::i32  , Custom);
00457   if (Subtarget->is64Bit()) {
00458     setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
00459     setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
00460     setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
00461     setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
00462     setOperationAction(ISD::BlockAddress  , MVT::i64  , Custom);
00463   }
00464   // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
00465   setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
00466   setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
00467   setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
00468   if (Subtarget->is64Bit()) {
00469     setOperationAction(ISD::SHL_PARTS     , MVT::i64  , Custom);
00470     setOperationAction(ISD::SRA_PARTS     , MVT::i64  , Custom);
00471     setOperationAction(ISD::SRL_PARTS     , MVT::i64  , Custom);
00472   }
00473 
00474   if (Subtarget->hasSSE1())
00475     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
00476 
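        // ATOMIC_FENCE is custom lowered: only a sequentially consistent
        // cross-thread fence needs a real instruction (MFENCE with SSE2, otherwise
        // a locked operation on the stack); other fences are compiler-only barriers.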
00477   setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);
00478 
00479   // Expand certain atomics
00480   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
00481     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
00482     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
00483     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
00484   }
00485 
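        // CMPXCHG16B gives x86-64 a native 128-bit compare-and-swap.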
00486   if (Subtarget->hasCmpxchg16b()) {
00487     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
00488   }
00489 
00490   // FIXME - use subtarget debug flags
00491   if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
00492       !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
00493     setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
00494   }
00495 
00496   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
00497   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
00498 
00499   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
00500   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
00501 
00502   setOperationAction(ISD::TRAP, MVT::Other, Legal);
00503   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
00504 
00505   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
00506   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
00507   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
00508   if (Subtarget->is64Bit()) {
00509     setOperationAction(ISD::VAARG           , MVT::Other, Custom);
00510     setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
00511   } else {
00512     // TargetInfo::CharPtrBuiltinVaList
00513     setOperationAction(ISD::VAARG           , MVT::Other, Expand);
00514     setOperationAction(ISD::VACOPY          , MVT::Other, Expand);
00515   }
00516 
00517   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
00518   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
00519 
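        // Dynamic stack allocation is custom lowered so targets that require stack
        // probing (e.g. Windows' __chkstk) or segmented stacks emit the proper call
        // sequence.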
00520   setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
00521 
00522   // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
00523   setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
00524   setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
00525 
00526   if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
00527     // f32 and f64 use SSE.
00528     // Set up the FP register classes.
00529     addRegisterClass(MVT::f32, &X86::FR32RegClass);
00530     addRegisterClass(MVT::f64, &X86::FR64RegClass);
00531 
00532     // Use ANDPD to simulate FABS.
00533     setOperationAction(ISD::FABS , MVT::f64, Custom);
00534     setOperationAction(ISD::FABS , MVT::f32, Custom);
00535 
00536     // Use XORP to simulate FNEG.
00537     setOperationAction(ISD::FNEG , MVT::f64, Custom);
00538     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00539 
00540     // Use ANDPD and ORPD to simulate FCOPYSIGN.
00541     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
00542     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00543 
00544     // Lower this to FGETSIGNx86 plus an AND.
00545     setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
00546     setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
00547 
00548     // We don't support sin/cos/fmod
00549     setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00550     setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00551     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00552     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00553     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00554     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00555 
00556     // Expand FP immediates into loads from the stack, except for the special
00557     // cases we handle.
00558     addLegalFPImmediate(APFloat(+0.0)); // xorpd
00559     addLegalFPImmediate(APFloat(+0.0f)); // xorps
00560   } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
00561     // Use SSE for f32, x87 for f64.
00562     // Set up the FP register classes.
00563     addRegisterClass(MVT::f32, &X86::FR32RegClass);
00564     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
00565 
00566     // Use ANDPS to simulate FABS.
00567     setOperationAction(ISD::FABS , MVT::f32, Custom);
00568 
00569     // Use XORP to simulate FNEG.
00570     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00571 
00572     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
00573 
00574     // Use ANDPS and ORPS to simulate FCOPYSIGN.
00575     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00576     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00577 
00578     // We don't support sin/cos/fmod
00579     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00580     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00581     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00582 
00583     // Special cases we handle for FP constants.
00584     addLegalFPImmediate(APFloat(+0.0f)); // xorps
00585     addLegalFPImmediate(APFloat(+0.0)); // FLD0
00586     addLegalFPImmediate(APFloat(+1.0)); // FLD1
00587     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
00588     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
00589 
00590     if (!TM.Options.UnsafeFPMath) {
00591       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00592       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00593       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00594     }
00595   } else if (!Subtarget->useSoftFloat()) {
00596     // f32 and f64 in x87.
00597     // Set up the FP register classes.
00598     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
00599     addRegisterClass(MVT::f32, &X86::RFP32RegClass);
00600 
00601     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
00602     setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
00603     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00604     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
00605 
00606     if (!TM.Options.UnsafeFPMath) {
00607       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
00608       setOperationAction(ISD::FSIN   , MVT::f32, Expand);
00609       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
00610       setOperationAction(ISD::FCOS   , MVT::f32, Expand);
00611       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
00612       setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
00613     }
00614     addLegalFPImmediate(APFloat(+0.0)); // FLD0
00615     addLegalFPImmediate(APFloat(+1.0)); // FLD1
00616     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
00617     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
00618     addLegalFPImmediate(APFloat(+0.0f)); // FLD0
00619     addLegalFPImmediate(APFloat(+1.0f)); // FLD1
00620     addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
00621     addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
00622   }
00623 
00624   // We don't support FMA.
00625   setOperationAction(ISD::FMA, MVT::f64, Expand);
00626   setOperationAction(ISD::FMA, MVT::f32, Expand);
00627 
00628   // Long double always uses X87, except f128 in MMX.
00629   if (!Subtarget->useSoftFloat()) {
00630     if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
00631       addRegisterClass(MVT::f128, &X86::FR128RegClass);
00632       ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
00633       setOperationAction(ISD::FABS , MVT::f128, Custom);
00634       setOperationAction(ISD::FNEG , MVT::f128, Custom);
00635       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
00636     }
00637 
00638     addRegisterClass(MVT::f80, &X86::RFP80RegClass);
00639     setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
00640     setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
00641     {
00642       APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
00643       addLegalFPImmediate(TmpFlt);  // FLD0
00644       TmpFlt.changeSign();
00645       addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
00646 
00647       bool ignored;
00648       APFloat TmpFlt2(+1.0);
00649       TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
00650                       &ignored);
00651       addLegalFPImmediate(TmpFlt2);  // FLD1
00652       TmpFlt2.changeSign();
00653       addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
00654     }
00655 
00656     if (!TM.Options.UnsafeFPMath) {
00657       setOperationAction(ISD::FSIN   , MVT::f80, Expand);
00658       setOperationAction(ISD::FCOS   , MVT::f80, Expand);
00659       setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
00660     }
00661 
00662     setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
00663     setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
00664     setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
00665     setOperationAction(ISD::FRINT,  MVT::f80, Expand);
00666     setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
00667     setOperationAction(ISD::FMA, MVT::f80, Expand);
00668   }
00669 
00670   // Always use a library call for pow.
00671   setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
00672   setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
00673   setOperationAction(ISD::FPOW             , MVT::f80  , Expand);
00674 
00675   setOperationAction(ISD::FLOG, MVT::f80, Expand);
00676   setOperationAction(ISD::FLOG2, MVT::f80, Expand);
00677   setOperationAction(ISD::FLOG10, MVT::f80, Expand);
00678   setOperationAction(ISD::FEXP, MVT::f80, Expand);
00679   setOperationAction(ISD::FEXP2, MVT::f80, Expand);
00680   setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
00681   setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
00682 
00683   // First set operation action for all vector types to either promote
00684   // (for widening) or expand (for scalarization). Then we will selectively
00685   // turn on ones that can be effectively codegen'd.
00686   for (MVT VT : MVT::vector_valuetypes()) {
00687     setOperationAction(ISD::ADD , VT, Expand);
00688     setOperationAction(ISD::SUB , VT, Expand);
00689     setOperationAction(ISD::FADD, VT, Expand);
00690     setOperationAction(ISD::FNEG, VT, Expand);
00691     setOperationAction(ISD::FSUB, VT, Expand);
00692     setOperationAction(ISD::MUL , VT, Expand);
00693     setOperationAction(ISD::FMUL, VT, Expand);
00694     setOperationAction(ISD::SDIV, VT, Expand);
00695     setOperationAction(ISD::UDIV, VT, Expand);
00696     setOperationAction(ISD::FDIV, VT, Expand);
00697     setOperationAction(ISD::SREM, VT, Expand);
00698     setOperationAction(ISD::UREM, VT, Expand);
00699     setOperationAction(ISD::LOAD, VT, Expand);
00700     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
00701     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
00702     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
00703     setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
00704     setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
00705     setOperationAction(ISD::FABS, VT, Expand);
00706     setOperationAction(ISD::FSIN, VT, Expand);
00707     setOperationAction(ISD::FSINCOS, VT, Expand);
00708     setOperationAction(ISD::FCOS, VT, Expand);
00709     setOperationAction(ISD::FSINCOS, VT, Expand);
00710     setOperationAction(ISD::FREM, VT, Expand);
00711     setOperationAction(ISD::FMA,  VT, Expand);
00712     setOperationAction(ISD::FPOWI, VT, Expand);
00713     setOperationAction(ISD::FSQRT, VT, Expand);
00714     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
00715     setOperationAction(ISD::FFLOOR, VT, Expand);
00716     setOperationAction(ISD::FCEIL, VT, Expand);
00717     setOperationAction(ISD::FTRUNC, VT, Expand);
00718     setOperationAction(ISD::FRINT, VT, Expand);
00719     setOperationAction(ISD::FNEARBYINT, VT, Expand);
00720     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
00721     setOperationAction(ISD::MULHS, VT, Expand);
00722     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
00723     setOperationAction(ISD::MULHU, VT, Expand);
00724     setOperationAction(ISD::SDIVREM, VT, Expand);
00725     setOperationAction(ISD::UDIVREM, VT, Expand);
00726     setOperationAction(ISD::FPOW, VT, Expand);
00727     setOperationAction(ISD::CTPOP, VT, Expand);
00728     setOperationAction(ISD::CTTZ, VT, Expand);
00729     setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
00730     setOperationAction(ISD::CTLZ, VT, Expand);
00731     setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
00732     setOperationAction(ISD::SHL, VT, Expand);
00733     setOperationAction(ISD::SRA, VT, Expand);
00734     setOperationAction(ISD::SRL, VT, Expand);
00735     setOperationAction(ISD::ROTL, VT, Expand);
00736     setOperationAction(ISD::ROTR, VT, Expand);
00737     setOperationAction(ISD::BSWAP, VT, Expand);
00738     setOperationAction(ISD::SETCC, VT, Expand);
00739     setOperationAction(ISD::FLOG, VT, Expand);
00740     setOperationAction(ISD::FLOG2, VT, Expand);
00741     setOperationAction(ISD::FLOG10, VT, Expand);
00742     setOperationAction(ISD::FEXP, VT, Expand);
00743     setOperationAction(ISD::FEXP2, VT, Expand);
00744     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
00745     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
00746     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
00747     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
00748     setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
00749     setOperationAction(ISD::TRUNCATE, VT, Expand);
00750     setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
00751     setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
00752     setOperationAction(ISD::ANY_EXTEND, VT, Expand);
00753     setOperationAction(ISD::VSELECT, VT, Expand);
00754     setOperationAction(ISD::SELECT_CC, VT, Expand);
00755     for (MVT InnerVT : MVT::vector_valuetypes()) {
00756       setTruncStoreAction(InnerVT, VT, Expand);
00757 
00758       setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
00759       setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
00760 
00761       // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
00762       // types, we have to deal with them whether we ask for Expansion or not.
00763       // Setting Expand causes its own optimisation problems though, so leave
00764       // them legal.
00765       if (VT.getVectorElementType() == MVT::i1)
00766         setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
00767 
00768       // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
00769       // split/scalarized right now.
00770       if (VT.getVectorElementType() == MVT::f16)
00771         setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
00772     }
00773   }
00774 
00775   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
00776   // with -msoft-float, disable use of MMX as well.
00777   if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
00778     addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
00779     // No operations on x86mmx are supported; everything uses intrinsics.
00780   }
00781 
00782   // MMX-sized vectors (other than x86mmx) are expected to be expanded
00783   // into smaller operations.
00784   for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
00785     setOperationAction(ISD::MULHS,              MMXTy,      Expand);
00786     setOperationAction(ISD::AND,                MMXTy,      Expand);
00787     setOperationAction(ISD::OR,                 MMXTy,      Expand);
00788     setOperationAction(ISD::XOR,                MMXTy,      Expand);
00789     setOperationAction(ISD::SCALAR_TO_VECTOR,   MMXTy,      Expand);
00790     setOperationAction(ISD::SELECT,             MMXTy,      Expand);
00791     setOperationAction(ISD::BITCAST,            MMXTy,      Expand);
00792   }
00793   setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v1i64, Expand);
00794 
00795   if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
00796     addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
00797 
00798     setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
00799     setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
00800     setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
00801     setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
00802     setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
00803     setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
00804     setOperationAction(ISD::FABS,               MVT::v4f32, Custom);
00805     setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
00806     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
00807     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
00808     setOperationAction(ISD::VSELECT,            MVT::v4f32, Custom);
00809     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
00810     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
00811     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Custom);
00812   }
00813 
00814   if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
00815     addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
00816 
00817     // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
00818     // registers cannot be used even for integer operations.
00819     addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
00820     addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
00821     addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
00822     addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
00823 
00824     setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
00825     setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
00826     setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
00827     setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
00828     setOperationAction(ISD::MUL,                MVT::v16i8, Custom);
00829     setOperationAction(ISD::MUL,                MVT::v4i32, Custom);
00830     setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
00831     setOperationAction(ISD::UMUL_LOHI,          MVT::v4i32, Custom);
00832     setOperationAction(ISD::SMUL_LOHI,          MVT::v4i32, Custom);
00833     setOperationAction(ISD::MULHU,              MVT::v8i16, Legal);
00834     setOperationAction(ISD::MULHS,              MVT::v8i16, Legal);
00835     setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
00836     setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
00837     setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
00838     setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
00839     setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
00840     setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
00841     setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
00842     setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
00843     setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
00844     setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
00845     setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
00846     setOperationAction(ISD::FABS,               MVT::v2f64, Custom);
00847 
00848     setOperationAction(ISD::SMAX,               MVT::v8i16, Legal);
00849     setOperationAction(ISD::UMAX,               MVT::v16i8, Legal);
00850     setOperationAction(ISD::SMIN,               MVT::v8i16, Legal);
00851     setOperationAction(ISD::UMIN,               MVT::v16i8, Legal);
00852 
00853     setOperationAction(ISD::SETCC,              MVT::v2i64, Custom);
00854     setOperationAction(ISD::SETCC,              MVT::v16i8, Custom);
00855     setOperationAction(ISD::SETCC,              MVT::v8i16, Custom);
00856     setOperationAction(ISD::SETCC,              MVT::v4i32, Custom);
00857 
00858     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
00859     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
00860     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
00861     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
00862     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
00863 
00864     setOperationAction(ISD::CTPOP,              MVT::v16i8, Custom);
00865     setOperationAction(ISD::CTPOP,              MVT::v8i16, Custom);
00866     setOperationAction(ISD::CTPOP,              MVT::v4i32, Custom);
00867     setOperationAction(ISD::CTPOP,              MVT::v2i64, Custom);
00868 
00869     setOperationAction(ISD::CTTZ,               MVT::v16i8, Custom);
00870     setOperationAction(ISD::CTTZ,               MVT::v8i16, Custom);
00871     setOperationAction(ISD::CTTZ,               MVT::v4i32, Custom);
00872     // ISD::CTTZ v2i64 - scalarization is faster.
00873     setOperationAction(ISD::CTTZ_ZERO_UNDEF,    MVT::v16i8, Custom);
00874     setOperationAction(ISD::CTTZ_ZERO_UNDEF,    MVT::v8i16, Custom);
00875     setOperationAction(ISD::CTTZ_ZERO_UNDEF,    MVT::v4i32, Custom);
00876     // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster.
00877 
00878     // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
00879     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
00880       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
00881       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
00882       setOperationAction(ISD::VSELECT,            VT, Custom);
00883       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
00884     }
00885 
00886     // We support custom legalizing of sext and anyext loads for specific
00887     // memory vector types which we can load as a scalar (or sequence of
00888     // scalars) and extend in-register to a legal 128-bit vector type. For sext
00889     // loads these must work with a single scalar load.
00890     for (MVT VT : MVT::integer_vector_valuetypes()) {
00891       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
00892       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
00893       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
00894       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
00895       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
00896       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
00897       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
00898       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
00899       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
00900     }
00901 
00902     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
00903     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
00904     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
00905     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
00906     setOperationAction(ISD::VSELECT,            MVT::v2f64, Custom);
00907     setOperationAction(ISD::VSELECT,            MVT::v2i64, Custom);
00908     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
00909     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
00910 
00911     if (Subtarget->is64Bit()) {
00912       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
00913       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
00914     }
00915 
00916     // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
00917     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
00918       setOperationAction(ISD::AND,    VT, Promote);
00919       AddPromotedToType (ISD::AND,    VT, MVT::v2i64);
00920       setOperationAction(ISD::OR,     VT, Promote);
00921       AddPromotedToType (ISD::OR,     VT, MVT::v2i64);
00922       setOperationAction(ISD::XOR,    VT, Promote);
00923       AddPromotedToType (ISD::XOR,    VT, MVT::v2i64);
00924       setOperationAction(ISD::LOAD,   VT, Promote);
00925       AddPromotedToType (ISD::LOAD,   VT, MVT::v2i64);
00926       setOperationAction(ISD::SELECT, VT, Promote);
00927       AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
00928     }
00929 
00930     // Custom lower v2i64 and v2f64 selects.
00931     setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
00932     setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
00933     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
00934     setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
00935 
00936     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
00937     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
00938 
00939     setOperationAction(ISD::SINT_TO_FP,         MVT::v2i32, Custom);
00940 
00941     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i8,  Custom);
00942     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i16, Custom);
00943     // As there is no 64-bit GPR available, we need to build a special custom
00944     // sequence to convert from v2i32 to v2f32.
00945     if (!Subtarget->is64Bit())
00946       setOperationAction(ISD::UINT_TO_FP,       MVT::v2f32, Custom);
00947 
00948     setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
00949     setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
00950 
00951     for (MVT VT : MVT::fp_vector_valuetypes())
00952       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
00953 
00954     setOperationAction(ISD::BITCAST,            MVT::v2i32, Custom);
00955     setOperationAction(ISD::BITCAST,            MVT::v4i16, Custom);
00956     setOperationAction(ISD::BITCAST,            MVT::v8i8,  Custom);
00957   }
00958 
00959   if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
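          // SSE4.1 provides ROUNDSS/ROUNDSD/ROUNDPS/ROUNDPD, making the explicit
          // rounding operations legal for scalar and 128-bit vector FP types.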
00960     for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
00961       setOperationAction(ISD::FFLOOR,           RoundedTy,  Legal);
00962       setOperationAction(ISD::FCEIL,            RoundedTy,  Legal);
00963       setOperationAction(ISD::FTRUNC,           RoundedTy,  Legal);
00964       setOperationAction(ISD::FRINT,            RoundedTy,  Legal);
00965       setOperationAction(ISD::FNEARBYINT,       RoundedTy,  Legal);
00966     }
00967 
00968     setOperationAction(ISD::SMAX,               MVT::v16i8, Legal);
00969     setOperationAction(ISD::SMAX,               MVT::v4i32, Legal);
00970     setOperationAction(ISD::UMAX,               MVT::v8i16, Legal);
00971     setOperationAction(ISD::UMAX,               MVT::v4i32, Legal);
00972     setOperationAction(ISD::SMIN,               MVT::v16i8, Legal);
00973     setOperationAction(ISD::SMIN,               MVT::v4i32, Legal);
00974     setOperationAction(ISD::UMIN,               MVT::v8i16, Legal);
00975     setOperationAction(ISD::UMIN,               MVT::v4i32, Legal);
00976 
00977     // FIXME: Do we need to handle scalar-to-vector here?
00978     setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
00979 
00980     // We directly match byte blends in the backend as they match the VSELECT
00981     // condition form.
00982     setOperationAction(ISD::VSELECT,            MVT::v16i8, Legal);
00983 
00984     // SSE41 brings specific instructions for doing vector sign extend even in
00985     // cases where we don't have SRA.
00986     for (MVT VT : MVT::integer_vector_valuetypes()) {
00987       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
00988       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
00989       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
00990     }
00991 
00992     // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
00993     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
00994     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
00995     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
00996     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
00997     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
00998     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
00999 
01000     setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
01001     setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
01002     setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
01003     setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
01004     setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
01005     setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
01006 
01007     // i8 and i16 vectors are custom because the source register and source
01008     // memory operand types are not the same width.  f32 vectors are
01009     // custom since the immediate controlling the insert encodes additional
01010     // information.
01011     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
01012     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
01013     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
01014     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
01015 
01016     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
01017     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
01018     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
01019     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
01020 
01021     // FIXME: these should be Legal, but that's only for the case where
01022     // the index is constant.  For now custom expand to deal with that.
01023     if (Subtarget->is64Bit()) {
01024       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
01025       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
01026     }
01027   }
01028 
01029   if (Subtarget->hasSSE2()) {
01030     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
01031     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
01032     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
01033 
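          // SSE2 has no per-element shift amounts and no i8 element shifts, so
          // i8/i16 vector shifts are custom lowered (uniform amounts map to
          // PSLLW/PSRLW/PSRAW; the rest is synthesized).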
01034     setOperationAction(ISD::SRL,               MVT::v8i16, Custom);
01035     setOperationAction(ISD::SRL,               MVT::v16i8, Custom);
01036 
01037     setOperationAction(ISD::SHL,               MVT::v8i16, Custom);
01038     setOperationAction(ISD::SHL,               MVT::v16i8, Custom);
01039 
01040     setOperationAction(ISD::SRA,               MVT::v8i16, Custom);
01041     setOperationAction(ISD::SRA,               MVT::v16i8, Custom);
01042 
01043     // In the customized shift lowering, the legal cases in AVX2 will be
01044     // recognized.
01045     setOperationAction(ISD::SRL,               MVT::v2i64, Custom);
01046     setOperationAction(ISD::SRL,               MVT::v4i32, Custom);
01047 
01048     setOperationAction(ISD::SHL,               MVT::v2i64, Custom);
01049     setOperationAction(ISD::SHL,               MVT::v4i32, Custom);
01050 
01051     setOperationAction(ISD::SRA,               MVT::v2i64, Custom);
01052     setOperationAction(ISD::SRA,               MVT::v4i32, Custom);
01053   }
01054 
01055   if (Subtarget->hasXOP()) {
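          // XOP provides vector rotate instructions (VPROTB/W/D/Q), so integer
          // vector rotates are custom lowered to use them.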
01056     setOperationAction(ISD::ROTL,              MVT::v16i8, Custom);
01057     setOperationAction(ISD::ROTL,              MVT::v8i16, Custom);
01058     setOperationAction(ISD::ROTL,              MVT::v4i32, Custom);
01059     setOperationAction(ISD::ROTL,              MVT::v2i64, Custom);
01060     setOperationAction(ISD::ROTL,              MVT::v32i8, Custom);
01061     setOperationAction(ISD::ROTL,              MVT::v16i16, Custom);
01062     setOperationAction(ISD::ROTL,              MVT::v8i32, Custom);
01063     setOperationAction(ISD::ROTL,              MVT::v4i64, Custom);
01064   }
01065 
01066   if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
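          // With AVX the 256-bit vector types become legal and are carried in the
          // YMM registers (VR256).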
01067     addRegisterClass(MVT::v32i8,  &X86::VR256RegClass);
01068     addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
01069     addRegisterClass(MVT::v8i32,  &X86::VR256RegClass);
01070     addRegisterClass(MVT::v8f32,  &X86::VR256RegClass);
01071     addRegisterClass(MVT::v4i64,  &X86::VR256RegClass);
01072     addRegisterClass(MVT::v4f64,  &X86::VR256RegClass);
01073 
01074     setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
01075     setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
01076     setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);
01077 
01078     setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
01079     setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
01080     setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
01081     setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
01082     setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
01083     setOperationAction(ISD::FFLOOR,             MVT::v8f32, Legal);
01084     setOperationAction(ISD::FCEIL,              MVT::v8f32, Legal);
01085     setOperationAction(ISD::FTRUNC,             MVT::v8f32, Legal);
01086     setOperationAction(ISD::FRINT,              MVT::v8f32, Legal);
01087     setOperationAction(ISD::FNEARBYINT,         MVT::v8f32, Legal);
01088     setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
01089     setOperationAction(ISD::FABS,               MVT::v8f32, Custom);
01090 
01091     setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
01092     setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
01093     setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
01094     setOperationAction(ISD::FDIV,               MVT::v4f64, Legal);
01095     setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
01096     setOperationAction(ISD::FFLOOR,             MVT::v4f64, Legal);
01097     setOperationAction(ISD::FCEIL,              MVT::v4f64, Legal);
01098     setOperationAction(ISD::FTRUNC,             MVT::v4f64, Legal);
01099     setOperationAction(ISD::FRINT,              MVT::v4f64, Legal);
01100     setOperationAction(ISD::FNEARBYINT,         MVT::v4f64, Legal);
01101     setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
01102     setOperationAction(ISD::FABS,               MVT::v4f64, Custom);
01103 
01104     // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
01105     // even though v8i16 is a legal type.
01106     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Promote);
01107     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i16, Promote);
01108     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);
01109 
01110     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i16, Promote);
01111     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
01112     setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);
01113 
01114     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i8,  Custom);
01115     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i16, Custom);
01116 
01117     for (MVT VT : MVT::fp_vector_valuetypes())
01118       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
01119 
01120     setOperationAction(ISD::SRL,               MVT::v16i16, Custom);
01121     setOperationAction(ISD::SRL,               MVT::v32i8, Custom);
01122 
01123     setOperationAction(ISD::SHL,               MVT::v16i16, Custom);
01124     setOperationAction(ISD::SHL,               MVT::v32i8, Custom);
01125 
01126     setOperationAction(ISD::SRA,               MVT::v16i16, Custom);
01127     setOperationAction(ISD::SRA,               MVT::v32i8, Custom);
01128 
01129     setOperationAction(ISD::SETCC,             MVT::v32i8, Custom);
01130     setOperationAction(ISD::SETCC,             MVT::v16i16, Custom);
01131     setOperationAction(ISD::SETCC,             MVT::v8i32, Custom);
01132     setOperationAction(ISD::SETCC,             MVT::v4i64, Custom);
01133 
01134     setOperationAction(ISD::SELECT,            MVT::v4f64, Custom);
01135     setOperationAction(ISD::SELECT,            MVT::v4i64, Custom);
01136     setOperationAction(ISD::SELECT,            MVT::v8f32, Custom);
01137 
01138     setOperationAction(ISD::SIGN_EXTEND,       MVT::v4i64, Custom);
01139     setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i32, Custom);
01140     setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i16, Custom);
01141     setOperationAction(ISD::ZERO_EXTEND,       MVT::v4i64, Custom);
01142     setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i32, Custom);
01143     setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i16, Custom);
01144     setOperationAction(ISD::ANY_EXTEND,        MVT::v4i64, Custom);
01145     setOperationAction(ISD::ANY_EXTEND,        MVT::v8i32, Custom);
01146     setOperationAction(ISD::ANY_EXTEND,        MVT::v16i16, Custom);
01147     setOperationAction(ISD::TRUNCATE,          MVT::v16i8, Custom);
01148     setOperationAction(ISD::TRUNCATE,          MVT::v8i16, Custom);
01149     setOperationAction(ISD::TRUNCATE,          MVT::v4i32, Custom);
01150 
01151     setOperationAction(ISD::CTPOP,             MVT::v32i8, Custom);
01152     setOperationAction(ISD::CTPOP,             MVT::v16i16, Custom);
01153     setOperationAction(ISD::CTPOP,             MVT::v8i32, Custom);
01154     setOperationAction(ISD::CTPOP,             MVT::v4i64, Custom);
01155 
01156     setOperationAction(ISD::CTTZ,              MVT::v32i8, Custom);
01157     setOperationAction(ISD::CTTZ,              MVT::v16i16, Custom);
01158     setOperationAction(ISD::CTTZ,              MVT::v8i32, Custom);
01159     setOperationAction(ISD::CTTZ,              MVT::v4i64, Custom);
01160     setOperationAction(ISD::CTTZ_ZERO_UNDEF,   MVT::v32i8, Custom);
01161     setOperationAction(ISD::CTTZ_ZERO_UNDEF,   MVT::v16i16, Custom);
01162     setOperationAction(ISD::CTTZ_ZERO_UNDEF,   MVT::v8i32, Custom);
01163     setOperationAction(ISD::CTTZ_ZERO_UNDEF,   MVT::v4i64, Custom);
01164 
01165     if (Subtarget->hasAnyFMA()) {
01166       setOperationAction(ISD::FMA,             MVT::v8f32, Legal);
01167       setOperationAction(ISD::FMA,             MVT::v4f64, Legal);
01168       setOperationAction(ISD::FMA,             MVT::v4f32, Legal);
01169       setOperationAction(ISD::FMA,             MVT::v2f64, Legal);
01170       setOperationAction(ISD::FMA,             MVT::f32, Legal);
01171       setOperationAction(ISD::FMA,             MVT::f64, Legal);
01172     }
01173 
01174     if (Subtarget->hasInt256()) {
01175       setOperationAction(ISD::ADD,             MVT::v4i64, Legal);
01176       setOperationAction(ISD::ADD,             MVT::v8i32, Legal);
01177       setOperationAction(ISD::ADD,             MVT::v16i16, Legal);
01178       setOperationAction(ISD::ADD,             MVT::v32i8, Legal);
01179 
01180       setOperationAction(ISD::SUB,             MVT::v4i64, Legal);
01181       setOperationAction(ISD::SUB,             MVT::v8i32, Legal);
01182       setOperationAction(ISD::SUB,             MVT::v16i16, Legal);
01183       setOperationAction(ISD::SUB,             MVT::v32i8, Legal);
01184 
01185       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01186       setOperationAction(ISD::MUL,             MVT::v8i32, Legal);
01187       setOperationAction(ISD::MUL,             MVT::v16i16, Legal);
01188       setOperationAction(ISD::MUL,             MVT::v32i8, Custom);
01189 
01190       setOperationAction(ISD::UMUL_LOHI,       MVT::v8i32, Custom);
01191       setOperationAction(ISD::SMUL_LOHI,       MVT::v8i32, Custom);
01192       setOperationAction(ISD::MULHU,           MVT::v16i16, Legal);
01193       setOperationAction(ISD::MULHS,           MVT::v16i16, Legal);
01194 
01195       setOperationAction(ISD::SMAX,            MVT::v32i8,  Legal);
01196       setOperationAction(ISD::SMAX,            MVT::v16i16, Legal);
01197       setOperationAction(ISD::SMAX,            MVT::v8i32,  Legal);
01198       setOperationAction(ISD::UMAX,            MVT::v32i8,  Legal);
01199       setOperationAction(ISD::UMAX,            MVT::v16i16, Legal);
01200       setOperationAction(ISD::UMAX,            MVT::v8i32,  Legal);
01201       setOperationAction(ISD::SMIN,            MVT::v32i8,  Legal);
01202       setOperationAction(ISD::SMIN,            MVT::v16i16, Legal);
01203       setOperationAction(ISD::SMIN,            MVT::v8i32,  Legal);
01204       setOperationAction(ISD::UMIN,            MVT::v32i8,  Legal);
01205       setOperationAction(ISD::UMIN,            MVT::v16i16, Legal);
01206       setOperationAction(ISD::UMIN,            MVT::v8i32,  Legal);
01207 
01208       // The custom lowering of UINT_TO_FP for v8i32 becomes interesting
01209       // when we have a 256-bit-wide blend with immediate.
01210       setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
01211 
01212       // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
01213       setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
01214       setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
01215       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
01216       setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
01217       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
01218       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
01219 
01220       setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
01221       setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
01222       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
01223       setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
01224       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
01225       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
01226     } else {
01227       setOperationAction(ISD::ADD,             MVT::v4i64, Custom);
01228       setOperationAction(ISD::ADD,             MVT::v8i32, Custom);
01229       setOperationAction(ISD::ADD,             MVT::v16i16, Custom);
01230       setOperationAction(ISD::ADD,             MVT::v32i8, Custom);
01231 
01232       setOperationAction(ISD::SUB,             MVT::v4i64, Custom);
01233       setOperationAction(ISD::SUB,             MVT::v8i32, Custom);
01234       setOperationAction(ISD::SUB,             MVT::v16i16, Custom);
01235       setOperationAction(ISD::SUB,             MVT::v32i8, Custom);
01236 
01237       setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
01238       setOperationAction(ISD::MUL,             MVT::v8i32, Custom);
01239       setOperationAction(ISD::MUL,             MVT::v16i16, Custom);
01240       setOperationAction(ISD::MUL,             MVT::v32i8, Custom);
01241 
01242       setOperationAction(ISD::SMAX,            MVT::v32i8,  Custom);
01243       setOperationAction(ISD::SMAX,            MVT::v16i16, Custom);
01244       setOperationAction(ISD::SMAX,            MVT::v8i32,  Custom);
01245       setOperationAction(ISD::UMAX,            MVT::v32i8,  Custom);
01246       setOperationAction(ISD::UMAX,            MVT::v16i16, Custom);
01247       setOperationAction(ISD::UMAX,            MVT::v8i32,  Custom);
01248       setOperationAction(ISD::SMIN,            MVT::v32i8,  Custom);
01249       setOperationAction(ISD::SMIN,            MVT::v16i16, Custom);
01250       setOperationAction(ISD::SMIN,            MVT::v8i32,  Custom);
01251       setOperationAction(ISD::UMIN,            MVT::v32i8,  Custom);
01252       setOperationAction(ISD::UMIN,            MVT::v16i16, Custom);
01253       setOperationAction(ISD::UMIN,            MVT::v8i32,  Custom);
01254     }
01255 
01256     // The custom shift lowering recognizes the cases that are legal with
01257     // AVX2.
01258     setOperationAction(ISD::SRL,               MVT::v4i64, Custom);
01259     setOperationAction(ISD::SRL,               MVT::v8i32, Custom);
01260 
01261     setOperationAction(ISD::SHL,               MVT::v4i64, Custom);
01262     setOperationAction(ISD::SHL,               MVT::v8i32, Custom);
01263 
01264     setOperationAction(ISD::SRA,               MVT::v4i64, Custom);
01265     setOperationAction(ISD::SRA,               MVT::v8i32, Custom);
01266 
01267     // Custom lower several nodes for 256-bit types.
01268     for (MVT VT : MVT::vector_valuetypes()) {
01269       if (VT.getScalarSizeInBits() >= 32) {
01270         setOperationAction(ISD::MLOAD,  VT, Legal);
01271         setOperationAction(ISD::MSTORE, VT, Legal);
01272       }
01273       // Extract subvector is special because the value type
01274       // (result) is 128-bit but the source is 256-bit wide.
01275       if (VT.is128BitVector()) {
01276         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01277       }
01278       // Do not attempt to custom lower other non-256-bit vectors
01279       if (!VT.is256BitVector())
01280         continue;
01281 
01282       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
01283       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
01284       setOperationAction(ISD::VSELECT,            VT, Custom);
01285       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
01286       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
01287       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
01288       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Custom);
01289       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
01290     }
01291 
01292     if (Subtarget->hasInt256())
01293       setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
01294 
01295     // Promote v32i8, v16i16 and v8i32 AND/OR/XOR, LOAD and SELECT to v4i64.
01296     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
01297       setOperationAction(ISD::AND,    VT, Promote);
01298       AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
01299       setOperationAction(ISD::OR,     VT, Promote);
01300       AddPromotedToType (ISD::OR,     VT, MVT::v4i64);
01301       setOperationAction(ISD::XOR,    VT, Promote);
01302       AddPromotedToType (ISD::XOR,    VT, MVT::v4i64);
01303       setOperationAction(ISD::LOAD,   VT, Promote);
01304       AddPromotedToType (ISD::LOAD,   VT, MVT::v4i64);
01305       setOperationAction(ISD::SELECT, VT, Promote);
01306       AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
01307     }
01308   }
01309 
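  // AVX-512 adds the 512-bit ZMM register classes plus the k-mask register
  // classes used for i1 vectors and predication.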
01310   if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
01311     addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
01312     addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
01313     addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
01314     addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
01315 
01316     addRegisterClass(MVT::i1,     &X86::VK1RegClass);
01317     addRegisterClass(MVT::v8i1,   &X86::VK8RegClass);
01318     addRegisterClass(MVT::v16i1,  &X86::VK16RegClass);
01319 
01320     for (MVT VT : MVT::fp_vector_valuetypes())
01321       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
01322 
01323     setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
01324     setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
01325     setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
01326     setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
01327     setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
01328     setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
01329     setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64,  MVT::v8i8,  Legal);
01330     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64,  MVT::v8i8,  Legal);
01331     setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64,  MVT::v8i16,  Legal);
01332     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64,  MVT::v8i16,  Legal);
01333     setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64,  MVT::v8i32,  Legal);
01334     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64,  MVT::v8i32,  Legal);
01335 
01336     setOperationAction(ISD::BR_CC,              MVT::i1,    Expand);
01337     setOperationAction(ISD::SETCC,              MVT::i1,    Custom);
01338     setOperationAction(ISD::SELECT_CC,          MVT::i1,    Expand);
01339     setOperationAction(ISD::XOR,                MVT::i1,    Legal);
01340     setOperationAction(ISD::OR,                 MVT::i1,    Legal);
01341     setOperationAction(ISD::AND,                MVT::i1,    Legal);
01342     setOperationAction(ISD::SUB,                MVT::i1,    Custom);
01343     setOperationAction(ISD::ADD,                MVT::i1,    Custom);
01344     setOperationAction(ISD::MUL,                MVT::i1,    Custom);
01345     setOperationAction(ISD::LOAD,               MVT::v16f32, Legal);
01346     setOperationAction(ISD::LOAD,               MVT::v8f64, Legal);
01347     setOperationAction(ISD::LOAD,               MVT::v8i64, Legal);
01348     setOperationAction(ISD::LOAD,               MVT::v16i32, Legal);
01349     setOperationAction(ISD::LOAD,               MVT::v16i1, Legal);
01350 
01351     setOperationAction(ISD::FADD,               MVT::v16f32, Legal);
01352     setOperationAction(ISD::FSUB,               MVT::v16f32, Legal);
01353     setOperationAction(ISD::FMUL,               MVT::v16f32, Legal);
01354     setOperationAction(ISD::FDIV,               MVT::v16f32, Legal);
01355     setOperationAction(ISD::FSQRT,              MVT::v16f32, Legal);
01356     setOperationAction(ISD::FNEG,               MVT::v16f32, Custom);
01357     setOperationAction(ISD::FABS,               MVT::v16f32, Custom);
01358 
01359     setOperationAction(ISD::FADD,               MVT::v8f64, Legal);
01360     setOperationAction(ISD::FSUB,               MVT::v8f64, Legal);
01361     setOperationAction(ISD::FMUL,               MVT::v8f64, Legal);
01362     setOperationAction(ISD::FDIV,               MVT::v8f64, Legal);
01363     setOperationAction(ISD::FSQRT,              MVT::v8f64, Legal);
01364     setOperationAction(ISD::FNEG,               MVT::v8f64, Custom);
01365     setOperationAction(ISD::FABS,               MVT::v8f64, Custom);
01366     setOperationAction(ISD::FMA,                MVT::v8f64, Legal);
01367     setOperationAction(ISD::FMA,                MVT::v16f32, Legal);
01368 
01369     setOperationAction(ISD::FP_TO_SINT,         MVT::v16i32, Legal);
01370     setOperationAction(ISD::FP_TO_UINT,         MVT::v16i32, Legal);
01371     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i32, Legal);
01372     setOperationAction(ISD::FP_TO_UINT,         MVT::v4i32, Legal);
01373     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i32, Legal);
01374     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i1,   Custom);
01375     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i1,  Custom);
01376     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i8,  Promote);
01377     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i16, Promote);
01378     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i32, Legal);
01379     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i32, Legal);
01380     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Legal);
01381     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i8, Custom);
01382     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i16, Custom);
01383     setOperationAction(ISD::FP_ROUND,           MVT::v8f32, Legal);
01384     setOperationAction(ISD::FP_EXTEND,          MVT::v8f32, Legal);
01385 
01386     setTruncStoreAction(MVT::v8i64,   MVT::v8i8,   Legal);
01387     setTruncStoreAction(MVT::v8i64,   MVT::v8i16,  Legal);
01388     setTruncStoreAction(MVT::v8i64,   MVT::v8i32,  Legal);
01389     setTruncStoreAction(MVT::v16i32,  MVT::v16i8,  Legal);
01390     setTruncStoreAction(MVT::v16i32,  MVT::v16i16, Legal);
01391     if (Subtarget->hasVLX()) {
01392       setTruncStoreAction(MVT::v4i64, MVT::v4i8,  Legal);
01393       setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
01394       setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
01395       setTruncStoreAction(MVT::v8i32, MVT::v8i8,  Legal);
01396       setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
01397 
01398       setTruncStoreAction(MVT::v2i64, MVT::v2i8,  Legal);
01399       setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
01400       setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
01401       setTruncStoreAction(MVT::v4i32, MVT::v4i8,  Legal);
01402       setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
01403     } else {
01404       setOperationAction(ISD::MLOAD,    MVT::v8i32, Custom);
01405       setOperationAction(ISD::MLOAD,    MVT::v8f32, Custom);
01406       setOperationAction(ISD::MSTORE,   MVT::v8i32, Custom);
01407       setOperationAction(ISD::MSTORE,   MVT::v8f32, Custom);
01408     }
01409     setOperationAction(ISD::TRUNCATE,           MVT::i1, Custom);
01410     setOperationAction(ISD::TRUNCATE,           MVT::v16i8, Custom);
01411     setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
01412     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8i1,  Custom);
01413     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v16i1, Custom);
01414     if (Subtarget->hasDQI()) {
01415       setOperationAction(ISD::TRUNCATE,         MVT::v2i1, Custom);
01416       setOperationAction(ISD::TRUNCATE,         MVT::v4i1, Custom);
01417 
01418       setOperationAction(ISD::SINT_TO_FP,       MVT::v8i64, Legal);
01419       setOperationAction(ISD::UINT_TO_FP,       MVT::v8i64, Legal);
01420       setOperationAction(ISD::FP_TO_SINT,       MVT::v8i64, Legal);
01421       setOperationAction(ISD::FP_TO_UINT,       MVT::v8i64, Legal);
01422       if (Subtarget->hasVLX()) {
01423         setOperationAction(ISD::SINT_TO_FP,    MVT::v4i64, Legal);
01424         setOperationAction(ISD::SINT_TO_FP,    MVT::v2i64, Legal);
01425         setOperationAction(ISD::UINT_TO_FP,    MVT::v4i64, Legal);
01426         setOperationAction(ISD::UINT_TO_FP,    MVT::v2i64, Legal);
01427         setOperationAction(ISD::FP_TO_SINT,    MVT::v4i64, Legal);
01428         setOperationAction(ISD::FP_TO_SINT,    MVT::v2i64, Legal);
01429         setOperationAction(ISD::FP_TO_UINT,    MVT::v4i64, Legal);
01430         setOperationAction(ISD::FP_TO_UINT,    MVT::v2i64, Legal);
01431       }
01432     }
01433     if (Subtarget->hasVLX()) {
01434       setOperationAction(ISD::SINT_TO_FP,       MVT::v8i32, Legal);
01435       setOperationAction(ISD::UINT_TO_FP,       MVT::v8i32, Legal);
01436       setOperationAction(ISD::FP_TO_SINT,       MVT::v8i32, Legal);
01437       setOperationAction(ISD::FP_TO_UINT,       MVT::v8i32, Legal);
01438       setOperationAction(ISD::SINT_TO_FP,       MVT::v4i32, Legal);
01439       setOperationAction(ISD::UINT_TO_FP,       MVT::v4i32, Legal);
01440       setOperationAction(ISD::FP_TO_SINT,       MVT::v4i32, Legal);
01441       setOperationAction(ISD::FP_TO_UINT,       MVT::v4i32, Legal);
01442     }
01443     setOperationAction(ISD::TRUNCATE,           MVT::v8i1, Custom);
01444     setOperationAction(ISD::TRUNCATE,           MVT::v16i1, Custom);
01445     setOperationAction(ISD::TRUNCATE,           MVT::v16i16, Custom);
01446     setOperationAction(ISD::ZERO_EXTEND,        MVT::v16i32, Custom);
01447     setOperationAction(ISD::ZERO_EXTEND,        MVT::v8i64, Custom);
01448     setOperationAction(ISD::ANY_EXTEND,         MVT::v16i32, Custom);
01449     setOperationAction(ISD::ANY_EXTEND,         MVT::v8i64, Custom);
01450     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i32, Custom);
01451     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i64, Custom);
01452     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i8, Custom);
01453     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i16, Custom);
01454     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i16, Custom);
01455     if (Subtarget->hasDQI()) {
01456       setOperationAction(ISD::SIGN_EXTEND,        MVT::v4i32, Custom);
01457       setOperationAction(ISD::SIGN_EXTEND,        MVT::v2i64, Custom);
01458     }
01459     setOperationAction(ISD::FFLOOR,             MVT::v16f32, Legal);
01460     setOperationAction(ISD::FFLOOR,             MVT::v8f64, Legal);
01461     setOperationAction(ISD::FCEIL,              MVT::v16f32, Legal);
01462     setOperationAction(ISD::FCEIL,              MVT::v8f64, Legal);
01463     setOperationAction(ISD::FTRUNC,             MVT::v16f32, Legal);
01464     setOperationAction(ISD::FTRUNC,             MVT::v8f64, Legal);
01465     setOperationAction(ISD::FRINT,              MVT::v16f32, Legal);
01466     setOperationAction(ISD::FRINT,              MVT::v8f64, Legal);
01467     setOperationAction(ISD::FNEARBYINT,         MVT::v16f32, Legal);
01468     setOperationAction(ISD::FNEARBYINT,         MVT::v8f64, Legal);
01469 
01470     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8f64,  Custom);
01471     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i64,  Custom);
01472     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16f32,  Custom);
01473     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i32,  Custom);
01474     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i1,   Custom);
01475 
01476     setOperationAction(ISD::SETCC,              MVT::v16i1, Custom);
01477     setOperationAction(ISD::SETCC,              MVT::v8i1, Custom);
01478 
01479     setOperationAction(ISD::MUL,              MVT::v8i64, Custom);
01480 
01481     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1,  Custom);
01482     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
01483     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v16i1, Custom);
01484     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i1, Custom);
01485     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i1, Custom);
01486     setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i1, Custom);
01487     setOperationAction(ISD::BUILD_VECTOR,       MVT::v16i1, Custom);
01488     setOperationAction(ISD::SELECT,             MVT::v8f64, Custom);
01489     setOperationAction(ISD::SELECT,             MVT::v8i64, Custom);
01490     setOperationAction(ISD::SELECT,             MVT::v16f32, Custom);
01491     setOperationAction(ISD::SELECT,             MVT::v16i1, Custom);
01492     setOperationAction(ISD::SELECT,             MVT::v8i1,  Custom);
01493 
01494     setOperationAction(ISD::SMAX,               MVT::v16i32, Legal);
01495     setOperationAction(ISD::SMAX,               MVT::v8i64, Legal);
01496     setOperationAction(ISD::UMAX,               MVT::v16i32, Legal);
01497     setOperationAction(ISD::UMAX,               MVT::v8i64, Legal);
01498     setOperationAction(ISD::SMIN,               MVT::v16i32, Legal);
01499     setOperationAction(ISD::SMIN,               MVT::v8i64, Legal);
01500     setOperationAction(ISD::UMIN,               MVT::v16i32, Legal);
01501     setOperationAction(ISD::UMIN,               MVT::v8i64, Legal);
01502 
01503     setOperationAction(ISD::ADD,                MVT::v8i64, Legal);
01504     setOperationAction(ISD::ADD,                MVT::v16i32, Legal);
01505 
01506     setOperationAction(ISD::SUB,                MVT::v8i64, Legal);
01507     setOperationAction(ISD::SUB,                MVT::v16i32, Legal);
01508 
01509     setOperationAction(ISD::MUL,                MVT::v16i32, Legal);
01510 
01511     setOperationAction(ISD::SRL,                MVT::v8i64, Custom);
01512     setOperationAction(ISD::SRL,                MVT::v16i32, Custom);
01513 
01514     setOperationAction(ISD::SHL,                MVT::v8i64, Custom);
01515     setOperationAction(ISD::SHL,                MVT::v16i32, Custom);
01516 
01517     setOperationAction(ISD::SRA,                MVT::v8i64, Custom);
01518     setOperationAction(ISD::SRA,                MVT::v16i32, Custom);
01519 
01520     setOperationAction(ISD::AND,                MVT::v8i64, Legal);
01521     setOperationAction(ISD::OR,                 MVT::v8i64, Legal);
01522     setOperationAction(ISD::XOR,                MVT::v8i64, Legal);
01523     setOperationAction(ISD::AND,                MVT::v16i32, Legal);
01524     setOperationAction(ISD::OR,                 MVT::v16i32, Legal);
01525     setOperationAction(ISD::XOR,                MVT::v16i32, Legal);
01526 
01527     if (Subtarget->hasCDI()) {
01528       setOperationAction(ISD::CTLZ,             MVT::v8i64,  Legal);
01529       setOperationAction(ISD::CTLZ,             MVT::v16i32, Legal);
01530       setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v8i64,  Expand);
01531       setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v16i32, Expand);
01532 
01533       setOperationAction(ISD::CTLZ,             MVT::v8i16,  Custom);
01534       setOperationAction(ISD::CTLZ,             MVT::v16i8,  Custom);
01535       setOperationAction(ISD::CTLZ,             MVT::v16i16, Custom);
01536       setOperationAction(ISD::CTLZ,             MVT::v32i8,  Custom);
01537       setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v8i16,  Expand);
01538       setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v16i8,  Expand);
01539       setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v16i16, Expand);
01540       setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v32i8,  Expand);
01541 
01542       setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v8i64,  Custom);
01543       setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v16i32, Custom);
01544 
01545       if (Subtarget->hasVLX()) {
01546         setOperationAction(ISD::CTLZ,             MVT::v4i64, Legal);
01547         setOperationAction(ISD::CTLZ,             MVT::v8i32, Legal);
01548         setOperationAction(ISD::CTLZ,             MVT::v2i64, Legal);
01549         setOperationAction(ISD::CTLZ,             MVT::v4i32, Legal);
01550         setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v4i64, Expand);
01551         setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v8i32, Expand);
01552         setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v2i64, Expand);
01553         setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v4i32, Expand);
01554 
01555         setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v4i64, Custom);
01556         setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v8i32, Custom);
01557         setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v2i64, Custom);
01558         setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v4i32, Custom);
01559       } else {
01560         setOperationAction(ISD::CTLZ,             MVT::v4i64, Custom);
01561         setOperationAction(ISD::CTLZ,             MVT::v8i32, Custom);
01562         setOperationAction(ISD::CTLZ,             MVT::v2i64, Custom);
01563         setOperationAction(ISD::CTLZ,             MVT::v4i32, Custom);
01564         setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v4i64, Expand);
01565         setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v8i32, Expand);
01566         setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v2i64, Expand);
01567         setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v4i32, Expand);
01568       }
01569     } // Subtarget->hasCDI()
01570 
01571     if (Subtarget->hasDQI()) {
01572       setOperationAction(ISD::MUL,             MVT::v2i64, Legal);
01573       setOperationAction(ISD::MUL,             MVT::v4i64, Legal);
01574       setOperationAction(ISD::MUL,             MVT::v8i64, Legal);
01575     }
01576     // Custom lower several nodes.
01577     for (MVT VT : MVT::vector_valuetypes()) {
01578       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
01579       if (EltSize == 1) {
01580         setOperationAction(ISD::AND, VT, Legal);
01581         setOperationAction(ISD::OR,  VT, Legal);
01582         setOperationAction(ISD::XOR,  VT, Legal);
01583       }
01584       if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) {
01585         setOperationAction(ISD::MGATHER,  VT, Custom);
01586         setOperationAction(ISD::MSCATTER, VT, Custom);
01587       }
01588       // Extract subvector is special because the value type
01589       // (result) is 256/128-bit but the source is 512-bit wide.
01590       if (VT.is128BitVector() || VT.is256BitVector()) {
01591         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
01592       }
01593       if (VT.getVectorElementType() == MVT::i1)
01594         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
01595 
01596       // Do not attempt to custom lower other non-512-bit vectors
01597       if (!VT.is512BitVector())
01598         continue;
01599 
01600       if (EltSize >= 32) {
01601         setOperationAction(ISD::VECTOR_SHUFFLE,      VT, Custom);
01602         setOperationAction(ISD::INSERT_VECTOR_ELT,   VT, Custom);
01603         setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01604         setOperationAction(ISD::VSELECT,             VT, Legal);
01605         setOperationAction(ISD::EXTRACT_VECTOR_ELT,  VT, Custom);
01606         setOperationAction(ISD::SCALAR_TO_VECTOR,    VT, Custom);
01607         setOperationAction(ISD::INSERT_SUBVECTOR,    VT, Custom);
01608         setOperationAction(ISD::MLOAD,               VT, Legal);
01609         setOperationAction(ISD::MSTORE,              VT, Legal);
01610         setOperationAction(ISD::MGATHER,  VT, Legal);
01611         setOperationAction(ISD::MSCATTER, VT, Custom);
01612       }
01613     }
01614     for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
01615       setOperationAction(ISD::SELECT, VT, Promote);
01616       AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
01617     }
01618   } // has AVX-512
01619 
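  // AVX-512 BW adds byte and word (v64i8/v32i16) operations on the 512-bit
  // registers, together with the v32i1/v64i1 mask register classes.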
01620   if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
01621     addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
01622     addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
01623 
01624     addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
01625     addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
01626 
01627     setOperationAction(ISD::LOAD,               MVT::v32i16, Legal);
01628     setOperationAction(ISD::LOAD,               MVT::v64i8, Legal);
01629     setOperationAction(ISD::SETCC,              MVT::v32i1, Custom);
01630     setOperationAction(ISD::SETCC,              MVT::v64i1, Custom);
01631     setOperationAction(ISD::ADD,                MVT::v32i16, Legal);
01632     setOperationAction(ISD::ADD,                MVT::v64i8, Legal);
01633     setOperationAction(ISD::SUB,                MVT::v32i16, Legal);
01634     setOperationAction(ISD::SUB,                MVT::v64i8, Legal);
01635     setOperationAction(ISD::MUL,                MVT::v32i16, Legal);
01636     setOperationAction(ISD::MULHS,              MVT::v32i16, Legal);
01637     setOperationAction(ISD::MULHU,              MVT::v32i16, Legal);
01638     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i1, Custom);
01639     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i1, Custom);
01640     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i16, Custom);
01641     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i8, Custom);
01642     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v32i1, Custom);
01643     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v64i1, Custom);
01644     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v32i16, Custom);
01645     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v64i8, Custom);
01646     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
01647     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
01648     setOperationAction(ISD::SELECT,             MVT::v32i1, Custom);
01649     setOperationAction(ISD::SELECT,             MVT::v64i1, Custom);
01650     setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i8, Custom);
01651     setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i8, Custom);
01652     setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i16, Custom);
01653     setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i16, Custom);
01654     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v32i16, Custom);
01655     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v64i8, Custom);
01656     setOperationAction(ISD::SIGN_EXTEND,        MVT::v64i8, Custom);
01657     setOperationAction(ISD::ZERO_EXTEND,        MVT::v64i8, Custom);
01658     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v32i1, Custom);
01659     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v64i1, Custom);
01660     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v32i16, Custom);
01661     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v64i8, Custom);
01662     setOperationAction(ISD::VSELECT,            MVT::v32i16, Legal);
01663     setOperationAction(ISD::VSELECT,            MVT::v64i8, Legal);
01664     setOperationAction(ISD::TRUNCATE,           MVT::v32i1, Custom);
01665     setOperationAction(ISD::TRUNCATE,           MVT::v64i1, Custom);
01666     setOperationAction(ISD::TRUNCATE,           MVT::v32i8, Custom);
01667     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v32i1, Custom);
01668     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v64i1, Custom);
01669 
01670     setOperationAction(ISD::SMAX,               MVT::v64i8, Legal);
01671     setOperationAction(ISD::SMAX,               MVT::v32i16, Legal);
01672     setOperationAction(ISD::UMAX,               MVT::v64i8, Legal);
01673     setOperationAction(ISD::UMAX,               MVT::v32i16, Legal);
01674     setOperationAction(ISD::SMIN,               MVT::v64i8, Legal);
01675     setOperationAction(ISD::SMIN,               MVT::v32i16, Legal);
01676     setOperationAction(ISD::UMIN,               MVT::v64i8, Legal);
01677     setOperationAction(ISD::UMIN,               MVT::v32i16, Legal);
01678 
01679     setTruncStoreAction(MVT::v32i16,  MVT::v32i8, Legal);
01680     setTruncStoreAction(MVT::v16i16,  MVT::v16i8, Legal);
01681     if (Subtarget->hasVLX())
01682       setTruncStoreAction(MVT::v8i16,   MVT::v8i8,  Legal);
01683 
01684     if (Subtarget->hasCDI()) {
01685       setOperationAction(ISD::CTLZ,            MVT::v32i16, Custom);
01686       setOperationAction(ISD::CTLZ,            MVT::v64i8,  Custom);
01687       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand);
01688       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8,  Expand);
01689     }
01690 
01691     for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
01692       setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
01693       setOperationAction(ISD::VSELECT,             VT, Legal);
01694       setOperationAction(ISD::SRL,                 VT, Custom);
01695       setOperationAction(ISD::SHL,                 VT, Custom);
01696       setOperationAction(ISD::SRA,                 VT, Custom);
01697 
01698       setOperationAction(ISD::AND,    VT, Promote);
01699       AddPromotedToType (ISD::AND,    VT, MVT::v8i64);
01700       setOperationAction(ISD::OR,     VT, Promote);
01701       AddPromotedToType (ISD::OR,     VT, MVT::v8i64);
01702       setOperationAction(ISD::XOR,    VT, Promote);
01703       AddPromotedToType (ISD::XOR,    VT, MVT::v8i64);
01704     }
01705   }
01706 
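  // AVX-512 VL provides the EVEX-encoded forms at 128-/256-bit widths, which
  // also gives us the v2i1/v4i1 mask types.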
01707   if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
01708     addRegisterClass(MVT::v4i1,   &X86::VK4RegClass);
01709     addRegisterClass(MVT::v2i1,   &X86::VK2RegClass);
01710 
01711     setOperationAction(ISD::SETCC,              MVT::v4i1, Custom);
01712     setOperationAction(ISD::SETCC,              MVT::v2i1, Custom);
01713     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v4i1, Custom);
01714     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i1, Custom);
01715     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v8i1, Custom);
01716     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v4i1, Custom);
01717     setOperationAction(ISD::SELECT,             MVT::v4i1, Custom);
01718     setOperationAction(ISD::SELECT,             MVT::v2i1, Custom);
01719     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i1, Custom);
01720     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i1, Custom);
01721     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i1, Custom);
01722     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i1, Custom);
01723 
01724     setOperationAction(ISD::AND,                MVT::v8i32, Legal);
01725     setOperationAction(ISD::OR,                 MVT::v8i32, Legal);
01726     setOperationAction(ISD::XOR,                MVT::v8i32, Legal);
01727     setOperationAction(ISD::AND,                MVT::v4i32, Legal);
01728     setOperationAction(ISD::OR,                 MVT::v4i32, Legal);
01729     setOperationAction(ISD::XOR,                MVT::v4i32, Legal);
01730     setOperationAction(ISD::SRA,                MVT::v2i64, Custom);
01731     setOperationAction(ISD::SRA,                MVT::v4i64, Custom);
01732 
01733     setOperationAction(ISD::SMAX,               MVT::v2i64, Legal);
01734     setOperationAction(ISD::SMAX,               MVT::v4i64, Legal);
01735     setOperationAction(ISD::UMAX,               MVT::v2i64, Legal);
01736     setOperationAction(ISD::UMAX,               MVT::v4i64, Legal);
01737     setOperationAction(ISD::SMIN,               MVT::v2i64, Legal);
01738     setOperationAction(ISD::SMIN,               MVT::v4i64, Legal);
01739     setOperationAction(ISD::UMIN,               MVT::v2i64, Legal);
01740     setOperationAction(ISD::UMIN,               MVT::v4i64, Legal);
01741   }
01742 
01743   // We want to custom lower some of our intrinsics.
01744   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
01745   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
01746   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
01747   if (!Subtarget->is64Bit()) {
01748     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
01749     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
01750   }
01751 
01752   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
01753   // handle type legalization for these operations here.
01754   //
01755   // FIXME: We really should do custom legalization for addition and
01756   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
01757   // than generic legalization for 64-bit multiplication-with-overflow, though.
01758   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
01759     if (VT == MVT::i64 && !Subtarget->is64Bit())
01760       continue;
01761     // Add/Sub/Mul with overflow operations are custom lowered.
01762     setOperationAction(ISD::SADDO, VT, Custom);
01763     setOperationAction(ISD::UADDO, VT, Custom);
01764     setOperationAction(ISD::SSUBO, VT, Custom);
01765     setOperationAction(ISD::USUBO, VT, Custom);
01766     setOperationAction(ISD::SMULO, VT, Custom);
01767     setOperationAction(ISD::UMULO, VT, Custom);
01768   }
01769 
01770   if (!Subtarget->is64Bit()) {
01771     // These libcalls are not available in 32-bit.
01772     setLibcallName(RTLIB::SHL_I128, nullptr);
01773     setLibcallName(RTLIB::SRL_I128, nullptr);
01774     setLibcallName(RTLIB::SRA_I128, nullptr);
01775   }
01776 
01777   // Combine sin / cos into one node or libcall if possible.
01778   if (Subtarget->hasSinCos()) {
01779     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
01780     setLibcallName(RTLIB::SINCOS_F64, "sincos");
01781     if (Subtarget->isTargetDarwin()) {
01782       // For MacOSX, we don't want the normal expansion of a libcall to sincos.
01783       // We want to issue a libcall to __sincos_stret to avoid memory traffic.
01784       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
01785       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
01786     }
01787   }
01788 
01789   if (Subtarget->isTargetWin64()) {
01790     setOperationAction(ISD::SDIV, MVT::i128, Custom);
01791     setOperationAction(ISD::UDIV, MVT::i128, Custom);
01792     setOperationAction(ISD::SREM, MVT::i128, Custom);
01793     setOperationAction(ISD::UREM, MVT::i128, Custom);
01794     setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
01795     setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
01796   }
01797 
01798   // We have target-specific DAG combine patterns for the following nodes:
01799   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
01800   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
01801   setTargetDAGCombine(ISD::BITCAST);
01802   setTargetDAGCombine(ISD::VSELECT);
01803   setTargetDAGCombine(ISD::SELECT);
01804   setTargetDAGCombine(ISD::SHL);
01805   setTargetDAGCombine(ISD::SRA);
01806   setTargetDAGCombine(ISD::SRL);
01807   setTargetDAGCombine(ISD::OR);
01808   setTargetDAGCombine(ISD::AND);
01809   setTargetDAGCombine(ISD::ADD);
01810   setTargetDAGCombine(ISD::FADD);
01811   setTargetDAGCombine(ISD::FSUB);
01812   setTargetDAGCombine(ISD::FNEG);
01813   setTargetDAGCombine(ISD::FMA);
01814   setTargetDAGCombine(ISD::FMINNUM);
01815   setTargetDAGCombine(ISD::FMAXNUM);
01816   setTargetDAGCombine(ISD::SUB);
01817   setTargetDAGCombine(ISD::LOAD);
01818   setTargetDAGCombine(ISD::MLOAD);
01819   setTargetDAGCombine(ISD::STORE);
01820   setTargetDAGCombine(ISD::MSTORE);
01821   setTargetDAGCombine(ISD::TRUNCATE);
01822   setTargetDAGCombine(ISD::ZERO_EXTEND);
01823   setTargetDAGCombine(ISD::ANY_EXTEND);
01824   setTargetDAGCombine(ISD::SIGN_EXTEND);
01825   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
01826   setTargetDAGCombine(ISD::SINT_TO_FP);
01827   setTargetDAGCombine(ISD::UINT_TO_FP);
01828   setTargetDAGCombine(ISD::SETCC);
01829   setTargetDAGCombine(ISD::BUILD_VECTOR);
01830   setTargetDAGCombine(ISD::MUL);
01831   setTargetDAGCombine(ISD::XOR);
01832   setTargetDAGCombine(ISD::MSCATTER);
01833   setTargetDAGCombine(ISD::MGATHER);
01834 
01835   computeRegisterProperties(Subtarget->getRegisterInfo());
01836 
01837   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
01838   MaxStoresPerMemsetOptSize = 8;
01839   MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
01840   MaxStoresPerMemcpyOptSize = 4;
01841   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
01842   MaxStoresPerMemmoveOptSize = 4;
01843   setPrefLoopAlignment(4); // 2^4 bytes.
01844 
01845   // A predictable cmov does not hurt on an in-order CPU.
01846   // FIXME: Use a CPU attribute to trigger this, not a CPU model.
01847   PredictableSelectIsExpensive = !Subtarget->isAtom();
01848   EnableExtLdPromotion = true;
01849   setPrefFunctionAlignment(4); // 2^4 bytes.
01850 
01851   verifyIntrinsicTables();
01852 }
01853 
01854 // This has so far only been implemented for 64-bit MachO.
01855 bool X86TargetLowering::useLoadStackGuardNode() const {
01856   return Subtarget->isTargetMachO() && Subtarget->is64Bit();
01857 }
01858 
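/// Return the preferred way to legalize an illegal vector type. Under the
/// experimental widening flag, widen the vector rather than promoting its
/// elements, except for single-element vectors and vectors of i1.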
01859 TargetLoweringBase::LegalizeTypeAction
01860 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
01861   if (ExperimentalVectorWideningLegalization &&
01862       VT.getVectorNumElements() != 1 &&
01863       VT.getVectorElementType().getSimpleVT() != MVT::i1)
01864     return TypeWidenVector;
01865 
01866   return TargetLoweringBase::getPreferredVectorAction(VT);
01867 }
01868 
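/// Return the type to use for the result of a SETCC: i1 (with AVX-512) or i8
/// for scalars, an AVX-512 mask type (v*i1) when the subtarget supports it at
/// the given width, and otherwise an integer vector whose elements match the
/// width of the compared elements. For example, a v8f32 compare yields v8i1
/// when VLX is available and v8i32 otherwise.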
01869 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
01870                                           EVT VT) const {
01871   if (!VT.isVector())
01872     return Subtarget->hasAVX512() ? MVT::i1 : MVT::i8;
01873 
01874   if (VT.isSimple()) {
01875     MVT VVT = VT.getSimpleVT();
01876     const unsigned NumElts = VVT.getVectorNumElements();
01877     const MVT EltVT = VVT.getVectorElementType();
01878     if (VVT.is512BitVector()) {
01879       if (Subtarget->hasAVX512())
01880         if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
01881             EltVT == MVT::f32 || EltVT == MVT::f64)
01882           switch(NumElts) {
01883           case  8: return MVT::v8i1;
01884           case 16: return MVT::v16i1;
01885         }
01886       if (Subtarget->hasBWI())
01887         if (EltVT == MVT::i8 || EltVT == MVT::i16)
01888           switch(NumElts) {
01889           case 32: return MVT::v32i1;
01890           case 64: return MVT::v64i1;
01891         }
01892     }
01893 
01894     if (VVT.is256BitVector() || VVT.is128BitVector()) {
01895       if (Subtarget->hasVLX())
01896         if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
01897             EltVT == MVT::f32 || EltVT == MVT::f64)
01898           switch(NumElts) {
01899           case 2: return MVT::v2i1;
01900           case 4: return MVT::v4i1;
01901           case 8: return MVT::v8i1;
01902         }
01903       if (Subtarget->hasBWI() && Subtarget->hasVLX())
01904         if (EltVT == MVT::i8 || EltVT == MVT::i16)
01905           switch(NumElts) {
01906           case  8: return MVT::v8i1;
01907           case 16: return MVT::v16i1;
01908           case 32: return MVT::v32i1;
01909         }
01910     }
01911   }
01912 
01913   return VT.changeVectorElementTypeToInteger();
01914 }
01915 
01916 /// Helper for getByValTypeAlignment to determine
01917 /// the desired ByVal argument alignment.
01918 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
01919   if (MaxAlign == 16)
01920     return;
01921   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
01922     if (VTy->getBitWidth() == 128)
01923       MaxAlign = 16;
01924   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
01925     unsigned EltAlign = 0;
01926     getMaxByValAlign(ATy->getElementType(), EltAlign);
01927     if (EltAlign > MaxAlign)
01928       MaxAlign = EltAlign;
01929   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
01930     for (auto *EltTy : STy->elements()) {
01931       unsigned EltAlign = 0;
01932       getMaxByValAlign(EltTy, EltAlign);
01933       if (EltAlign > MaxAlign)
01934         MaxAlign = EltAlign;
01935       if (MaxAlign == 16)
01936         break;
01937     }
01938   }
01939 }
01940 
01941 /// Return the desired alignment for ByVal aggregate
01942 /// function arguments in the caller parameter area. For X86, aggregates
01943 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
01944 /// are at 4-byte boundaries.
01945 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
01946                                                   const DataLayout &DL) const {
01947   if (Subtarget->is64Bit()) {
01948     // Max of 8 and alignment of type.
01949     unsigned TyAlign = DL.getABITypeAlignment(Ty);
01950     if (TyAlign > 8)
01951       return TyAlign;
01952     return 8;
01953   }
01954 
01955   unsigned Align = 4;
01956   if (Subtarget->hasSSE1())
01957     getMaxByValAlign(Ty, Align);
01958   return Align;
01959 }
01960 
01961 /// Returns the target-specific optimal type for load
01962 /// and store operations as a result of memset, memcpy, and memmove
01963 /// lowering. If DstAlign is zero, that means the destination's alignment can
01964 /// satisfy any constraint. Similarly, if SrcAlign is zero it
01965 /// means there isn't a need to check it against alignment requirement,
01966 /// probably because the source does not need to be loaded. If 'IsMemset' is
01967 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
01968 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
01969 /// source is constant so it does not need to be loaded.
01970 /// It returns EVT::Other if the type should be determined using generic
01971 /// target-independent logic.
01972 EVT
01973 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
01974                                        unsigned DstAlign, unsigned SrcAlign,
01975                                        bool IsMemset, bool ZeroMemset,
01976                                        bool MemcpyStrSrc,
01977                                        MachineFunction &MF) const {
01978   const Function *F = MF.getFunction();
01979   if ((!IsMemset || ZeroMemset) &&
01980       !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
01981     if (Size >= 16 &&
01982         (!Subtarget->isUnalignedMem16Slow() ||
01983          ((DstAlign == 0 || DstAlign >= 16) &&
01984           (SrcAlign == 0 || SrcAlign >= 16)))) {
01985       if (Size >= 32) {
01986         // FIXME: Check if unaligned 32-byte accesses are slow.
01987         if (Subtarget->hasInt256())
01988           return MVT::v8i32;
01989         if (Subtarget->hasFp256())
01990           return MVT::v8f32;
01991       }
01992       if (Subtarget->hasSSE2())
01993         return MVT::v4i32;
01994       if (Subtarget->hasSSE1())
01995         return MVT::v4f32;
01996     } else if (!MemcpyStrSrc && Size >= 8 &&
01997                !Subtarget->is64Bit() &&
01998                Subtarget->hasSSE2()) {
01999       // Do not use f64 to lower memcpy if source is string constant. It's
02000       // better to use i32 to avoid the loads.
02001       return MVT::f64;
02002     }
02003   }
02004   // This is a compromise. If we reach here, unaligned accesses may be slow on
02005   // this target. However, creating smaller, aligned accesses could be even
02006   // slower and would certainly be a lot more code.
02007   if (Subtarget->is64Bit() && Size >= 8)
02008     return MVT::i64;
02009   return MVT::i32;
02010 }
02011 
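/// Return true if it is safe to use the given type when lowering a memory
/// operation (memset/memcpy/memmove): f32 and f64 are only considered safe
/// when the matching scalar SSE level is available, so that the copy never
/// has to go through x87.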
02012 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
02013   if (VT == MVT::f32)
02014     return X86ScalarSSEf32;
02015   else if (VT == MVT::f64)
02016     return X86ScalarSSEf64;
02017   return true;
02018 }
02019 
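/// Misaligned accesses of any size are always allowed on x86. When the caller
/// asks via *Fast, report whether they are also fast: accesses of 8 bytes or
/// less always are, while 16- and 32-byte accesses depend on the subtarget's
/// unaligned-memory penalties.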
02020 bool
02021 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
02022                                                   unsigned,
02023                                                   unsigned,
02024                                                   bool *Fast) const {
02025   if (Fast) {
02026     switch (VT.getSizeInBits()) {
02027     default:
02028       // 8-byte and under are always assumed to be fast.
02029       *Fast = true;
02030       break;
02031     case 128:
02032       *Fast = !Subtarget->isUnalignedMem16Slow();
02033       break;
02034     case 256:
02035       *Fast = !Subtarget->isUnalignedMem32Slow();
02036       break;
02037     // TODO: What about AVX-512 (512-bit) accesses?
02038     }
02039   }
02040   // Misaligned accesses of any size are always allowed.
02041   return true;
02042 }
02043 
02044 /// Return the entry encoding for a jump table in the
02045 /// current function.  The returned value is a member of the
02046 /// MachineJumpTableInfo::JTEntryKind enum.
02047 unsigned X86TargetLowering::getJumpTableEncoding() const {
02048   // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
02049   // symbol.
02050   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
02051       Subtarget->isPICStyleGOT())
02052     return MachineJumpTableInfo::EK_Custom32;
02053 
02054   // Otherwise, use the normal jump table encoding heuristics.
02055   return TargetLowering::getJumpTableEncoding();
02056 }
02057 
02058 bool X86TargetLowering::useSoftFloat() const {
02059   return Subtarget->useSoftFloat();
02060 }
02061 
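/// Produce the custom (EK_Custom32) jump table entry used in GOT PIC mode:
/// each entry is the @GOTOFF of the destination basic block's symbol.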
02062 const MCExpr *
02063 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
02064                                              const MachineBasicBlock *MBB,
02065                                              unsigned uid, MCContext &Ctx) const {
02066   assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
02067          Subtarget->isPICStyleGOT());
02068   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
02069   // expressions.
02070   return MCSymbolRefExpr::create(MBB->getSymbol(),
02071                                  MCSymbolRefExpr::VK_GOTOFF, Ctx);
02072 }
02073 
02074 /// Returns relocation base for the given PIC jumptable.
02075 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
02076                                                     SelectionDAG &DAG) const {
02077   if (!Subtarget->is64Bit())
02078     // This doesn't have SDLoc associated with it, but is not really the
02079     // same as a Register.
02080     return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
02081                        getPointerTy(DAG.getDataLayout()));
02082   return Table;
02083 }
02084 
02085 /// This returns the relocation base for the given PIC jumptable,
02086 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
02087 const MCExpr *X86TargetLowering::
02088 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
02089                              MCContext &Ctx) const {
02090   // X86-64 uses RIP relative addressing based on the jump table label.
02091   if (Subtarget->isPICStyleRIPRel())
02092     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
02093 
02094   // Otherwise, the reference is relative to the PIC base.
02095   return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
02096 }
02097 
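/// Return the representative register class (and a relative cost) for the
/// given type: GR32/GR64 for scalar integers, VR64 for MMX, and VR128 for
/// scalar FP and vector types.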
02098 std::pair<const TargetRegisterClass *, uint8_t>
02099 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
02100                                            MVT VT) const {
02101   const TargetRegisterClass *RRC = nullptr;
02102   uint8_t Cost = 1;
02103   switch (VT.SimpleTy) {
02104   default:
02105     return TargetLowering::findRepresentativeClass(TRI, VT);
02106   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
02107     RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
02108     break;
02109   case MVT::x86mmx:
02110     RRC = &X86::VR64RegClass;
02111     break;
02112   case MVT::f32: case MVT::f64:
02113   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
02114   case MVT::v4f32: case MVT::v2f64:
02115   case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
02116   case MVT::v4f64:
02117     RRC = &X86::VR128RegClass;
02118     break;
02119   }
02120   return std::make_pair(RRC, Cost);
02121 }
02122 
02123 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
02124                                                unsigned &Offset) const {
02125   if (!Subtarget->isTargetLinux())
02126     return false;
02127 
02128   if (Subtarget->is64Bit()) {
02129     // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
02130     Offset = 0x28;
02131     if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
02132       AddressSpace = 256;
02133     else
02134       AddressSpace = 257;
02135   } else {
02136     // %gs:0x14 on i386
02137     Offset = 0x14;
02138     AddressSpace = 256;
02139   }
02140   return true;
02141 }
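// Illustrative result (assumed asm, not emitted by this function): the stack
// protector ends up loading the cookie through the segment register implied
// by the address space chosen above, e.g.
//   movq %fs:0x28, %rax    # x86-64 user code (address space 257)
//   movl %gs:0x14, %eax    # i386 (address space 256)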
02142 
02143 Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
02144   if (!Subtarget->isTargetAndroid())
02145     return TargetLowering::getSafeStackPointerLocation(IRB);
02146 
02147   // Android provides a fixed TLS slot for the SafeStack pointer. See the
02148   // definition of TLS_SLOT_SAFESTACK in
02149   // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
02150   unsigned AddressSpace, Offset;
02151   if (Subtarget->is64Bit()) {
02152     // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
02153     Offset = 0x48;
02154     if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
02155       AddressSpace = 256;
02156     else
02157       AddressSpace = 257;
02158   } else {
02159     // %gs:0x24 on i386
02160     Offset = 0x24;
02161     AddressSpace = 256;
02162   }
02163 
02164   return ConstantExpr::getIntToPtr(
02165       ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
02166       Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
02167 }
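// The constant built above corresponds to IR of roughly this shape (sketch,
// x86-64 non-kernel case):
//   inttoptr (i32 72 to i8* addrspace(257)*)
// i.e. the SafeStack unsafe-stack pointer is read from %fs:0x48.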
02168 
02169 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
02170                                             unsigned DestAS) const {
02171   assert(SrcAS != DestAS && "Expected different address spaces!");
02172 
02173   return SrcAS < 256 && DestAS < 256;
02174 }
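// Example (illustrative IR): a cast that stays below address space 256 is a
// no-op, while one targeting the %gs/%fs address spaces is not:
//   %a = addrspacecast i8* %p to i8 addrspace(1)*     ; no-op
//   %b = addrspacecast i8* %p to i8 addrspace(257)*   ; not a no-op (%fs)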
02175 
02176 //===----------------------------------------------------------------------===//
02177 //               Return Value Calling Convention Implementation
02178 //===----------------------------------------------------------------------===//
02179 
02180 #include "X86GenCallingConv.inc"
02181 
02182 bool X86TargetLowering::CanLowerReturn(
02183     CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
02184     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
02185   SmallVector<CCValAssign, 16> RVLocs;
02186   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
02187   return CCInfo.CheckReturn(Outs, RetCC_X86);
02188 }
02189 
02190 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
02191   static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
02192   return ScratchRegs;
02193 }
02194 
02195 SDValue
02196 X86TargetLowering::LowerReturn(SDValue Chain,
02197                                CallingConv::ID CallConv, bool isVarArg,
02198                                const SmallVectorImpl<ISD::OutputArg> &Outs,
02199                                const SmallVectorImpl<SDValue> &OutVals,
02200                                SDLoc dl, SelectionDAG &DAG) const {
02201   MachineFunction &MF = DAG.getMachineFunction();
02202   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
02203 
02204   if (CallConv == CallingConv::X86_INTR && !Outs.empty())
02205     report_fatal_error("X86 interrupts may not return any value");
02206 
02207   SmallVector<CCValAssign, 16> RVLocs;
02208   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
02209   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
02210 
02211   SDValue Flag;
02212   SmallVector<SDValue, 6> RetOps;
02213   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
02214   // Operand #1 = Bytes To Pop
02215   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
02216                    MVT::i16));
02217 
02218   // Copy the result values into the output registers.
02219   for (unsigned i = 0; i != RVLocs.size(); ++i) {
02220     CCValAssign &VA = RVLocs[i];
02221     assert(VA.isRegLoc() && "Can only return in registers!");
02222     SDValue ValToCopy = OutVals[i];
02223     EVT ValVT = ValToCopy.getValueType();
02224 
02225     // Promote values to the appropriate types.
02226     if (VA.getLocInfo() == CCValAssign::SExt)
02227       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
02228     else if (VA.getLocInfo() == CCValAssign::ZExt)
02229       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
02230     else if (VA.getLocInfo() == CCValAssign::AExt) {
02231       if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
02232         ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
02233       else
02234         ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
02235     }
02236     else if (VA.getLocInfo() == CCValAssign::BCvt)
02237       ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
02238 
02239     assert(VA.getLocInfo() != CCValAssign::FPExt &&
02240            "Unexpected FP-extend for return value.");
02241 
02242     // If this is x86-64, and we disabled SSE, we can't return FP values,
02243     // or SSE or MMX vectors.
02244     if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
02245          VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
02246           (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
02247       report_fatal_error("SSE register return with SSE disabled");
02248     }
02249     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
02250     // llvm-gcc has never done it right and no one has noticed, so this
02251     // should be OK for now.
02252     if (ValVT == MVT::f64 &&
02253         (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
02254       report_fatal_error("SSE2 register return with SSE2 disabled");
02255 
02256     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
02257     // the RET instruction and handled by the FP Stackifier.
02258     if (VA.getLocReg() == X86::FP0 ||
02259         VA.getLocReg() == X86::FP1) {
02260       // If this is a copy from an xmm register to ST(0), use an FPExtend to
02261       // change the value to the FP stack register class.
02262       if (isScalarFPTypeInSSEReg(VA.getValVT()))
02263         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
02264       RetOps.push_back(ValToCopy);
02265       // Don't emit a copytoreg.
02266       continue;
02267     }
02268 
02269     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
02270     // which is returned in RAX / RDX.
02271     if (Subtarget->is64Bit()) {
02272       if (ValVT == MVT::x86mmx) {
02273         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
02274           ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
02275           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
02276                                   ValToCopy);
02277           // If we don't have SSE2 available, convert to v4f32 so the generated
02278           // register is legal.
02279           if (!Subtarget->hasSSE2())
02280             ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
02281         }
02282       }
02283     }
02284 
02285     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
02286     Flag = Chain.getValue(1);
02287     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02288   }
02289 
02290   // All x86 ABIs require that for returning structs by value we copy
02291   // the sret argument into %rax/%eax (depending on ABI) for the return.
02292   // We saved the argument into a virtual register in the entry block,
02293   // so now we copy the value out and into %rax/%eax.
02294   //
02295   // Checking Function.hasStructRetAttr() here is insufficient because the IR
02296   // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
02297   // false, then an sret argument may be implicitly inserted in the SelDAG. In
02298   // either case FuncInfo->setSRetReturnReg() will have been called.
02299   if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
02300     SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg,
02301                                      getPointerTy(MF.getDataLayout()));
02302 
02303     unsigned RetValReg
02304         = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
02305           X86::RAX : X86::EAX;
02306     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
02307     Flag = Chain.getValue(1);
02308 
02309     // RAX/EAX now acts like a return value.
02310     RetOps.push_back(
02311         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
02312   }
02313 
02314   const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
02315   const MCPhysReg *I =
02316       TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
02317   if (I) {
02318     for (; *I; ++I) {
02319       if (X86::GR64RegClass.contains(*I))
02320         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
02321       else
02322         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
02323     }
02324   }
02325 
02326   RetOps[0] = Chain;  // Update chain.
02327 
02328   // Add the flag if we have it.
02329   if (Flag.getNode())
02330     RetOps.push_back(Flag);
02331 
02332   X86ISD::NodeType opcode = X86ISD::RET_FLAG;
02333   if (CallConv == CallingConv::X86_INTR)
02334     opcode = X86ISD::IRET;
02335   return DAG.getNode(opcode, dl, MVT::Other, RetOps);
02336 }
02337 
02338 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
02339   if (N->getNumValues() != 1)
02340     return false;
02341   if (!N->hasNUsesOfValue(1, 0))
02342     return false;
02343 
02344   SDValue TCChain = Chain;
02345   SDNode *Copy = *N->use_begin();
02346   if (Copy->getOpcode() == ISD::CopyToReg) {
02347     // If the copy has a glue operand, we conservatively assume it isn't safe to
02348     // perform a tail call.
02349     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02350       return false;
02351     TCChain = Copy->getOperand(0);
02352   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
02353     return false;
02354 
02355   bool HasRet = false;
02356   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
02357        UI != UE; ++UI) {
02358     if (UI->getOpcode() != X86ISD::RET_FLAG)
02359       return false;
02360     // If we are returning more than one value, we can definitely
02361     // not make a tail call; see PR19530.
02362     if (UI->getNumOperands() > 4)
02363       return false;
02364     if (UI->getNumOperands() == 4 &&
02365         UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
02366       return false;
02367     HasRet = true;
02368   }
02369 
02370   if (!HasRet)
02371     return false;
02372 
02373   Chain = TCChain;
02374   return true;
02375 }
02376 
02377 EVT
02378 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
02379                                             ISD::NodeType ExtendKind) const {
02380   MVT ReturnMVT;
02381   // TODO: Is this also valid on 32-bit?
02382   if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
02383     ReturnMVT = MVT::i8;
02384   else
02385     ReturnMVT = MVT::i32;
02386 
02387   EVT MinVT = getRegisterType(Context, ReturnMVT);
02388   return VT.bitsLT(MinVT) ? MinVT : VT;
02389 }
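// Worked example (derived from the code above): a zeroext i1 return on x86-64
// is only promoted to i8, so callers may rely on %al alone, whereas an i16
// return is widened to the usual i32 minimum register type.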
02390 
02391 /// Lower the result values of a call into the
02392 /// appropriate copies out of appropriate physical registers.
02393 ///
02394 SDValue
02395 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
02396                                    CallingConv::ID CallConv, bool isVarArg,
02397                                    const SmallVectorImpl<ISD::InputArg> &Ins,
02398                                    SDLoc dl, SelectionDAG &DAG,
02399                                    SmallVectorImpl<SDValue> &InVals) const {
02400 
02401   // Assign locations to each value returned by this call.
02402   SmallVector<CCValAssign, 16> RVLocs;
02403   bool Is64Bit = Subtarget->is64Bit();
02404   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
02405                  *DAG.getContext());
02406   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
02407 
02408   // Copy all of the result registers out of their specified physreg.
02409   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
02410     CCValAssign &VA = RVLocs[i];
02411     EVT CopyVT = VA.getLocVT();
02412 
02413     // If this is x86-64, and we disabled SSE, we can't return FP values
02414     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
02415         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
02416       report_fatal_error("SSE register return with SSE disabled");
02417     }
02418 
02419     // If we prefer to use the value in xmm registers, copy it out as f80 and
02420     // use a truncate to move it from fp stack reg to xmm reg.
02421     bool RoundAfterCopy = false;
02422     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
02423         isScalarFPTypeInSSEReg(VA.getValVT())) {
02424       CopyVT = MVT::f80;
02425       RoundAfterCopy = (CopyVT != VA.getLocVT());
02426     }
02427 
02428     Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
02429                                CopyVT, InFlag).getValue(1);
02430     SDValue Val = Chain.getValue(0);
02431 
02432     if (RoundAfterCopy)
02433       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
02434                         // This truncation won't change the value.
02435                         DAG.getIntPtrConstant(1, dl));
02436 
02437     if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
02438       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
02439 
02440     InFlag = Chain.getValue(2);
02441     InVals.push_back(Val);
02442   }
02443 
02444   return Chain;
02445 }
02446 
02447 //===----------------------------------------------------------------------===//
02448 //                C & StdCall & Fast Calling Convention implementation
02449 //===----------------------------------------------------------------------===//
02450 //  The StdCall calling convention is the standard one for many Windows API
02451 //  routines. It differs from the C calling convention just a little: the
02452 //  callee should clean up the stack, not the caller. Symbols should also be
02453 //  decorated in some fancy way :) It doesn't support any vector arguments.
02454 //  For info on fast calling convention see Fast Calling Convention (tail call)
02455 //  implementation LowerX86_32FastCCCallTo.
02456 
02457 /// CallIsStructReturn - Determines whether a call uses struct return
02458 /// semantics.
02459 enum StructReturnType {
02460   NotStructReturn,
02461   RegStructReturn,
02462   StackStructReturn
02463 };
02464 static StructReturnType
02465 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
02466   if (Outs.empty())
02467     return NotStructReturn;
02468 
02469   const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
02470   if (!Flags.isSRet())
02471     return NotStructReturn;
02472   if (Flags.isInReg() || IsMCU)
02473     return RegStructReturn;
02474   return StackStructReturn;
02475 }
02476 
02477 /// Determines whether a function uses struct return semantics.
02478 static StructReturnType
02479 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
02480   if (Ins.empty())
02481     return NotStructReturn;
02482 
02483   const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
02484   if (!Flags.isSRet())
02485     return NotStructReturn;
02486   if (Flags.isInReg() || IsMCU)
02487     return RegStructReturn;
02488   return StackStructReturn;
02489 }
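// Illustrative C-level view (assumed example, not from this file): returning a
// large aggregate by value is lowered with a hidden sret pointer, e.g.
//   struct Big { long a, b, c, d; };
//   struct Big make(void);   // callee writes the result through the pointer
// On 32-bit targets that pointer normally lives on the stack
// (StackStructReturn); with the inreg attribute, or on MCU targets, it is
// passed in a register instead (RegStructReturn).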
02490 
02491 /// Make a copy of an aggregate at address specified by "Src" to address
02492 /// "Dst" with size and alignment information specified by the specific
02493 /// parameter attribute. The copy will be passed as a byval function parameter.
02494 static SDValue
02495 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
02496                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
02497                           SDLoc dl) {
02498   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
02499 
02500   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
02501                        /*isVolatile*/false, /*AlwaysInline=*/true,
02502                        /*isTailCall*/false,
02503                        MachinePointerInfo(), MachinePointerInfo());
02504 }
02505 
02506 /// Return true if the calling convention is one that we can guarantee TCO for.
02507 static bool canGuaranteeTCO(CallingConv::ID CC) {
02508   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
02509           CC == CallingConv::HiPE || CC == CallingConv::HHVM);
02510 }
02511 
02512 /// Return true if we might ever do TCO for calls with this calling convention.
02513 static bool mayTailCallThisCC(CallingConv::ID CC) {
02514   switch (CC) {
02515   // C calling conventions:
02516   case CallingConv::C:
02517   case CallingConv::X86_64_Win64:
02518   case CallingConv::X86_64_SysV:
02519   // Callee pop conventions:
02520   case CallingConv::X86_ThisCall:
02521   case CallingConv::X86_StdCall:
02522   case CallingConv::X86_VectorCall:
02523   case CallingConv::X86_FastCall:
02524     return true;
02525   default:
02526     return canGuaranteeTCO(CC);
02527   }
02528 }
02529 
02530 /// Return true if the function is being made into a tailcall target by
02531 /// changing its ABI.
02532 static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
02533   return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
02534 }
02535 
02536 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
02537   auto Attr =
02538       CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
02539   if (!CI->isTailCall() || Attr.getValueAsString() == "true")
02540     return false;
02541 
02542   CallSite CS(CI);
02543   CallingConv::ID CalleeCC = CS.getCallingConv();
02544   if (!mayTailCallThisCC(CalleeCC))
02545     return false;
02546 
02547   return true;
02548 }
02549 
02550 SDValue
02551 X86TargetLowering::LowerMemArgument(SDValue Chain,
02552                                     CallingConv::ID CallConv,
02553                                     const SmallVectorImpl<ISD::InputArg> &Ins,
02554                                     SDLoc dl, SelectionDAG &DAG,
02555                                     const CCValAssign &VA,
02556                                     MachineFrameInfo *MFI,
02557                                     unsigned i) const {
02558   // Create the nodes corresponding to a load from this parameter slot.
02559   ISD::ArgFlagsTy Flags = Ins[i].Flags;
02560   bool AlwaysUseMutable = shouldGuaranteeTCO(
02561       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
02562   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
02563   EVT ValVT;
02564 
02565   // If value is passed by pointer we have address passed instead of the value
02566   // itself.
02567   bool ExtendedInMem = VA.isExtInLoc() &&
02568     VA.getValVT().getScalarType() == MVT::i1;
02569 
02570   if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
02571     ValVT = VA.getLocVT();
02572   else
02573     ValVT = VA.getValVT();
02574 
02575   // Calculate the SP offset of an interrupt parameter, re-arranging the slot
02576   // normally taken by the return address.
02577   int Offset = 0;
02578   if (CallConv == CallingConv::X86_INTR) {
02579     const X86Subtarget& Subtarget =
02580         static_cast<const X86Subtarget&>(DAG.getSubtarget());
02581     // X86 interrupts may take one or two arguments.
02582     // On the stack there will be no return address as in a regular call.
02583     // The offset of the last argument needs to be set to -4/-8 bytes.
02584     // When there are two arguments, the offset of the first one should be 0 bytes.
02585     Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
02586   }
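  // Worked example of the formula above: with two incoming arguments on
  // x86-64 it yields offsets 0 and -8 (0 and -4 on 32-bit); with a single
  // argument it yields -8 (-4 on 32-bit).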
02587 
02588   // FIXME: For now, all byval parameter objects are marked mutable. This can be
02589   // changed with more analysis.
02590   // In case of tail call optimization, mark all arguments mutable, since they
02591   // could be overwritten by the lowering of arguments in case of a tail call.
02592   if (Flags.isByVal()) {
02593     unsigned Bytes = Flags.getByValSize();
02594     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
02595     int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
02596     // Adjust SP offset of interrupt parameter.
02597     if (CallConv == CallingConv::X86_INTR) {
02598       MFI->setObjectOffset(FI, Offset);
02599     }
02600     return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
02601   } else {
02602     int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
02603                                     VA.getLocMemOffset(), isImmutable);
02604     // Adjust SP offset of interrupt parameter.
02605     if (CallConv == CallingConv::X86_INTR) {
02606       MFI->setObjectOffset(FI, Offset);
02607     }
02608 
02609     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
02610     SDValue Val = DAG.getLoad(
02611         ValVT, dl, Chain, FIN,
02612         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
02613         false, false, 0);
02614     return ExtendedInMem ?
02615       DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
02616   }
02617 }
02618 
02619 // FIXME: Get this from tablegen.
02620 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
02621                                                 const X86Subtarget *Subtarget) {
02622   assert(Subtarget->is64Bit());
02623 
02624   if (Subtarget->isCallingConvWin64(CallConv)) {
02625     static const MCPhysReg GPR64ArgRegsWin64[] = {
02626       X86::RCX, X86::RDX, X86::R8,  X86::R9
02627     };
02628     return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
02629   }
02630 
02631   static const MCPhysReg GPR64ArgRegs64Bit[] = {
02632     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
02633   };
02634   return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
02635 }
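// For illustration (assumed example): a call f(a, b, c) with three integer
// arguments uses RDI/RSI/RDX under the SysV order above, but RCX/RDX/R8 under
// the Win64 order; the actual assignment is performed by the tablegen-generated
// CC_X86 logic.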
02636 
02637 // FIXME: Get this from tablegen.
02638 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
02639                                                 CallingConv::ID CallConv,
02640                                                 const X86Subtarget *Subtarget) {
02641   assert(Subtarget->is64Bit());
02642   if (Subtarget->isCallingConvWin64(CallConv)) {
02643     // The XMM registers which might contain var arg parameters are shadowed
02644     // in their paired GPR.  So we only need to save the GPRs to their home
02645     // slots.
02646     // TODO: __vectorcall will change this.
02647     return None;
02648   }
02649 
02650   const Function *Fn = MF.getFunction();
02651   bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
02652   bool isSoftFloat = Subtarget->useSoftFloat();
02653   assert(!(isSoftFloat && NoImplicitFloatOps) &&
02654          "SSE register cannot be used when SSE is disabled!");
02655   if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
02656     // Kernel mode asks for SSE to be disabled, so there are no XMM argument
02657     // registers.
02658     return None;
02659 
02660   static const MCPhysReg XMMArgRegs64Bit[] = {
02661     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02662     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02663   };
02664   return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
02665 }
02666 
02667 SDValue X86TargetLowering::LowerFormalArguments(
02668     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
02669     const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
02670     SmallVectorImpl<SDValue> &InVals) const {
02671   MachineFunction &MF = DAG.getMachineFunction();
02672   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
02673   const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
02674 
02675   const Function* Fn = MF.getFunction();
02676   if (Fn->hasExternalLinkage() &&
02677       Subtarget->isTargetCygMing() &&
02678       Fn->getName() == "main")
02679     FuncInfo->setForceFramePointer(true);
02680 
02681   MachineFrameInfo *MFI = MF.getFrameInfo();
02682   bool Is64Bit = Subtarget->is64Bit();
02683   bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
02684 
02685   assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
02686          "Var args not supported with calling convention fastcc, ghc or hipe");
02687 
02688   if (CallConv == CallingConv::X86_INTR) {
02689     bool isLegal = Ins.size() == 1 ||
02690                    (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
02691                                         (!Is64Bit && Ins[1].VT == MVT::i32)));
02692     if (!isLegal)
02693       report_fatal_error("X86 interrupts may take one or two arguments");
02694   }
02695 
02696   // Assign locations to all of the incoming arguments.
02697   SmallVector<CCValAssign, 16> ArgLocs;
02698   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
02699 
02700   // Allocate shadow area for Win64
02701   if (IsWin64)
02702     CCInfo.AllocateStack(32, 8);
02703 
02704   CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
02705 
02706   unsigned LastVal = ~0U;
02707   SDValue ArgValue;
02708   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02709     CCValAssign &VA = ArgLocs[i];
02710     // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
02711     // places.
02712     assert(VA.getValNo() != LastVal &&
02713            "Don't support value assigned to multiple locs yet");
02714     (void)LastVal;
02715     LastVal = VA.getValNo();
02716 
02717     if (VA.isRegLoc()) {
02718       EVT RegVT = VA.getLocVT();
02719       const TargetRegisterClass *RC;
02720       if (RegVT == MVT::i32)
02721         RC = &X86::GR32RegClass;
02722       else if (Is64Bit && RegVT == MVT::i64)
02723         RC = &X86::GR64RegClass;
02724       else if (RegVT == MVT::f32)
02725         RC = &X86::FR32RegClass;
02726       else if (RegVT == MVT::f64)
02727         RC = &X86::FR64RegClass;
02728       else if (RegVT == MVT::f128)
02729         RC = &X86::FR128RegClass;
02730       else if (RegVT.is512BitVector())
02731         RC = &X86::VR512RegClass;
02732       else if (RegVT.is256BitVector())
02733         RC = &X86::VR256RegClass;
02734       else if (RegVT.is128BitVector())
02735         RC = &X86::VR128RegClass;
02736       else if (RegVT == MVT::x86mmx)
02737         RC = &X86::VR64RegClass;
02738       else if (RegVT == MVT::i1)
02739         RC = &X86::VK1RegClass;
02740       else if (RegVT == MVT::v8i1)
02741         RC = &X86::VK8RegClass;
02742       else if (RegVT == MVT::v16i1)
02743         RC = &X86::VK16RegClass;
02744       else if (RegVT == MVT::v32i1)
02745         RC = &X86::VK32RegClass;
02746       else if (RegVT == MVT::v64i1)
02747         RC = &X86::VK64RegClass;
02748       else
02749         llvm_unreachable("Unknown argument type!");
02750 
02751       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02752       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
02753 
02754       // If this is an 8 or 16-bit value, it is really passed promoted to 32
02755       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
02756       // right size.
02757       if (VA.getLocInfo() == CCValAssign::SExt)
02758         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
02759                                DAG.getValueType(VA.getValVT()));
02760       else if (VA.getLocInfo() == CCValAssign::ZExt)
02761         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
02762                                DAG.getValueType(VA.getValVT()));
02763       else if (VA.getLocInfo() == CCValAssign::BCvt)
02764         ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
02765 
02766       if (VA.isExtInLoc()) {
02767         // Handle MMX values passed in XMM regs.
02768         if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
02769           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
02770         else
02771           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
02772       }
02773     } else {
02774       assert(VA.isMemLoc());
02775       ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
02776     }
02777 
02778     // If value is passed via pointer - do a load.
02779     if (VA.getLocInfo() == CCValAssign::Indirect)
02780       ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
02781                              MachinePointerInfo(), false, false, false, 0);
02782 
02783     InVals.push_back(ArgValue);
02784   }
02785 
02786   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02787     // All x86 ABIs require that for returning structs by value we copy the
02788     // sret argument into %rax/%eax (depending on ABI) for the return. Save
02789     // the argument into a virtual register so that we can access it from the
02790     // return points.
02791     if (Ins[i].Flags.isSRet()) {
02792       unsigned Reg = FuncInfo->getSRetReturnReg();
02793       if (!Reg) {
02794         MVT PtrTy = getPointerTy(DAG.getDataLayout());
02795         Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
02796         FuncInfo->setSRetReturnReg(Reg);
02797       }
02798       SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
02799       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
02800       break;
02801     }
02802   }
02803 
02804   unsigned StackSize = CCInfo.getNextStackOffset();
02805   // Align stack specially for tail calls.
02806   if (shouldGuaranteeTCO(CallConv,
02807                          MF.getTarget().Options.GuaranteedTailCallOpt))
02808     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
02809 
02810   // If the function takes variable number of arguments, make a frame index for
02811   // the start of the first vararg value... for expansion of llvm.va_start. We
02812   // can skip this if there are no va_start calls.
02813   if (MFI->hasVAStart() &&
02814       (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
02815                    CallConv != CallingConv::X86_ThisCall))) {
02816     FuncInfo->setVarArgsFrameIndex(
02817         MFI->CreateFixedObject(1, StackSize, true));
02818   }
02819 
02820   // Figure out if XMM registers are in use.
02821   assert(!(Subtarget->useSoftFloat() &&
02822            Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
02823          "SSE register cannot be used when SSE is disabled!");
02824 
02825   // 64-bit calling conventions support varargs and register parameters, so we
02826   // have to do extra work to spill them in the prologue.
02827   if (Is64Bit && isVarArg && MFI->hasVAStart()) {
02828     // Find the first unallocated argument registers.
02829     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
02830     ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
02831     unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
02832     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
02833     assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
02834            "SSE register cannot be used when SSE is disabled!");
02835 
02836     // Gather all the live in physical registers.
02837     SmallVector<SDValue, 6> LiveGPRs;
02838     SmallVector<SDValue, 8> LiveXMMRegs;
02839     SDValue ALVal;
02840     for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
02841       unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
02842       LiveGPRs.push_back(
02843           DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
02844     }
02845     if (!ArgXMMs.empty()) {
02846       unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
02847       ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
02848       for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
02849         unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
02850         LiveXMMRegs.push_back(
02851             DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
02852       }
02853     }
02854 
02855     if (IsWin64) {
02856       // Get to the caller-allocated home save location.  Add 8 to account
02857       // for the return address.
02858       int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
02859       FuncInfo->setRegSaveFrameIndex(
02860           MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
02861       // Fixup to set vararg frame on shadow area (4 x i64).
02862       if (NumIntRegs < 4)
02863         FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
02864     } else {
02865       // For X86-64, if there are vararg parameters that are passed via
02866       // registers, then we must store them to their spots on the stack so
02867       // they may be loaded by dereferencing the result of va_next.
02868       FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
02869       FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
02870       FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
02871           ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
02872     }
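    // Worked example for the non-Win64 path above (derived from the code): the
    // register save area is 6*8 + 8*16 = 176 bytes; a variadic function with
    // one fixed integer argument and no fixed FP arguments records
    // VarArgsGPOffset = 8 and VarArgsFPOffset = 48, so va_arg continues with
    // the second GPR slot and the first XMM slot.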
02873 
02874     // Store the integer parameter registers.
02875     SmallVector<SDValue, 8> MemOps;
02876     SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
02877                                       getPointerTy(DAG.getDataLayout()));
02878     unsigned Offset = FuncInfo->getVarArgsGPOffset();
02879     for (SDValue Val : LiveGPRs) {
02880       SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
02881                                 RSFIN, DAG.getIntPtrConstant(Offset, dl));
02882       SDValue Store =
02883           DAG.getStore(Val.getValue(1), dl, Val, FIN,
02884                        MachinePointerInfo::getFixedStack(
02885                            DAG.getMachineFunction(),
02886                            FuncInfo->getRegSaveFrameIndex(), Offset),
02887                        false, false, 0);
02888       MemOps.push_back(Store);
02889       Offset += 8;
02890     }
02891 
02892     if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
02893       // Now store the XMM (fp + vector) parameter registers.
02894       SmallVector<SDValue, 12> SaveXMMOps;
02895       SaveXMMOps.push_back(Chain);
02896       SaveXMMOps.push_back(ALVal);
02897       SaveXMMOps.push_back(DAG.getIntPtrConstant(
02898                              FuncInfo->getRegSaveFrameIndex(), dl));
02899       SaveXMMOps.push_back(DAG.getIntPtrConstant(
02900                              FuncInfo->getVarArgsFPOffset(), dl));
02901       SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
02902                         LiveXMMRegs.end());
02903       MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
02904                                    MVT::Other, SaveXMMOps));
02905     }
02906 
02907     if (!MemOps.empty())
02908       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02909   }
02910 
02911   if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
02912     // Find the largest legal vector type.
02913     MVT VecVT = MVT::Other;
02914     // FIXME: Only some x86_32 calling conventions support AVX512.
02915     if (Subtarget->hasAVX512() &&
02916         (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
02917                      CallConv == CallingConv::Intel_OCL_BI)))
02918       VecVT = MVT::v16f32;
02919     else if (Subtarget->hasAVX())
02920       VecVT = MVT::v8f32;
02921     else if (Subtarget->hasSSE2())
02922       VecVT = MVT::v4f32;
02923 
02924     // We forward some GPRs and some vector types.
02925     SmallVector<MVT, 2> RegParmTypes;
02926     MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
02927     RegParmTypes.push_back(IntVT);
02928     if (VecVT != MVT::Other)
02929       RegParmTypes.push_back(VecVT);
02930 
02931     // Compute the set of forwarded registers. The rest are scratch.
02932     SmallVectorImpl<ForwardedRegister> &Forwards =
02933         FuncInfo->getForwardedMustTailRegParms();
02934     CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
02935 
02936     // Conservatively forward AL on x86_64, since it might be used for varargs.
02937     if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
02938       unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
02939       Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
02940     }
02941 
02942     // Copy all forwards from physical to virtual registers.
02943     for (ForwardedRegister &F : Forwards) {
02944       // FIXME: Can we use a less constrained schedule?
02945       SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
02946       F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
02947       Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
02948     }
02949   }
02950 
02951   // Some CCs need callee pop.
02952   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
02953                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
02954     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
02955   } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
02956     // X86 interrupts must pop the error code if present
02957     FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
02958   } else {
02959     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
02960     // If this is an sret function, the return should pop the hidden pointer.
02961     if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
02962         !Subtarget->getTargetTriple().isOSMSVCRT() &&
02963         argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
02964       FuncInfo->setBytesToPopOnReturn(4);
02965   }
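  // E.g. (illustrative) a 32-bit cdecl function returning a struct through a
  // stack sret pointer is emitted with "retl $4", popping the hidden pointer.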
02966 
02967   if (!Is64Bit) {
02968     // RegSaveFrameIndex is X86-64 only.
02969     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
02970     if (CallConv == CallingConv::X86_FastCall ||
02971         CallConv == CallingConv::X86_ThisCall)
02972       // fastcc functions can't have varargs.
02973       FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
02974   }
02975 
02976   FuncInfo->setArgumentStackSize(StackSize);
02977 
02978   if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
02979     EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
02980     if (Personality == EHPersonality::CoreCLR) {
02981       assert(Is64Bit);
02982       // TODO: Add a mechanism to frame lowering that will allow us to indicate
02983       // that we'd prefer this slot be allocated towards the bottom of the frame
02984       // (i.e. near the stack pointer after allocating the frame).  Every
02985       // funclet needs a copy of this slot in its (mostly empty) frame, and the
02986       // offset from the bottom of this and each funclet's frame must be the
02987       // same, so the size of funclets' (mostly empty) frames is dictated by
02988       // how far this slot is from the bottom (since they allocate just enough
02989       // space to accommodate holding this slot at the correct offset).
02990       int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
02991       EHInfo->PSPSymFrameIdx = PSPSymFI;
02992     }
02993   }
02994 
02995   return Chain;
02996 }
02997 
02998 SDValue
02999 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
03000                                     SDValue StackPtr, SDValue Arg,
03001                                     SDLoc dl, SelectionDAG &DAG,
03002                                     const CCValAssign &VA,
03003                                     ISD::ArgFlagsTy Flags) const {
03004   unsigned LocMemOffset = VA.getLocMemOffset();
03005   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
03006   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
03007                        StackPtr, PtrOff);
03008   if (Flags.isByVal())
03009     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
03010 
03011   return DAG.getStore(
03012       Chain, dl, Arg, PtrOff,
03013       MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
03014       false, false, 0);
03015 }
03016 
03017 /// Emit a load of return address if tail call
03018 /// optimization is performed and it is required.
03019 SDValue
03020 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
03021                                            SDValue &OutRetAddr, SDValue Chain,
03022                                            bool IsTailCall, bool Is64Bit,
03023                                            int FPDiff, SDLoc dl) const {
03024   // Adjust the Return address stack slot.
03025   EVT VT = getPointerTy(DAG.getDataLayout());
03026   OutRetAddr = getReturnAddressFrameIndex(DAG);
03027 
03028   // Load the "old" Return address.
03029   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
03030                            false, false, false, 0);
03031   return SDValue(OutRetAddr.getNode(), 1);
03032 }
03033 
03034 /// Emit a store of the return address if tail call
03035 /// optimization is performed and it is required (FPDiff!=0).
03036 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
03037                                         SDValue Chain, SDValue RetAddrFrIdx,
03038                                         EVT PtrVT, unsigned SlotSize,
03039                                         int FPDiff, SDLoc dl) {
03040   // Store the return address to the appropriate stack slot.
03041   if (!FPDiff) return Chain;
03042   // Calculate the new stack slot for the return address.
03043   int NewReturnAddrFI =
03044     MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
03045                                          false);
03046   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
03047   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
03048                        MachinePointerInfo::getFixedStack(
03049                            DAG.getMachineFunction(), NewReturnAddrFI),
03050                        false, false, 0);
03051   return Chain;
03052 }
03053 
03054 /// Returns a vector_shuffle mask for a movs{s|d} or movd
03055 /// operation of the specified width.
03056 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
03057                        SDValue V2) {
03058   unsigned NumElems = VT.getVectorNumElements();
03059   SmallVector<int, 8> Mask;
03060   Mask.push_back(NumElems);
03061   for (unsigned i = 1; i != NumElems; ++i)
03062     Mask.push_back(i);
03063   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
03064 }
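// Worked example (derived from the loop above): for MVT::v4f32 the mask is
// <4, 1, 2, 3>, i.e. element 0 comes from V2 and elements 1-3 from V1, which
// is exactly the MOVSS merge pattern.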
03065 
03066 SDValue
03067 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
03068                              SmallVectorImpl<SDValue> &InVals) const {
03069   SelectionDAG &DAG                     = CLI.DAG;
03070   SDLoc &dl                             = CLI.DL;
03071   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
03072   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
03073   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
03074   SDValue Chain                         = CLI.Chain;
03075   SDValue Callee                        = CLI.Callee;
03076   CallingConv::ID CallConv              = CLI.CallConv;
03077   bool &isTailCall                      = CLI.IsTailCall;
03078   bool isVarArg                         = CLI.IsVarArg;
03079 
03080   MachineFunction &MF = DAG.getMachineFunction();
03081   bool Is64Bit        = Subtarget->is64Bit();
03082   bool IsWin64        = Subtarget->isCallingConvWin64(CallConv);
03083   StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
03084   bool IsSibcall      = false;
03085   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
03086   auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
03087 
03088   if (CallConv == CallingConv::X86_INTR)
03089     report_fatal_error("X86 interrupts may not be called directly");
03090 
03091   if (Attr.getValueAsString() == "true")
03092     isTailCall = false;
03093 
03094   if (Subtarget->isPICStyleGOT() &&
03095       !MF.getTarget().Options.GuaranteedTailCallOpt) {
03096     // If we are using a GOT, disable tail calls to external symbols with
03097     // default visibility. Tail calling such a symbol requires using a GOT
03098     // relocation, which forces early binding of the symbol. This breaks code
03099     // that requires lazy function symbol resolution. Using musttail or
03100     // GuaranteedTailCallOpt will override this.
03101     GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
03102     if (!G || (!G->getGlobal()->hasLocalLinkage() &&
03103                G->getGlobal()->hasDefaultVisibility()))
03104       isTailCall = false;
03105   }
03106 
03107   bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
03108   if (IsMustTail) {
03109     // Force this to be a tail call.  The verifier rules are enough to ensure
03110     // that we can lower this successfully without moving the return address
03111     // around.
03112     isTailCall = true;
03113   } else if (isTailCall) {
03114     // Check if it's really possible to do a tail call.
03115     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
03116                     isVarArg, SR != NotStructReturn,
03117                     MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
03118                     Outs, OutVals, Ins, DAG);
03119 
03120     // Sibcalls are automatically detected tailcalls which do not require
03121     // ABI changes.
03122     if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
03123       IsSibcall = true;
03124 
03125     if (isTailCall)
03126       ++NumTailCalls;
03127   }
03128 
03129   assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
03130          "Var args not supported with calling convention fastcc, ghc or hipe");
03131 
03132   // Analyze operands of the call, assigning locations to each operand.
03133   SmallVector<CCValAssign, 16> ArgLocs;
03134   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
03135 
03136   // Allocate shadow area for Win64
03137   if (IsWin64)
03138     CCInfo.AllocateStack(32, 8);
03139 
03140   CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03141 
03142   // Get a count of how many bytes are to be pushed on the stack.
03143   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
03144   if (IsSibcall)
03145     // This is a sibcall. The memory operands are available in caller's
03146     // own caller's stack.
03147     NumBytes = 0;
03148   else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
03149            canGuaranteeTCO(CallConv))
03150     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
03151 
03152   int FPDiff = 0;
03153   if (isTailCall && !IsSibcall && !IsMustTail) {
03154     // Lower arguments at fp - stackoffset + fpdiff.
03155     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
03156 
03157     FPDiff = NumBytesCallerPushed - NumBytes;
03158 
03159     // Set the delta of movement of the returnaddr stackslot.
03160     // But only set if delta is greater than previous delta.
03161     if (FPDiff < X86Info->getTCReturnAddrDelta())
03162       X86Info->setTCReturnAddrDelta(FPDiff);
03163   }
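  // Worked example (values assumed): if the caller pops 8 bytes of its own
  // incoming arguments and this call needs 24 bytes, FPDiff is 8 - 24 = -16;
  // EmitTailCallStoreRetAddr below then re-stores the return address in a
  // fixed slot at offset FPDiff - SlotSize.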
03164 
03165   unsigned NumBytesToPush = NumBytes;
03166   unsigned NumBytesToPop = NumBytes;
03167 
03168   // If we have an inalloca argument, all stack space has already been allocated
03169   // for us and is right at the top of the stack.  We don't support multiple
03170   // arguments passed in memory when using inalloca.
03171   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
03172     NumBytesToPush = 0;
03173     if (!ArgLocs.back().isMemLoc())
03174       report_fatal_error("cannot use inalloca attribute on a register "
03175                          "parameter");
03176     if (ArgLocs.back().getLocMemOffset() != 0)
03177       report_fatal_error("any parameter with the inalloca attribute must be "
03178                          "the only memory argument");
03179   }
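  // For reference (assumed IR sketch): an inalloca argument is constructed in
  // pre-allocated stack memory before the call, roughly
  //   %mem = alloca inalloca <{ %struct.S }>
  //   call void @g(<{ %struct.S }>* inalloca %mem)
  // which is why no additional bytes are pushed here.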
03180 
03181   if (!IsSibcall)
03182     Chain = DAG.getCALLSEQ_START(
03183         Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
03184 
03185   SDValue RetAddrFrIdx;
03186   // Load return address for tail calls.
03187   if (isTailCall && FPDiff)
03188     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
03189                                     Is64Bit, FPDiff, dl);
03190 
03191   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
03192   SmallVector<SDValue, 8> MemOpChains;
03193   SDValue StackPtr;
03194 
03195   // Walk the register/memloc assignments, inserting copies/loads.  In the case
03196   // of tail call optimization, arguments are handled later.
03197   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
03198   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03199     // Skip inalloca arguments, they have already been written.
03200     ISD::ArgFlagsTy Flags = Outs[i].Flags;
03201     if (Flags.isInAlloca())
03202       continue;
03203 
03204     CCValAssign &VA = ArgLocs[i];
03205     EVT RegVT = VA.getLocVT();
03206     SDValue Arg = OutVals[i];
03207     bool isByVal = Flags.isByVal();
03208 
03209     // Promote the value if needed.
03210     switch (VA.getLocInfo()) {
03211     default: llvm_unreachable("Unknown loc info!");
03212     case CCValAssign::Full: break;
03213     case CCValAssign::SExt:
03214       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
03215       break;
03216     case CCValAssign::ZExt:
03217       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
03218       break;
03219     case CCValAssign::AExt:
03220       if (Arg.getValueType().isVector() &&
03221           Arg.getValueType().getVectorElementType() == MVT::i1)
03222         Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
03223       else if (RegVT.is128BitVector()) {
03224         // Special case: passing MMX values in XMM registers.
03225         Arg = DAG.getBitcast(MVT::i64, Arg);
03226         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
03227         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
03228       } else
03229         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
03230       break;
03231     case CCValAssign::BCvt:
03232       Arg = DAG.getBitcast(RegVT, Arg);
03233       break;
03234     case CCValAssign::Indirect: {
03235       // Store the argument.
03236       SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
03237       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
03238       Chain = DAG.getStore(
03239           Chain, dl, Arg, SpillSlot,
03240           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
03241           false, false, 0);
03242       Arg = SpillSlot;
03243       break;
03244     }
03245     }
03246 
03247     if (VA.isRegLoc()) {
03248       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
03249       if (isVarArg && IsWin64) {
03250         // Win64 ABI requires argument XMM reg to be copied to the corresponding
03251         // shadow reg if callee is a varargs function.
03252         unsigned ShadowReg = 0;
03253         switch (VA.getLocReg()) {
03254         case X86::XMM0: ShadowReg = X86::RCX; break;
03255         case X86::XMM1: ShadowReg = X86::RDX; break;
03256         case X86::XMM2: ShadowReg = X86::R8; break;
03257         case X86::XMM3: ShadowReg = X86::R9; break;
03258         }
03259         if (ShadowReg)
03260           RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
03261       }
03262     } else if (!IsSibcall && (!isTailCall || isByVal)) {
03263       assert(VA.isMemLoc());
03264       if (!StackPtr.getNode())
03265         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
03266                                       getPointerTy(DAG.getDataLayout()));
03267       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
03268                                              dl, DAG, VA, Flags));
03269     }
03270   }
03271 
03272   if (!MemOpChains.empty())
03273     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
03274 
03275   if (Subtarget->isPICStyleGOT()) {
03276     // ELF / PIC requires the GOT pointer to be in the EBX register before
03277     // making function calls via the PLT.
03278     if (!isTailCall) {
03279       RegsToPass.push_back(std::make_pair(
03280           unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
03281                                           getPointerTy(DAG.getDataLayout()))));
03282     } else {
03283       // If we are tail calling and generating PIC/GOT style code load the
03284       // address of the callee into ECX. The value in ecx is used as target of
03285       // the tail jump. This is done to circumvent the ebx/callee-saved problem
03286       // for tail calls on PIC/GOT architectures. Normally we would just put the
03287       // address of GOT into ebx and then call target@PLT. But for tail calls
03288       // ebx would be restored (since ebx is callee saved) before jumping to the
03289       // target@PLT.
03290 
03291       // Note: The actual moving to ECX is done further down.
03292       GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
03293       if (G && !G->getGlobal()->hasLocalLinkage() &&
03294           G->getGlobal()->hasDefaultVisibility())
03295         Callee = LowerGlobalAddress(Callee, DAG);
03296       else if (isa<ExternalSymbolSDNode>(Callee))
03297         Callee = LowerExternalSymbol(Callee, DAG);
03298     }
03299   }
03300 
03301   if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
03302     // From AMD64 ABI document:
03303     // For calls that may call functions that use varargs or stdargs
03304     // (prototype-less calls or calls to functions containing ellipsis (...) in
03305     // the declaration) %al is used as a hidden argument to specify the number
03306     // of SSE registers used. The contents of %al do not need to match exactly
03307     // the number of registers, but must be an upper bound on the number of SSE
03308     // registers used and must be in the range 0 - 8 inclusive.
03309 
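    // For example (illustrative), a variadic call such as
    // printf("%f %f\n", x, y) with two double arguments in XMM registers is
    // emitted roughly as:
    //   movb $2, %al
    //   callq printf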
03310     // Count the number of XMM registers allocated.
03311     static const MCPhysReg XMMArgRegs[] = {
03312       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
03313       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
03314     };
03315     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
03316     assert((Subtarget->hasSSE1() || !NumXMMRegs)
03317            && "SSE registers cannot be used when SSE is disabled");
03318 
03319     RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
03320                                         DAG.getConstant(NumXMMRegs, dl,
03321                                                         MVT::i8)));
03322   }
03323 
03324   if (isVarArg && IsMustTail) {
03325     const auto &Forwards = X86Info->getForwardedMustTailRegParms();
03326     for (const auto &F : Forwards) {
03327       SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
03328       RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
03329     }
03330   }
03331 
03332   // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
03333   // don't need this because the eligibility check rejects calls that require
03334   // shuffling arguments passed in memory.
03335   if (!IsSibcall && isTailCall) {
03336     // Force all the incoming stack arguments to be loaded from the stack
03337     // before any new outgoing arguments are stored to the stack, because the
03338     // outgoing stack slots may alias the incoming argument stack slots, and
03339     // the alias isn't otherwise explicit. This is slightly more conservative
03340     // than necessary, because it means that each store effectively depends
03341     // on every argument instead of just those arguments it would clobber.
03342     SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
03343 
03344     SmallVector<SDValue, 8> MemOpChains2;
03345     SDValue FIN;
03346     int FI = 0;
03347     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03348       CCValAssign &VA = ArgLocs[i];
03349       if (VA.isRegLoc())
03350         continue;
03351       assert(VA.isMemLoc());
03352       SDValue Arg = OutVals[i];
03353       ISD::ArgFlagsTy Flags = Outs[i].Flags;
03354       // Skip inalloca arguments.  They don't require any work.
03355       if (Flags.isInAlloca())
03356         continue;
03357       // Create frame index.
03358       int32_t Offset = VA.getLocMemOffset()+FPDiff;
03359       uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
03360       FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
03361       FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
03362 
03363       if (Flags.isByVal()) {
03364         // Copy relative to framepointer.
03365         SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
03366         if (!StackPtr.getNode())
03367           StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
03368                                         getPointerTy(DAG.getDataLayout()));
03369         Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
03370                              StackPtr, Source);
03371 
03372         MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
03373                                                          ArgChain,
03374                                                          Flags, DAG, dl));
03375       } else {
03376         // Store relative to framepointer.
03377         MemOpChains2.push_back(DAG.getStore(
03378             ArgChain, dl, Arg, FIN,
03379             MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
03380             false, false, 0));
03381       }
03382     }
03383 
03384     if (!MemOpChains2.empty())
03385       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
03386 
03387     // Store the return address to the appropriate stack slot.
03388     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
03389                                      getPointerTy(DAG.getDataLayout()),
03390                                      RegInfo->getSlotSize(), FPDiff, dl);
03391   }
03392 
03393   // Build a sequence of copy-to-reg nodes chained together with token chain
03394   // and flag operands which copy the outgoing args into registers.
03395   SDValue InFlag;
03396   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
03397     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
03398                              RegsToPass[i].second, InFlag);
03399     InFlag = Chain.getValue(1);
03400   }
03401 
03402   if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
03403     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
03404     // In the 64-bit large code model, we have to make all calls
03405     // through a register, since the call instruction's 32-bit
03406     // pc-relative offset may not be large enough to hold the whole
03407     // address.
03408   } else if (Callee->getOpcode() == ISD::GlobalAddress) {
03409     // If the callee is a GlobalAddress node (quite common, every direct call
03410     // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
03411     // it.
03412     GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
03413 
03414     // We should use extra load for direct calls to dllimported functions in
03415     // non-JIT mode.
03416     const GlobalValue *GV = G->getGlobal();
03417     if (!GV->hasDLLImportStorageClass()) {
03418       unsigned char OpFlags = 0;
03419       bool ExtraLoad = false;
03420       unsigned WrapperKind = ISD::DELETED_NODE;
03421 
03422       // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
03423       // external symbols must go through the PLT in PIC mode.  If the symbol
03424       // has hidden or protected visibility, or if it is static or local, then
03425       // we don't need to use the PLT - we can directly call it.
03426       if (Subtarget->isTargetELF() &&
03427           DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
03428           GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
03429         OpFlags = X86II::MO_PLT;
03430       } else if (Subtarget->isPICStyleStubAny() &&
03431                  !GV->isStrongDefinitionForLinker() &&
03432                  (!Subtarget->getTargetTriple().isMacOSX() ||
03433                   Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
03434         // PC-relative references to external symbols should go through $stub,
03435         // unless we're building with the leopard linker or later, which
03436         // automatically synthesizes these stubs.
03437         OpFlags = X86II::MO_DARWIN_STUB;
03438       } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) &&
03439                  cast<Function>(GV)->hasFnAttribute(Attribute::NonLazyBind)) {
03440         // If the function is marked as non-lazy, generate an indirect call
03441         // which loads from the GOT directly. This avoids runtime overhead
03442         // at the cost of eager binding (and one extra byte of encoding).
03443         OpFlags = X86II::MO_GOTPCREL;
03444         WrapperKind = X86ISD::WrapperRIP;
03445         ExtraLoad = true;
03446       }
03447 
03448       Callee = DAG.getTargetGlobalAddress(
03449           GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
03450 
03451       // Add a wrapper if needed.
03452       if (WrapperKind != ISD::DELETED_NODE)
03453         Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
03454                              getPointerTy(DAG.getDataLayout()), Callee);
03455       // Add extra indirection if needed.
03456       if (ExtraLoad)
03457         Callee = DAG.getLoad(
03458             getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
03459             MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false,
03460             false, 0);
03461     }
03462   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
03463     unsigned char OpFlags = 0;
03464 
03465     // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
03466     // external symbols should go through the PLT.
03467     if (Subtarget->isTargetELF() &&
03468         DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
03469       OpFlags = X86II::MO_PLT;
03470     } else if (Subtarget->isPICStyleStubAny() &&
03471                (!Subtarget->getTargetTriple().isMacOSX() ||
03472                 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
03473       // PC-relative references to external symbols should go through $stub,
03474       // unless we're building with the leopard linker or later, which
03475       // automatically synthesizes these stubs.
03476       OpFlags = X86II::MO_DARWIN_STUB;
03477     }
03478 
03479     Callee = DAG.getTargetExternalSymbol(
03480         S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
03481   } else if (Subtarget->isTarget64BitILP32() &&
03482              Callee->getValueType(0) == MVT::i32) {
03483     // Zero-extend the 32-bit Callee address to 64 bits, as required by the x32 ABI
03484     Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
03485   }
03486 
03487   // Returns a chain & a flag for retval copy to use.
03488   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
03489   SmallVector<SDValue, 8> Ops;
03490 
03491   if (!IsSibcall && isTailCall) {
03492     Chain = DAG.getCALLSEQ_END(Chain,
03493                                DAG.getIntPtrConstant(NumBytesToPop, dl, true),
03494                                DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
03495     InFlag = Chain.getValue(1);
03496   }
03497 
03498   Ops.push_back(Chain);
03499   Ops.push_back(Callee);
03500 
03501   if (isTailCall)
03502     Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
03503 
03504   // Add argument registers to the end of the list so that they are known live
03505   // into the call.
03506   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
03507     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
03508                                   RegsToPass[i].second.getValueType()));
03509 
03510   // Add a register mask operand representing the call-preserved registers.
03511   const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
03512   assert(Mask && "Missing call preserved mask for calling convention");
03513 
03514   // If this is an invoke in a 32-bit function using a funclet-based
03515   // personality, assume the function clobbers all registers. If an exception
03516   // is thrown, the runtime will not restore CSRs.
03517   // FIXME: Model this more precisely so that we can register allocate across
03518   // the normal edge and spill and fill across the exceptional edge.
03519   if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
03520     const Function *CallerFn = MF.getFunction();
03521     EHPersonality Pers =
03522         CallerFn->hasPersonalityFn()
03523             ? classifyEHPersonality(CallerFn->getPersonalityFn())
03524             : EHPersonality::Unknown;
03525     if (isFuncletEHPersonality(Pers))
03526       Mask = RegInfo->getNoPreservedMask();
03527   }
03528 
03529   Ops.push_back(DAG.getRegisterMask(Mask));
03530 
03531   if (InFlag.getNode())
03532     Ops.push_back(InFlag);
03533 
03534   if (isTailCall) {
03535     // We used to do:
03536     //// If this is the first return lowered for this function, add the regs
03537     //// to the liveout set for the function.
03538     // This isn't right, although it's probably harmless on x86; liveouts
03539     // should be computed from returns not tail calls.  Consider a void
03540     // function making a tail call to a function returning int.
03541     MF.getFrameInfo()->setHasTailCall();
03542     return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
03543   }
03544 
03545   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
03546   InFlag = Chain.getValue(1);
03547 
03548   // Create the CALLSEQ_END node.
03549   unsigned NumBytesForCalleeToPop;
03550   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
03551                        DAG.getTarget().Options.GuaranteedTailCallOpt))
03552     NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
03553   else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
03554            !Subtarget->getTargetTriple().isOSMSVCRT() &&
03555            SR == StackStructReturn)
03556     // If this is a call to a struct-return function, the callee
03557     // pops the hidden struct pointer, so we have to push it back.
03558     // This is common for Darwin/X86, Linux & Mingw32 targets.
03559     // For MSVC Win32 targets, the caller pops the hidden struct pointer.
03560     NumBytesForCalleeToPop = 4;
03561   else
03562     NumBytesForCalleeToPop = 0;  // Callee pops nothing.
03563 
03564   // Returns a flag for retval copy to use.
03565   if (!IsSibcall) {
03566     Chain = DAG.getCALLSEQ_END(Chain,
03567                                DAG.getIntPtrConstant(NumBytesToPop, dl, true),
03568                                DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
03569                                                      true),
03570                                InFlag, dl);
03571     InFlag = Chain.getValue(1);
03572   }
03573 
03574   // Handle result values, copying them out of physregs into vregs that we
03575   // return.
03576   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
03577                          Ins, dl, DAG, InVals);
03578 }
03579 
03580 //===----------------------------------------------------------------------===//
03581 //                Fast Calling Convention (tail call) implementation
03582 //===----------------------------------------------------------------------===//
03583 
03584 //  Like the stdcall convention, the callee cleans up the arguments, except that
03585 //  ECX is reserved for storing the address of the tail-called function. Only 2
03586 //  registers are free for argument passing (inreg). Tail call optimization is
03587 //  performed provided:
03588 //                * tailcallopt is enabled
03589 //                * caller/callee are fastcc
03590 //  On the X86_64 architecture with GOT-style position-independent code, only
03591 //  local (within-module) calls are supported at the moment.
03592 //  To keep the stack aligned according to the platform ABI, the function
03593 //  GetAlignedArgumentStackSize ensures that the argument delta is always a
03594 //  multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld,
03595 //  for example.) If a tail-called callee has more arguments than the caller,
03596 //  the caller must make sure that there is room to move the RETADDR to. This is
03597 //  achieved by reserving an area the size of the argument delta right after the
03598 //  original RETADDR, but before the saved frame pointer or the spilled
03599 //  registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
03600 //  stack layout:
03601 //    arg1
03602 //    arg2
03603 //    RETADDR
03604 //    [ new RETADDR
03605 //      move area ]
03606 //    (possible EBP)
03607 //    ESI
03608 //    EDI
03609 //    local1 ..
03610 
03611 /// Make the stack size aligned, e.g. to 16n + 12 for a 16-byte alignment
03612 /// requirement.
03613 unsigned
03614 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
03615                                                SelectionDAG& DAG) const {
03616   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
03617   const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
03618   unsigned StackAlignment = TFI.getStackAlignment();
03619   uint64_t AlignMask = StackAlignment - 1;
03620   int64_t Offset = StackSize;
03621   unsigned SlotSize = RegInfo->getSlotSize();
03622   if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
03623     // The low bits are at most StackAlignment - SlotSize (e.g. 12), so just add the difference.
03624     Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
03625   } else {
03626     // Mask out the lower bits, then add the stack alignment plus (StackAlignment - SlotSize) bytes.
03627     Offset = ((~AlignMask) & Offset) + StackAlignment +
03628       (StackAlignment-SlotSize);
03629   }
03630   return Offset;
03631 }
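// Worked example, assuming StackAlignment = 16 and SlotSize = 4 (so the target
// residue is 16n + 12):
//   StackSize = 20: 20 & 15 = 4  <= 12, so Offset = 20 + (12 - 4) = 28.
//   StackSize = 30: 30 & 15 = 14 >  12, so Offset = (30 & ~15) + 16 + 12 = 44.
// In both cases pushing the 4-byte return address on top of Offset keeps the
// stack 16-byte aligned.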
03632 
03633 /// Return true if the given stack call argument is already available in the
03634 /// same (relative) position in the caller's incoming argument stack.
03635 static
03636 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
03637                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
03638                          const X86InstrInfo *TII) {
03639   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
03640   int FI = INT_MAX;
03641   if (Arg.getOpcode() == ISD::CopyFromReg) {
03642     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
03643     if (!TargetRegisterInfo::isVirtualRegister(VR))
03644       return false;
03645     MachineInstr *Def = MRI->getVRegDef(VR);
03646     if (!Def)
03647       return false;
03648     if (!Flags.isByVal()) {
03649       if (!TII->isLoadFromStackSlot(Def, FI))
03650         return false;
03651     } else {
03652       unsigned Opcode = Def->getOpcode();
03653       if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
03654            Opcode == X86::LEA64_32r) &&
03655           Def->getOperand(1).isFI()) {
03656         FI = Def->getOperand(1).getIndex();
03657         Bytes = Flags.getByValSize();
03658       } else
03659         return false;
03660     }
03661   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
03662     if (Flags.isByVal())
03663       // ByVal argument is passed in as a pointer but it's now being
03664       // dereferenced. e.g.
03665       // define @foo(%struct.X* %A) {
03666       //   tail call @bar(%struct.X* byval %A)
03667       // }
03668       return false;
03669     SDValue Ptr = Ld->getBasePtr();
03670     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
03671     if (!FINode)
03672       return false;
03673     FI = FINode->getIndex();
03674   } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
03675     FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
03676     FI = FINode->getIndex();
03677     Bytes = Flags.getByValSize();
03678   } else
03679     return false;
03680 
03681   assert(FI != INT_MAX);
03682   if (!MFI->isFixedObjectIndex(FI))
03683     return false;
03684   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
03685 }
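// For example, a hypothetical 32-bit sibcall that simply forwards its own
// incoming stack argument,
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }
// would typically pass this check: %x is reloaded from its incoming fixed
// stack slot and would be stored back at the same offset and size, so no stack
// shuffling is required.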
03686 
03687 /// Check whether the call is eligible for tail call optimization. Targets
03688 /// that want to do tail call optimization should implement this function.
03689 bool X86TargetLowering::IsEligibleForTailCallOptimization(
03690     SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
03691     bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
03692     const SmallVectorImpl<ISD::OutputArg> &Outs,
03693     const SmallVectorImpl<SDValue> &OutVals,
03694     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
03695   if (!mayTailCallThisCC(CalleeCC))
03696     return false;
03697 
03698   // If -tailcallopt is specified, make fastcc functions tail-callable.
03699   MachineFunction &MF = DAG.getMachineFunction();
03700   const Function *CallerF = MF.getFunction();
03701 
03702   // If the function return type is x86_fp80 and the callee return type is not,
03703   // then the FP_EXTEND of the call result is not a nop. It's not safe to
03704   // perform a tailcall optimization here.
03705   if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
03706     return false;
03707 
03708   CallingConv::ID CallerCC = CallerF->getCallingConv();
03709   bool CCMatch = CallerCC == CalleeCC;
03710   bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
03711   bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
03712 
03713   // Win64 functions have extra shadow space for argument homing. Don't do the
03714   // sibcall if the caller and callee have mismatched expectations for this
03715   // space.
03716   if (IsCalleeWin64 != IsCallerWin64)
03717     return false;
03718 
03719   if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
03720     if (canGuaranteeTCO(CalleeCC) && CCMatch)
03721       return true;
03722     return false;
03723   }
03724 
03725   // Look for obvious safe cases to perform tail call optimization that do not
03726   // require ABI changes. This is what gcc calls sibcall.
03727 
03728   // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
03729   // emit a special epilogue.
03730   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
03731   if (RegInfo->needsStackRealignment(MF))
03732     return false;
03733 
03734   // Also avoid sibcall optimization if either caller or callee uses struct
03735   // return semantics.
03736   if (isCalleeStructRet || isCallerStructRet)
03737     return false;
03738 
03739   // Do not sibcall optimize vararg calls unless all arguments are passed via
03740   // registers.
03741   if (isVarArg && !Outs.empty()) {
03742     // Optimizing for varargs on Win64 is unlikely to be safe without
03743     // additional testing.
03744     if (IsCalleeWin64 || IsCallerWin64)
03745       return false;
03746 
03747     SmallVector<CCValAssign, 16> ArgLocs;
03748     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03749                    *DAG.getContext());
03750 
03751     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03752     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
03753       if (!ArgLocs[i].isRegLoc())
03754         return false;
03755   }
03756 
03757   // If the call result is in ST0 / ST1, it needs to be popped off the x87
03758   // stack.  Therefore, if it's not used by the call it is not safe to optimize
03759   // this into a sibcall.
03760   bool Unused = false;
03761   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
03762     if (!Ins[i].Used) {
03763       Unused = true;
03764       break;
03765     }
03766   }
03767   if (Unused) {
03768     SmallVector<CCValAssign, 16> RVLocs;
03769     CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
03770                    *DAG.getContext());
03771     CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
03772     for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
03773       CCValAssign &VA = RVLocs[i];
03774       if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
03775         return false;
03776     }
03777   }
03778 
03779   // If the calling conventions do not match, then we'd better make sure the
03780   // results are returned in the same way as what the caller expects.
03781   if (!CCMatch) {
03782     SmallVector<CCValAssign, 16> RVLocs1;
03783     CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
03784                     *DAG.getContext());
03785     CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
03786 
03787     SmallVector<CCValAssign, 16> RVLocs2;
03788     CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
03789                     *DAG.getContext());
03790     CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
03791 
03792     if (RVLocs1.size() != RVLocs2.size())
03793       return false;
03794     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
03795       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
03796         return false;
03797       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
03798         return false;
03799       if (RVLocs1[i].isRegLoc()) {
03800         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
03801           return false;
03802       } else {
03803         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
03804           return false;
03805       }
03806     }
03807   }
03808 
03809   unsigned StackArgsSize = 0;
03810 
03811   // If the callee takes no arguments then go on to check the results of the
03812   // call.
03813   if (!Outs.empty()) {
03814     // Check if stack adjustment is needed. For now, do not do this if any
03815     // argument is passed on the stack.
03816     SmallVector<CCValAssign, 16> ArgLocs;
03817     CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
03818                    *DAG.getContext());
03819 
03820     // Allocate shadow area for Win64
03821     if (IsCalleeWin64)
03822       CCInfo.AllocateStack(32, 8);
03823 
03824     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
03825     StackArgsSize = CCInfo.getNextStackOffset();
03826 
03827     if (CCInfo.getNextStackOffset()) {
03828       // Check if the arguments are already laid out in the right way as
03829       // the caller's fixed stack objects.
03830       MachineFrameInfo *MFI = MF.getFrameInfo();
03831       const MachineRegisterInfo *MRI = &MF.getRegInfo();
03832       const X86InstrInfo *TII = Subtarget->getInstrInfo();
03833       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03834         CCValAssign &VA = ArgLocs[i];
03835         SDValue Arg = OutVals[i];
03836         ISD::ArgFlagsTy Flags = Outs[i].Flags;
03837         if (VA.getLocInfo() == CCValAssign::Indirect)
03838           return false;
03839         if (!VA.isRegLoc()) {
03840           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
03841                                    MFI, MRI, TII))
03842             return false;
03843         }
03844       }
03845     }
03846 
03847     // If the tailcall address may be in a register, then make sure it's
03848     // possible to register allocate for it. In 32-bit, the call address can
03849     // only target EAX, EDX, or ECX since the tail call must be scheduled after
03850     // callee-saved registers are restored. These happen to be the same
03851     // registers used to pass 'inreg' arguments so watch out for those.
03852     if (!Subtarget->is64Bit() &&
03853         ((!isa<GlobalAddressSDNode>(Callee) &&
03854           !isa<ExternalSymbolSDNode>(Callee)) ||
03855          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03856       unsigned NumInRegs = 0;
03857       // In PIC we need an extra register to formulate the address computation
03858       // for the callee.
03859       unsigned MaxInRegs =
03860         (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
03861 
03862       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03863         CCValAssign &VA = ArgLocs[i];
03864         if (!VA.isRegLoc())
03865           continue;
03866         unsigned Reg = VA.getLocReg();
03867         switch (Reg) {
03868         default: break;
03869         case X86::EAX: case X86::EDX: case X86::ECX:
03870           if (++NumInRegs == MaxInRegs)
03871             return false;
03872           break;
03873         }
03874       }
03875     }
03876   }
03877 
03878   bool CalleeWillPop =
03879       X86::isCalleePop(CalleeCC, Subtarget->is64Bit(), isVarArg,
03880                        MF.getTarget().Options.GuaranteedTailCallOpt);
03881 
03882   if (unsigned BytesToPop =
03883           MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
03884     // If we have bytes to pop, the callee must pop them.
03885     bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
03886     if (!CalleePopMatches)
03887       return false;
03888   } else if (CalleeWillPop && StackArgsSize > 0) {
03889     // If we don't have bytes to pop, make sure the callee doesn't pop any.
03890     return false;
03891   }
03892 
03893   return true;
03894 }
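// For example, a sibcall is rejected above when the caller returns x86_fp80
// but the callee does not, when either side uses struct return, when the stack
// needs dynamic realignment, or (for 32-bit code) when so many inreg arguments
// land in EAX, ECX and EDX that no register is left to hold the call target.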
03895 
03896 FastISel *
03897 X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
03898                                   const TargetLibraryInfo *libInfo) const {
03899   return X86::createFastISel(funcInfo, libInfo);
03900 }
03901 
03902 //===----------------------------------------------------------------------===//
03903 //                           Other Lowering Hooks
03904 //===----------------------------------------------------------------------===//
03905 
03906 static bool MayFoldLoad(SDValue Op) {
03907   return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
03908 }
03909 
03910 static bool MayFoldIntoStore(SDValue Op) {
03911   return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
03912 }
03913 
03914 static bool isTargetShuffle(unsigned Opcode) {
03915   switch(Opcode) {
03916   default: return false;
03917   case X86ISD::BLENDI:
03918   case X86ISD::PSHUFB:
03919   case X86ISD::PSHUFD:
03920   case X86ISD::PSHUFHW:
03921   case X86ISD::PSHUFLW:
03922   case X86ISD::SHUFP:
03923   case X86ISD::INSERTPS:
03924   case X86ISD::PALIGNR:
03925   case X86ISD::MOVLHPS:
03926   case X86ISD::MOVLHPD:
03927   case X86ISD::MOVHLPS:
03928   case X86ISD::MOVLPS:
03929   case X86ISD::MOVLPD:
03930   case X86ISD::MOVSHDUP:
03931   case X86ISD::MOVSLDUP:
03932   case X86ISD::MOVDDUP:
03933   case X86ISD::MOVSS:
03934   case X86ISD::MOVSD:
03935   case X86ISD::UNPCKL:
03936   case X86ISD::UNPCKH:
03937   case X86ISD::VPERMILPI:
03938   case X86ISD::VPERM2X128:
03939   case X86ISD::VPERMI:
03940   case X86ISD::VPERMV:
03941   case X86ISD::VPERMV3:
03942   case X86ISD::VZEXT_MOVL:
03943     return true;
03944   }
03945 }
03946 
03947 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT,
03948                                     SDValue V1, unsigned TargetMask,
03949                                     SelectionDAG &DAG) {
03950   switch(Opc) {
03951   default: llvm_unreachable("Unknown x86 shuffle node");
03952   case X86ISD::PSHUFD:
03953   case X86ISD::PSHUFHW:
03954   case X86ISD::PSHUFLW:
03955   case X86ISD::VPERMILPI:
03956   case X86ISD::VPERMI:
03957     return DAG.getNode(Opc, dl, VT, V1,
03958                        DAG.getConstant(TargetMask, dl, MVT::i8));
03959   }
03960 }
03961 
03962 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT,
03963                                     SDValue V1, SDValue V2, SelectionDAG &DAG) {
03964   switch(Opc) {
03965   default: llvm_unreachable("Unknown x86 shuffle node");
03966   case X86ISD::MOVLHPS:
03967   case X86ISD::MOVLHPD:
03968   case X86ISD::MOVHLPS:
03969   case X86ISD::MOVLPS:
03970   case X86ISD::MOVLPD:
03971   case X86ISD::MOVSS:
03972   case X86ISD::MOVSD:
03973   case X86ISD::UNPCKL:
03974   case X86ISD::UNPCKH:
03975     return DAG.getNode(Opc, dl, VT, V1, V2);
03976   }
03977 }
03978 
03979 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
03980   MachineFunction &MF = DAG.getMachineFunction();
03981   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
03982   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
03983   int ReturnAddrIndex = FuncInfo->getRAIndex();
03984 
03985   if (ReturnAddrIndex == 0) {
03986     // Set up a frame object for the return address.
03987     unsigned SlotSize = RegInfo->getSlotSize();
03988     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
03989                                                            -(int64_t)SlotSize,
03990                                                            false);
03991     FuncInfo->setRAIndex(ReturnAddrIndex);
03992   }
03993 
03994   return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
03995 }
03996 
03997 bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
03998                                        bool hasSymbolicDisplacement) {
03999   // Offset should fit into 32 bit immediate field.
04000   if (!isInt<32>(Offset))
04001     return false;
04002 
04003   // If we don't have a symbolic displacement - we don't have any extra
04004   // restrictions.
04005   if (!hasSymbolicDisplacement)
04006     return true;
04007 
04008   // FIXME: Some tweaks might be needed for medium code model.
04009   if (M != CodeModel::Small && M != CodeModel::Kernel)
04010     return false;
04011 
04012   // For the small code model we assume that the last object ends at least 16MB
04013   // before the 31-bit boundary. We may also accept fairly large negative constants,
04014   // knowing that all objects are in the positive half of the address space.
04015   if (M == CodeModel::Small && Offset < 16*1024*1024)
04016     return true;
04017 
04018   // For the kernel code model we know that all objects reside in the negative
04019   // half of the 32-bit address space. We must not accept negative offsets, since
04020   // they may lie just out of range, but we may accept fairly large positive ones.
04021   if (M == CodeModel::Kernel && Offset >= 0)
04022     return true;
04023 
04024   return false;
04025 }
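// For instance, with the small code model an offset of 1MB is accepted even
// with a symbolic displacement, while an offset of 1LL << 31 is rejected up
// front because it no longer fits in a signed 32-bit immediate.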
04026 
04027 /// Determines whether the callee is required to pop its own arguments.
04028 /// Callee pop is necessary to support tail calls.
04029 bool X86::isCalleePop(CallingConv::ID CallingConv,
04030                       bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
04031   // If GuaranteeTCO is true, we force some calls to be callee pop so that we
04032   // can guarantee TCO.
04033   if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
04034     return true;
04035 
04036   switch (CallingConv) {
04037   default:
04038     return false;
04039   case CallingConv::X86_StdCall:
04040   case CallingConv::X86_FastCall:
04041   case CallingConv::X86_ThisCall:
04042   case CallingConv::X86_VectorCall:
04043     return !is64Bit;
04044   }
04045 }
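// For example, a 32-bit X86_StdCall function taking 8 bytes of stack arguments
// returns with "ret $8", popping its own arguments, whereas the default C
// convention returns with a plain "ret" and leaves the cleanup to the caller.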
04046 
04047 /// \brief Return true if the condition is an unsigned comparison operation.
04048 static bool isX86CCUnsigned(unsigned X86CC) {
04049   switch (X86CC) {
04050   default: llvm_unreachable("Invalid integer condition!");
04051   case X86::COND_E:     return true;
04052   case X86::COND_G:     return false;
04053   case X86::COND_GE:    return false;
04054   case X86::COND_L:     return false;
04055   case X86::COND_LE:    return false;
04056   case X86::COND_NE:    return true;
04057   case X86::COND_B:     return true;
04058   case X86::COND_A:     return true;
04059   case X86::COND_BE:    return true;
04060   case X86::COND_AE:    return true;
04061   }
04062 }
04063 
04064 static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
04065   switch (SetCCOpcode) {
04066   default: llvm_unreachable("Invalid integer condition!");
04067   case ISD::SETEQ:  return X86::COND_E;
04068   case ISD::SETGT:  return X86::COND_G;
04069   case ISD::SETGE:  return X86::COND_GE;
04070   case ISD::SETLT:  return X86::COND_L;
04071   case ISD::SETLE:  return X86::COND_LE;
04072   case ISD::SETNE:  return X86::COND_NE;
04073   case ISD::SETULT: return X86::COND_B;
04074   case ISD::SETUGT: return X86::COND_A;
04075   case ISD::SETULE: return X86::COND_BE;
04076   case ISD::SETUGE: return X86::COND_AE;
04077   }
04078 }
04079 
04080 /// Do a one-to-one translation of a ISD::CondCode to the X86-specific
04081 /// condition code, returning the condition code and the LHS/RHS of the
04082 /// comparison to make.
04083 static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP,
04084                                SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
04085   if (!isFP) {
04086     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
04087       if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
04088         // X > -1   -> X == 0, jump !sign.
04089         RHS = DAG.getConstant(0, DL, RHS.getValueType());
04090         return X86::COND_NS;
04091       }
04092       if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
04093         // X < 0   -> X == 0, jump on sign.
04094         return X86::COND_S;
04095       }
04096       if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
04097         // X < 1   -> X <= 0
04098         RHS = DAG.getConstant(0, DL, RHS.getValueType());
04099         return X86::COND_LE;
04100       }
04101     }
04102 
04103     return TranslateIntegerX86CC(SetCCOpcode);
04104   }
04105 
04106   // First determine if it is required or is profitable to flip the operands.
04107 
04108   // If LHS is a foldable load, but RHS is not, flip the condition.
04109   if (ISD::isNON_EXTLoad(LHS.getNode()) &&
04110       !ISD::isNON_EXTLoad(RHS.getNode())) {
04111     SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
04112     std::swap(LHS, RHS);
04113   }
04114 
04115   switch (SetCCOpcode) {
04116   default: break;
04117   case ISD::SETOLT:
04118   case ISD::SETOLE:
04119   case ISD::SETUGT:
04120   case ISD::SETUGE:
04121     std::swap(LHS, RHS);
04122     break;
04123   }
04124 
04125   // On a floating point condition, the flags are set as follows:
04126   // ZF  PF  CF   op
04127   //  0 | 0 | 0 | X > Y
04128   //  0 | 0 | 1 | X < Y
04129   //  1 | 0 | 0 | X == Y
04130   //  1 | 1 | 1 | unordered
04131   switch (SetCCOpcode) {
04132   default: llvm_unreachable("Condcode should be pre-legalized away");
04133   case ISD::SETUEQ:
04134   case ISD::SETEQ:   return X86::COND_E;
04135   case ISD::SETOLT:              // flipped
04136   case ISD::SETOGT:
04137   case ISD::SETGT:   return X86::COND_A;
04138   case ISD::SETOLE:              // flipped
04139   case ISD::SETOGE:
04140   case ISD::SETGE:   return X86::COND_AE;
04141   case ISD::SETUGT:              // flipped
04142   case ISD::SETULT:
04143   case ISD::SETLT:   return X86::COND_B;
04144   case ISD::SETUGE:              // flipped
04145   case ISD::SETULE:
04146   case ISD::SETLE:   return X86::COND_BE;
04147   case ISD::SETONE:
04148   case ISD::SETNE:   return X86::COND_NE;
04149   case ISD::SETUO:   return X86::COND_P;
04150   case ISD::SETO:    return X86::COND_NP;
04151   case ISD::SETOEQ:
04152   case ISD::SETUNE:  return X86::COND_INVALID;
04153   }
04154 }
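// Example: "icmp sgt i32 %x, -1" is translated above into a comparison against
// zero with X86::COND_NS, so the final code can be "test %eax, %eax; jns ..."
// rather than "cmp $-1, %eax; jg ...".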
04155 
04156 /// Is there a floating point cmov for the specific X86 condition code?
04157 /// Current x86 isa includes the following FP cmov instructions:
04158 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
04159 static bool hasFPCMov(unsigned X86CC) {
04160   switch (X86CC) {
04161   default:
04162     return false;
04163   case X86::COND_B:
04164   case X86::COND_BE:
04165   case X86::COND_E:
04166   case X86::COND_P:
04167   case X86::COND_A:
04168   case X86::COND_AE:
04169   case X86::COND_NE:
04170   case X86::COND_NP:
04171     return true;
04172   }
04173 }
04174 
04175 
04176 bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
04177                                            const CallInst &I,
04178                                            unsigned Intrinsic) const {
04179 
04180   const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
04181   if (!IntrData)
04182     return false;
04183 
04184   Info.opc = ISD::INTRINSIC_W_CHAIN;
04185   Info.readMem = false;
04186   Info.writeMem = false;
04187   Info.vol = false;
04188   Info.offset = 0;
04189 
04190   switch (IntrData->Type) {
04191   case LOADA:
04192   case LOADU: {
04193     Info.ptrVal = I.getArgOperand(0);
04194     Info.memVT = MVT::getVT(I.getType());
04195     Info.align = (IntrData->Type == LOADA ? Info.memVT.getSizeInBits()/8 : 1);
04196     Info.readMem = true;
04197     break;
04198   }
04199   case EXPAND_FROM_MEM: {
04200     Info.ptrVal = I.getArgOperand(0);
04201     Info.memVT = MVT::getVT(I.getType());
04202     Info.align = 1;
04203     Info.readMem = true;
04204     break;
04205   }
04206   case COMPRESS_TO_MEM: {
04207     Info.ptrVal = I.getArgOperand(0);
04208     Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
04209     Info.align = 1;
04210     Info.writeMem = true;
04211     break;
04212   }
04213   case TRUNCATE_TO_MEM_VI8:
04214   case TRUNCATE_TO_MEM_VI16:
04215   case TRUNCATE_TO_MEM_VI32: {
04216     Info.ptrVal = I.getArgOperand(0);
04217     MVT VT  = MVT::getVT(I.getArgOperand(1)->getType());
04218     MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
04219     if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
04220       ScalarVT = MVT::i8;
04221     else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
04222       ScalarVT = MVT::i16;
04223     else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
04224       ScalarVT = MVT::i32;
04225 
04226     Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
04227     Info.align = 1;
04228     Info.writeMem = true;
04229     break;
04230   }
04231   case STOREA:
04232   case STOREANT:
04233   case STOREU: {
04234     Info.ptrVal = I.getArgOperand(0);
04235     Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
04236     Info.align = (IntrData->Type == STOREU ? 1 : Info.memVT.getSizeInBits()/8);
04237     Info.writeMem = true;
04238     break;
04239   }
04240   default:
04241     return false;
04242   }
04243 
04244   return true;
04245 }
04246 
04247 /// Returns true if the target can instruction select the
04248 /// specified FP immediate natively. If false, the legalizer will
04249 /// materialize the FP immediate as a load from a constant pool.
04250 bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
04251   for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
04252     if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
04253       return true;
04254   }
04255   return false;
04256 }
04257 
04258 bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
04259                                               ISD::LoadExtType ExtTy,
04260                                               EVT NewVT) const {
04261   // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
04262   // relocation target a movq or addq instruction: don't let the load shrink.
04263   SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
04264   if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
04265     if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
04266       return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
04267   return true;
04268 }
04269 
04270 /// \brief Returns true if it is beneficial to convert a load of a constant
04271 /// to just the constant itself.
04272 bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
04273                                                           Type *Ty) const {
04274   assert(Ty->isIntegerTy());
04275 
04276   unsigned BitSize = Ty->getPrimitiveSizeInBits();
04277   if (BitSize == 0 || BitSize > 64)
04278     return false;
04279   return true;
04280 }
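// For example, materializing the 64-bit constant 0x1122334455667788 with
// "movabsq $0x1122334455667788, %rax" is generally preferable to loading it
// from the constant pool, so this hook returns true for any integer type of
// 64 bits or fewer.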
04281 
04282 bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
04283                                                 unsigned Index) const {
04284   if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
04285     return false;
04286 
04287   return (Index == 0 || Index == ResVT.getVectorNumElements());
04288 }
04289 
04290 bool X86TargetLowering::isCheapToSpeculateCttz() const {
04291   // Speculate cttz only if we can directly use TZCNT.
04292   return Subtarget->hasBMI();
04293 }
04294 
04295 bool X86TargetLowering::isCheapToSpeculateCtlz() const {
04296   // Speculate ctlz only if we can directly use LZCNT.
04297   return Subtarget->hasLZCNT();
04298 }
04299 
04300 /// Return true if every element in Mask, beginning
04301 /// from position Pos and ending in Pos+Size is undef.
04302 static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
04303   for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
04304     if (0 <= Mask[i])
04305       return false;
04306   return true;
04307 }
04308 
04309 /// Return true if Val is undef or if its value falls within the
04310 /// specified range [Low, Hi).
04311 static bool isUndefOrInRange(int Val, int Low, int Hi) {
04312   return (Val < 0) || (Val >= Low && Val < Hi);
04313 }
04314 
04315 /// Val is either less than zero (undef) or equal to the specified value.
04316 static bool isUndefOrEqual(int Val, int CmpVal) {
04317   return (Val < 0 || Val == CmpVal);
04318 }
04319 
04320 /// Return true if every element in Mask, beginning
04321 /// from position Pos and ending in Pos+Size, falls within the specified
04322 /// sequential range [Low, Low+Size), or is undef.
04323 static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
04324                                        unsigned Pos, unsigned Size, int Low) {
04325   for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
04326     if (!isUndefOrEqual(Mask[i], Low))
04327       return false;
04328   return true;
04329 }
04330 
04331 /// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
04332 /// extract that is suitable for instructions that extract 128- or 256-bit vectors.
04333 static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
04334   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
04335   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
04336     return false;
04337 
04338   // The index should be aligned on a vecWidth-bit boundary.
04339   uint64_t Index =
04340     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
04341 
04342   MVT VT = N->getSimpleValueType(0);
04343   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
04344   bool Result = (Index * ElSize) % vecWidth == 0;
04345 
04346   return Result;
04347 }
04348 
04349 /// Return true if the specified INSERT_SUBVECTOR
04350 /// operand specifies a subvector insert that is suitable for instructions
04351 /// that insert 128- or 256-bit subvectors.
04352 static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
04353   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
04354   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
04355     return false;
04356   // The index should be aligned on a vecWidth-bit boundary.
04357   uint64_t Index =
04358     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
04359 
04360   MVT VT = N->getSimpleValueType(0);
04361   unsigned ElSize = VT.getVectorElementType().getSizeInBits();
04362   bool Result = (Index * ElSize) % vecWidth == 0;
04363 
04364   return Result;
04365 }
04366 
04367 bool X86::isVINSERT128Index(SDNode *N) {
04368   return isVINSERTIndex(N, 128);
04369 }
04370 
04371 bool X86::isVINSERT256Index(SDNode *N) {
04372   return isVINSERTIndex(N, 256);
04373 }
04374 
04375 bool X86::isVEXTRACT128Index(SDNode *N) {
04376   return isVEXTRACTIndex(N, 128);
04377 }
04378 
04379 bool X86::isVEXTRACT256Index(SDNode *N) {
04380   return isVEXTRACTIndex(N, 256);
04381 }
04382 
04383 static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
04384   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
04385   assert(isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
04386          "Illegal extract subvector for VEXTRACT");
04387 
04388   uint64_t Index =
04389     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
04390 
04391   MVT VecVT = N->getOperand(0).getSimpleValueType();
04392   MVT ElVT = VecVT.getVectorElementType();
04393 
04394   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
04395   return Index / NumElemsPerChunk;
04396 }
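// Worked example: extracting the upper 128-bit half of a v8f32 uses Index = 4;
// with ElVT = f32, NumElemsPerChunk = 128 / 32 = 4, so the immediate is
// 4 / 4 = 1, matching "vextractf128 $1".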
04397 
04398 static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
04399   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
04400   assert(isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
04401          "Illegal insert subvector for VINSERT");
04402 
04403   uint64_t Index =
04404     cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
04405 
04406   MVT VecVT = N->getSimpleValueType(0);
04407   MVT ElVT = VecVT.getVectorElementType();
04408 
04409   unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
04410   return Index / NumElemsPerChunk;
04411 }
04412 
04413 /// Return the appropriate immediate to extract the specified
04414 /// EXTRACT_SUBVECTOR index with VEXTRACTF128 and VEXTRACTI128 instructions.
04415 unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
04416   return getExtractVEXTRACTImmediate(N, 128);
04417 }
04418 
04419 /// Return the appropriate immediate to extract the specified
04420 /// EXTRACT_SUBVECTOR index with VEXTRACTF64x4 and VEXTRACTI64x4 instructions.
04421 unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
04422   return getExtractVEXTRACTImmediate(N, 256);
04423 }
04424 
04425 /// Return the appropriate immediate to insert at the specified
04426 /// INSERT_SUBVECTOR index with VINSERTF128 and VINSERTI128 instructions.
04427 unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
04428   return getInsertVINSERTImmediate(N, 128);
04429 }
04430 
04431 /// Return the appropriate immediate to insert at the specified
04432 /// INSERT_SUBVECTOR index with VINSERTF64x4 and VINSERTI64x4 instructions.
04433 unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
04434   return getInsertVINSERTImmediate(N, 256);
04435 }
04436 
04437 /// Returns true if Elt is a constant zero or a floating point constant +0.0.
04438 bool X86::isZeroNode(SDValue Elt) {
04439   return isNullConstant(Elt) || isNullFPConstant(Elt);
04440 }
04441 
04442 // Build a vector of constants
04443 // Use an UNDEF node if MaskElt == -1.
04444 // Split 64-bit constants in 32-bit mode.
04445 static SDValue getConstVector(ArrayRef<int> Values, MVT VT,
04446                               SelectionDAG &DAG,
04447                               SDLoc dl, bool IsMask = false) {
04448 
04449   SmallVector<SDValue, 32>  Ops;
04450   bool Split = false;
04451 
04452   MVT ConstVecVT = VT;
04453   unsigned NumElts = VT.getVectorNumElements();
04454   bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
04455   if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
04456     ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
04457     Split = true;
04458   }
04459 
04460   MVT EltVT = ConstVecVT.getVectorElementType();
04461   for (unsigned i = 0; i < NumElts; ++i) {
04462     bool IsUndef = Values[i] < 0 && IsMask;
04463     SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
04464       DAG.getConstant(Values[i], dl, EltVT);
04465     Ops.push_back(OpNode);
04466     if (Split)
04467       Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
04468                     DAG.getConstant(0, dl, EltVT));
04469   }
04470   SDValue ConstsNode = DAG.getNode(ISD::BUILD_VECTOR, dl, ConstVecVT, Ops);
04471   if (Split)
04472     ConstsNode = DAG.getBitcast(VT, ConstsNode);
04473   return ConstsNode;
04474 }
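// For example, in 32-bit mode (where i64 is not legal) a v2i64 constant
// <1, 2> is built as the v4i32 vector <1, 0, 2, 0> and then bitcast back to
// v2i64, which yields the same little-endian bit pattern.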
04475 
04476 /// Returns a vector of specified type with all zero elements.
04477 static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget,
04478                              SelectionDAG &DAG, SDLoc dl) {
04479   assert(VT.isVector() && "Expected a vector type");
04480 
04481   // Always build SSE zero vectors as <4 x i32> bitcasted
04482   // to their dest type. This ensures they get CSE'd.
04483   SDValue Vec;
04484   if (VT.is128BitVector()) {  // SSE
04485     if (Subtarget->hasSSE2()) {  // SSE2
04486       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
04487       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
04488     } else { // SSE1
04489       SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32);
04490       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
04491     }
04492   } else if (VT.is256BitVector()) { // AVX
04493     if (Subtarget->hasInt256()) { // AVX2
04494       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
04495       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04496       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
04497     } else {
04498       // 256-bit logic and arithmetic instructions in AVX are all
04499       // floating-point, no support for integer ops. Emit fp zeroed vectors.
04500       SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32);
04501       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04502       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
04503     }
04504   } else if (VT.is512BitVector()) { // AVX-512
04505       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
04506       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
04507                         Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04508       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
04509   } else if (VT.getVectorElementType() == MVT::i1) {
04510 
04511     assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16)
04512             && "Unexpected vector type");
04513     assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8)
04514             && "Unexpected vector type");
04515     SDValue Cst = DAG.getConstant(0, dl, MVT::i1);
04516     SmallVector<SDValue, 64> Ops(VT.getVectorNumElements(), Cst);
04517     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
04518   } else
04519     llvm_unreachable("Unexpected vector type");
04520 
04521   return DAG.getBitcast(VT, Vec);
04522 }
04523 
04524 static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
04525                                 SelectionDAG &DAG, SDLoc dl,
04526                                 unsigned vectorWidth) {
04527   assert((vectorWidth == 128 || vectorWidth == 256) &&
04528          "Unsupported vector width");
04529   EVT VT = Vec.getValueType();
04530   EVT ElVT = VT.getVectorElementType();
04531   unsigned Factor = VT.getSizeInBits()/vectorWidth;
04532   EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
04533                                   VT.getVectorNumElements()/Factor);
04534 
04535   // Extract from UNDEF is UNDEF.
04536   if (Vec.getOpcode() == ISD::UNDEF)
04537     return DAG.getUNDEF(ResultVT);
04538 
04539   // Extract the relevant vectorWidth bits.  Generate an EXTRACT_SUBVECTOR
04540   unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
04541   assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
04542 
04543   // This is the index of the first element of the vectorWidth-bit chunk
04544   // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
04545   IdxVal &= ~(ElemsPerChunk - 1);
04546 
04547   // If the input is a buildvector just emit a smaller one.
04548   if (Vec.getOpcode() == ISD::BUILD_VECTOR)
04549     return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
04550                        makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
04551 
04552   SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
04553   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
04554 }
04555 
04556 /// Generate a DAG to grab 128-bits from a vector > 128 bits.  This
04557 /// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
04558 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
04559 /// instructions or a simple subregister reference. Idx is an index in the
04560 /// 128 bits we want.  It need not be aligned to a 128-bit boundary.  That makes
04561 /// lowering EXTRACT_VECTOR_ELT operations easier.
04562 static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
04563                                    SelectionDAG &DAG, SDLoc dl) {
04564   assert((Vec.getValueType().is256BitVector() ||
04565           Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
04566   return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
04567 }
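// For example, Extract128BitVector of a v8f32 with IdxVal = 6 rounds the index
// down to 4 (the 128-bit chunk containing element 6) and emits an
// EXTRACT_SUBVECTOR (or a smaller BUILD_VECTOR) of the upper v4f32 half.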
04568 
04569 /// Generate a DAG to grab 256-bits from a 512-bit vector.
04570 static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
04571                                    SelectionDAG &DAG, SDLoc dl) {
04572   assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
04573   return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
04574 }
04575 
04576 static SDValue InsertSubVector(SDValue Result, SDValue Vec,
04577                                unsigned IdxVal, SelectionDAG &DAG,
04578                                SDLoc dl, unsigned vectorWidth) {
04579   assert((vectorWidth == 128 || vectorWidth == 256) &&
04580          "Unsupported vector width");
04581   // Inserting UNDEF is Result
04582   if (Vec.getOpcode() == ISD::UNDEF)
04583     return Result;
04584   EVT VT = Vec.getValueType();
04585   EVT ElVT = VT.getVectorElementType();
04586   EVT ResultVT = Result.getValueType();
04587 
04588   // Insert the relevant vectorWidth bits.
04589   unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
04590   assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
04591 
04592   // This is the index of the first element of the vectorWidth-bit chunk
04593   // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
04594   IdxVal &= ~(ElemsPerChunk - 1);
04595 
04596   SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
04597   return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
04598 }
04599 
04600 /// Generate a DAG to put 128-bits into a vector > 128 bits.  This
04601 /// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
04602 /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
04603 /// simple superregister reference.  Idx is an index in the 128 bits
04604 /// we want.  It need not be aligned to a 128-bit boundary.  That makes
04605 /// lowering INSERT_VECTOR_ELT operations easier.
04606 static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
04607                                   SelectionDAG &DAG, SDLoc dl) {
04608   assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
04609 
04610   // For insertion into the zero index (low half) of a 256-bit vector, it is
04611   // more efficient to generate a blend with immediate instead of an insert*128.
04612   // We are still creating an INSERT_SUBVECTOR below with an undef node to
04613   // extend the subvector to the size of the result vector. Make sure that
04614   // we are not recursing on that node by checking for undef here.
04615   if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
04616       Result.getOpcode() != ISD::UNDEF) {
04617     EVT ResultVT = Result.getValueType();
04618     SDValue ZeroIndex = DAG.getIntPtrConstant(0, dl);
04619     SDValue Undef = DAG.getUNDEF(ResultVT);
04620     SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
04621                                  Vec, ZeroIndex);
04622 
04623     // The blend instruction, and therefore its mask, depend on the data type.
04624     MVT ScalarType = ResultVT.getVectorElementType().getSimpleVT();
04625     if (ScalarType.isFloatingPoint()) {
04626       // Choose either vblendps (float) or vblendpd (double).
04627       unsigned ScalarSize = ScalarType.getSizeInBits();
04628       assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
04629       unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
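            // E.g. a v4f64 result blends the two low f64 lanes from Vec256 (mask 0x03),
            // while a v8f32 result blends the four low f32 lanes (mask 0x0f).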
04630       SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8);
04631       return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
04632     }
04633 
04634     const X86Subtarget &Subtarget =
04635         static_cast<const X86Subtarget &>(DAG.getSubtarget());
04636 
04637     // AVX2 is needed for 256-bit integer blend support.
04638     // Integers must be cast to 32-bit because there is only vpblendd;
04639     // vpblendw can't be used for this because it has a handicapped mask.
04640 
04641     // If we don't have AVX2, then cast to float. Using a wrong domain blend
04642     // is still more efficient than using the wrong domain vinsertf128 that
04643     // will be created by InsertSubVector().
04644     MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
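          // After the cast to eight 32-bit lanes, the inserted 128 bits are always
          // the four low lanes, so the blend mask is 0x0f in either case.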
04645 
04646     SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
04647     Result = DAG.getBitcast(CastVT, Result);
04648     Vec256 = DAG.getBitcast(CastVT, Vec256);
04649     Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
04650     return DAG.getBitcast(ResultVT, Vec256);
04651   }
04652 
04653   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
04654 }
04655 
04656 static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
04657                                   SelectionDAG &DAG, SDLoc dl) {
04658   assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
04659   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
04660 }
04661 
04662 /// Insert i1-subvector to i1-vector.
04663 static SDValue Insert1BitVector(SDValue Op, SelectionDAG &DAG) {
04664 
04665   SDLoc dl(Op);
04666   SDValue Vec = Op.getOperand(0);
04667   SDValue SubVec = Op.getOperand(1);
04668   SDValue Idx = Op.getOperand(2);
04669 
04670   if (!isa<ConstantSDNode>(Idx))
04671     return SDValue();
04672 
04673   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
04674   if (IdxVal == 0  && Vec.isUndef()) // the operation is legal
04675     return Op;
04676 
04677   MVT OpVT = Op.getSimpleValueType();
04678   MVT SubVecVT = SubVec.getSimpleValueType();
04679   unsigned NumElems = OpVT.getVectorNumElements();
04680   unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
04681 
04682   assert(IdxVal + SubVecNumElems <= NumElems &&
04683          IdxVal % SubVecVT.getSizeInBits() == 0 &&
04684          "Unexpected index value in INSERT_SUBVECTOR");
04685 
04686   // There are 3 possible cases:
04687   // 1. Subvector should be inserted in the lower part (IdxVal == 0)
04688   // 2. Subvector should be inserted in the upper part
04689   //    (IdxVal + SubVecNumElems == NumElems)
04690   // 3. Subvector should be inserted in the middle (for example v2i1
04691   //    to v16i1, index 2)
04692 
04693   SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
04694   SDValue Undef = DAG.getUNDEF(OpVT);
04695   SDValue WideSubVec =
04696     DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, SubVec, ZeroIdx);
04697   if (Vec.isUndef())
04698     return DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec,
04699       DAG.getConstant(IdxVal, dl, MVT::i8));
04700 
04701   if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
04702     unsigned ShiftLeft = NumElems - SubVecNumElems;
04703     unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
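          // E.g. (illustrative) inserting a v2i1 into a zero v8i1 at IdxVal == 2 gives
          // ShiftLeft == 6 and ShiftRight == 4, leaving the mask bits at positions 2..3.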
04704     WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec,
04705       DAG.getConstant(ShiftLeft, dl, MVT::i8));
04706     return ShiftRight ? DAG.getNode(X86ISD::VSRLI, dl, OpVT, WideSubVec,
04707       DAG.getConstant(ShiftRight, dl, MVT::i8)) : WideSubVec;
04708   }
04709 
04710   if (IdxVal == 0) {
04711     // Zero lower bits of the Vec
04712     SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
04713     Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
04714     Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
04715     // Merge them together
04716     return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec);
04717   }
04718 
04719   // Simple case when we put subvector in the upper part
04720   if (IdxVal + SubVecNumElems == NumElems) {
04721     // Move the subvector to the upper part, then zero the upper bits of Vec.
04722     WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec,
04723                         DAG.getConstant(IdxVal, dl, MVT::i8));
04724     SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
04725     Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
04726     Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
04727     return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec);
04728   }
04729   // Subvector should be inserted in the middle - use shuffle
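        // Positions covered by the subvector take their value from the widened
        // subvector (operand 0); every other position keeps the matching element of
        // Vec (operand 1).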
04730   SmallVector<int, 64> Mask;
04731   for (unsigned i = 0; i < NumElems; ++i)
04732     Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
04733                     i - IdxVal : i + NumElems);
04734   return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
04735 }
04736 
04737 /// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
04738 /// instructions. This is used because creating CONCAT_VECTORS nodes of
04739 /// BUILD_VECTORs returns a larger BUILD_VECTOR while we're trying to lower
04740 /// large BUILD_VECTORs.
04741 static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
04742                                    unsigned NumElems, SelectionDAG &DAG,
04743                                    SDLoc dl) {
04744   SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
04745   return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
04746 }
04747 
04748 static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
04749                                    unsigned NumElems, SelectionDAG &DAG,
04750                                    SDLoc dl) {
04751   SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
04752   return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
04753 }
04754 
04755 /// Returns a vector of specified type with all bits set.
04756 /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
04757 /// no AVX2 support, use two <4 x i32> inserted in an <8 x i32> appropriately.
04758 /// Then bitcast to their original type, ensuring they get CSE'd.
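      /// For example (illustrative), an all-ones v4i64 on AVX2 is built as a v8i32
      /// of ~0U values and then bitcast back to v4i64.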
04759 static SDValue getOnesVector(EVT VT, const X86Subtarget *Subtarget,
04760                              SelectionDAG &DAG, SDLoc dl) {
04761   assert(VT.isVector() && "Expected a vector type");
04762 
04763   SDValue Cst = DAG.getConstant(~0U, dl, MVT::i32);
04764   SDValue Vec;
04765   if (VT.is512BitVector()) {
04766     SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
04767                       Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04768     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
04769   } else if (VT.is256BitVector()) {
04770     if (Subtarget->hasInt256()) { // AVX2
04771       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
04772       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
04773     } else { // AVX
04774       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
04775       Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
04776     }
04777   } else if (VT.is128BitVector()) {
04778     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
04779   } else
04780     llvm_unreachable("Unexpected vector type");
04781 
04782   return DAG.getBitcast(VT, Vec);
04783 }
04784 
04785 /// Returns a vector_shuffle node for an unpackl operation.
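      /// E.g. (illustrative) for v4i32 the mask is <0, 4, 1, 5>, interleaving the
      /// low halves of V1 and V2.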
04786 static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
04787                           SDValue V2) {
04788   unsigned NumElems = VT.getVectorNumElements();
04789   SmallVector<int, 8> Mask;
04790   for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
04791     Mask.push_back(i);
04792     Mask.push_back(i + NumElems);
04793   }
04794   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
04795 }
04796 
04797 /// Returns a vector_shuffle node for an unpackh operation.
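      /// E.g. (illustrative) for v4i32 the mask is <2, 6, 3, 7>, interleaving the
      /// high halves of V1 and V2.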
04798 static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
04799                           SDValue V2) {
04800   unsigned NumElems = VT.getVectorNumElements();
04801   SmallVector<int, 8> Mask;
04802   for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
04803     Mask.push_back(i + Half);
04804     Mask.push_back(i + NumElems + Half);
04805   }
04806   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
04807 }
04808 
04809 /// Return a vector_shuffle of the specified vector against a zero or undef
04810 /// vector. This produces a shuffle where the low element of V2 is swizzled
04811 /// into the zero/undef vector, landing at element Idx.
04812 /// The resulting shuffle mask looks like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
04813 static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
04814                                            bool IsZero,
04815                                            const X86Subtarget *Subtarget,
04816                                            SelectionDAG &DAG) {
04817   MVT VT = V2.getSimpleValueType();
04818   SDValue V1 = IsZero
04819     ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
04820   unsigned NumElems = VT.getVectorNumElements();
04821   SmallVector<int, 16> MaskVec;
04822   for (unsigned i = 0; i != NumElems; ++i)
04823     // If this is the insertion idx, put the low elt of V2 here.
04824     MaskVec.push_back(i == Idx ? NumElems : i);
04825   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
04826 }
04827 
04828 /// Calculates the shuffle mask corresponding to the target-specific opcode.
04829 /// Returns true if the Mask could be calculated. Sets IsUnary to true if only
04830 /// uses one source. Note that this will set IsUnary for shuffles which use a
04831 /// single input multiple times, and in those cases it will
04832 /// adjust the mask to only have indices within that single input.
04833 static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
04834                                  SmallVectorImpl<int> &Mask, bool &IsUnary) {
04835   unsigned NumElems = VT.getVectorNumElements();
04836   SDValue ImmN;
04837 
04838   IsUnary = false;
04839   bool IsFakeUnary = false;
04840   switch(N->getOpcode()) {
04841   case X86ISD::BLENDI:
04842     ImmN = N->getOperand(N->getNumOperands()-1);
04843     DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04844     break;
04845   case X86ISD::SHUFP:
04846     ImmN = N->getOperand(N->getNumOperands()-1);
04847     DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04848     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04849     break;
04850   case X86ISD::INSERTPS:
04851     ImmN = N->getOperand(N->getNumOperands()-1);
04852     DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04853     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04854     break;
04855   case X86ISD::UNPCKH:
04856     DecodeUNPCKHMask(VT, Mask);
04857     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04858     break;
04859   case X86ISD::UNPCKL:
04860     DecodeUNPCKLMask(VT, Mask);
04861     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04862     break;
04863   case X86ISD::MOVHLPS:
04864     DecodeMOVHLPSMask(NumElems, Mask);
04865     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04866     break;
04867   case X86ISD::MOVLHPS:
04868     DecodeMOVLHPSMask(NumElems, Mask);
04869     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04870     break;
04871   case X86ISD::PALIGNR:
04872     ImmN = N->getOperand(N->getNumOperands()-1);
04873     DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04874     break;
04875   case X86ISD::PSHUFD:
04876   case X86ISD::VPERMILPI:
04877     ImmN = N->getOperand(N->getNumOperands()-1);
04878     DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04879     IsUnary = true;
04880     break;
04881   case X86ISD::PSHUFHW:
04882     ImmN = N->getOperand(N->getNumOperands()-1);
04883     DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04884     IsUnary = true;
04885     break;
04886   case X86ISD::PSHUFLW:
04887     ImmN = N->getOperand(N->getNumOperands()-1);
04888     DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04889     IsUnary = true;
04890     break;
04891   case X86ISD::VZEXT_MOVL:
04892     DecodeZeroMoveLowMask(VT, Mask);
04893     IsUnary = true;
04894     break;
04895   case X86ISD::PSHUFB: {
04896     IsUnary = true;
04897     SDValue MaskNode = N->getOperand(1);
04898     while (MaskNode->getOpcode() == ISD::BITCAST)
04899       MaskNode = MaskNode->getOperand(0);
04900 
04901     if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
04902       // If we have a build-vector, then things are easy.
04903       MVT VT = MaskNode.getSimpleValueType();
04904       assert(VT.isVector() &&
04905              "Can't produce a non-vector with a build_vector!");
04906       if (!VT.isInteger())
04907         return false;
04908 
04909       int NumBytesPerElement = VT.getVectorElementType().getSizeInBits() / 8;
04910 
04911       SmallVector<uint64_t, 32> RawMask;
04912       for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
04913         SDValue Op = MaskNode->getOperand(i);
04914         if (Op->getOpcode() == ISD::UNDEF) {
04915           RawMask.push_back((uint64_t)SM_SentinelUndef);
04916           continue;
04917         }
04918         auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
04919         if (!CN)
04920           return false;
04921         APInt MaskElement = CN->getAPIntValue();
04922 
04923         // We now have to decode the element which could be any integer size and
04924         // extract each byte of it.
04925         for (int j = 0; j < NumBytesPerElement; ++j) {
04926           // Note that this is x86 and so always little endian: the low byte is
04927           // the first byte of the mask.
04928           RawMask.push_back(MaskElement.getLoBits(8).getZExtValue());
04929           MaskElement = MaskElement.lshr(8);
04930         }
04931       }
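            // At this point RawMask holds one byte per lane; e.g. (illustrative) a
            // v2i64 mask element 0x0706050403020100 contributed the byte sequence
            // 0x00, 0x01, ..., 0x07.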
04932       DecodePSHUFBMask(RawMask, Mask);
04933       break;
04934     }
04935 
04936     auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
04937     if (!MaskLoad)
04938       return false;
04939 
04940     SDValue Ptr = MaskLoad->getBasePtr();
04941     if (Ptr->getOpcode() == X86ISD::Wrapper ||
04942         Ptr->getOpcode() == X86ISD::WrapperRIP)
04943       Ptr = Ptr->getOperand(0);
04944 
04945     auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
04946     if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
04947       return false;
04948 
04949     if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
04950       DecodePSHUFBMask(C, Mask);
04951       break;
04952     }
04953 
04954     return false;
04955   }
04956   case X86ISD::VPERMI:
04957     ImmN = N->getOperand(N->getNumOperands()-1);
04958     DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04959     IsUnary = true;
04960     break;
04961   case X86ISD::MOVSS:
04962   case X86ISD::MOVSD:
04963     DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
04964     break;
04965   case X86ISD::VPERM2X128:
04966     ImmN = N->getOperand(N->getNumOperands()-1);
04967     DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
04968     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
04969     break;
04970   case X86ISD::MOVSLDUP:
04971     DecodeMOVSLDUPMask(VT, Mask);
04972     IsUnary = true;
04973     break;
04974   case X86ISD::MOVSHDUP:
04975     DecodeMOVSHDUPMask(VT, Mask);
04976     IsUnary = true;
04977     break;
04978   case X86ISD::MOVDDUP:
04979     DecodeMOVDDUPMask(VT, Mask);
04980     IsUnary = true;
04981     break;
04982   case X86ISD::MOVLHPD:
04983   case X86ISD::MOVLPD:
04984   case X86ISD::MOVLPS:
04985     // Not yet implemented
04986     return false;
04987   case X86ISD::VPERMV: {
04988     IsUnary = true;
04989     SDValue MaskNode = N->getOperand(0);
04990     while (MaskNode->getOpcode() == ISD::BITCAST)
04991       MaskNode = MaskNode->getOperand(0);
04992 
04993     unsigned MaskLoBits = Log2_64(VT.getVectorNumElements());
04994     SmallVector<uint64_t, 32> RawMask;
04995     if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
04996       // If we have a build-vector, then things are easy.
04997       assert(MaskNode.getSimpleValueType().isInteger() &&
04998              MaskNode.getSimpleValueType().getVectorNumElements() ==
04999              VT.getVectorNumElements());
05000 
05001       for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) {
05002         SDValue Op = MaskNode->getOperand(i);
05003         if (Op->getOpcode() == ISD::UNDEF)
05004           RawMask.push_back((uint64_t)SM_SentinelUndef);
05005         else if (isa<ConstantSDNode>(Op)) {
05006           APInt MaskElement = cast<ConstantSDNode>(Op)->getAPIntValue();
05007           RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue());
05008         } else
05009           return false;
05010       }
05011       DecodeVPERMVMask(RawMask, Mask);
05012       break;
05013     }
05014     if (MaskNode->getOpcode() == X86ISD::VBROADCAST) {
05015       unsigned NumEltsInMask = MaskNode->getNumOperands();
05016       MaskNode = MaskNode->getOperand(0);
05017       if (auto *CN = dyn_cast<ConstantSDNode>(MaskNode)) {
05018         APInt MaskEltValue = CN->getAPIntValue();
05019         for (unsigned i = 0; i < NumEltsInMask; ++i)
05020           RawMask.push_back(MaskEltValue.getLoBits(MaskLoBits).getZExtValue());
05021         DecodeVPERMVMask(RawMask, Mask);
05022         break;
05023       }
05024       // It may be a scalar load
05025     }
05026 
05027     auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
05028     if (!MaskLoad)
05029       return false;
05030 
05031     SDValue Ptr = MaskLoad->getBasePtr();
05032     if (Ptr->getOpcode() == X86ISD::Wrapper ||
05033         Ptr->getOpcode() == X86ISD::WrapperRIP)
05034       Ptr = Ptr->getOperand(0);
05035 
05036     auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
05037     if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
05038       return false;
05039 
05040     if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
05041       DecodeVPERMVMask(C, VT, Mask);
05042       break;
05043     }
05044     return false;
05045   }
05046   case X86ISD::VPERMV3: {
05047     IsUnary = false;
05048     SDValue MaskNode = N->getOperand(1);
05049     while (MaskNode->getOpcode() == ISD::BITCAST)
05050       MaskNode = MaskNode->getOperand(0);
05051 
05052     if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
05053       // If we have a build-vector, then things are easy.
05054       assert(MaskNode.getSimpleValueType().isInteger() &&
05055              MaskNode.getSimpleValueType().getVectorNumElements() ==
05056              VT.getVectorNumElements());
05057 
05058       SmallVector<uint64_t, 32> RawMask;
05059       unsigned MaskLoBits = Log2_64(VT.getVectorNumElements()*2);
05060 
05061       for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) {
05062         SDValue Op = MaskNode->getOperand(i);
05063         if (Op->getOpcode() == ISD::UNDEF)
05064           RawMask.push_back((uint64_t)SM_SentinelUndef);
05065         else {
05066           auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
05067           if (!CN)
05068             return false;
05069           APInt MaskElement = CN->getAPIntValue();
05070           RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue());
05071         }
05072       }
05073       DecodeVPERMV3Mask(RawMask, Mask);
05074       break;
05075     }
05076 
05077     auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
05078     if (!MaskLoad)
05079       return false;
05080 
05081     SDValue Ptr = MaskLoad->getBasePtr();
05082     if (Ptr->getOpcode() == X86ISD::Wrapper ||
05083         Ptr->getOpcode() == X86ISD::WrapperRIP)
05084       Ptr = Ptr->getOperand(0);
05085 
05086     auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
05087     if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
05088       return false;
05089 
05090     if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
05091       DecodeVPERMV3Mask(C, VT, Mask);
05092       break;
05093     }
05094     return false;
05095   }
05096   default: llvm_unreachable("unknown target shuffle node");
05097   }
05098 
05099   // Empty mask indicates the decode failed.
05100   if (Mask.empty())
05101     return false;
05102 
05103   // Check if we're getting a shuffle mask with zero'd elements.
05104   if (!AllowSentinelZero)
05105     if (std::any_of(Mask.begin(), Mask.end(),
05106                     [](int M){ return M == SM_SentinelZero; }))
05107       return false;
05108 
05109   // If we have a fake unary shuffle, the shuffle mask is spread across two
05110   // inputs that are actually the same node. Re-map the mask to always point
05111   // into the first input.
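        // E.g. (illustrative) a v4f32 SHUFP whose two operands are the same node and
        // whose decoded mask is <0, 1, 4, 5> is remapped to <0, 1, 0, 1>.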
05112   if (IsFakeUnary)
05113     for (int &M : Mask)
05114       if (M >= (int)Mask.size())
05115         M -= Mask.size();
05116 
05117   return true;
05118 }
05119 
05120 /// Returns the scalar element that will make up the ith
05121 /// element of the result of the vector shuffle.
05122 static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
05123                                    unsigned Depth) {
05124   if (Depth == 6)
05125     return SDValue();  // Limit search depth.
05126 
05127   SDValue V = SDValue(N, 0);
05128   EVT VT = V.getValueType();
05129   unsigned Opcode = V.getOpcode();
05130 
05131   // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
05132   if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
05133     int Elt = SV->getMaskElt(Index);
05134 
05135     if (Elt < 0)
05136       return DAG.getUNDEF(VT.getVectorElementType());
05137 
05138     unsigned NumElems = VT.getVectorNumElements();
05139     SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
05140                                          : SV->getOperand(1);
05141     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
05142   }
05143 
05144   // Recurse into target specific vector shuffles to find scalars.
05145   if (isTargetShuffle(Opcode)) {
05146     MVT ShufVT = V.getSimpleValueType();
05147     int NumElems = (int)ShufVT.getVectorNumElements();
05148     SmallVector<int, 16> ShuffleMask;
05149     bool IsUnary;
05150 
05151     if (!getTargetShuffleMask(N, ShufVT, false, ShuffleMask, IsUnary))
05152       return SDValue();
05153 
05154     int Elt = ShuffleMask[Index];
05155     if (Elt == SM_SentinelUndef)
05156       return DAG.getUNDEF(ShufVT.getVectorElementType());
05157 
05158     assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range");
05159     SDValue NewV = (Elt < NumElems) ? N->getOperand(0) : N->getOperand(1);
05160     return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
05161                                Depth+1);
05162   }
05163 
05164   // Actual nodes that may contain scalar elements
05165   if (Opcode == ISD::BITCAST) {
05166     V = V.getOperand(0);
05167     EVT SrcVT = V.getValueType();
05168     unsigned NumElems = VT.getVectorNumElements();
05169 
05170     if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
05171       return SDValue();
05172   }
05173 
05174   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
05175     return (Index == 0) ? V.getOperand(0)
05176                         : DAG.getUNDEF(VT.getVectorElementType());
05177 
05178   if (V.getOpcode() == ISD::BUILD_VECTOR)
05179     return V.getOperand(Index);
05180 
05181   return SDValue();
05182 }
05183 
05184 /// Custom lower build_vector of v16i8.
05185 static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
05186                                        unsigned NumNonZero, unsigned NumZero,
05187                                        SelectionDAG &DAG,
05188                                        const X86Subtarget* Subtarget,
05189                                        const TargetLowering &TLI) {
05190   if (NumNonZero > 8)
05191     return SDValue();
05192 
05193   SDLoc dl(Op);
05194   SDValue V;
05195   bool First = true;
05196 
05197   // SSE4.1 - use PINSRB to insert each byte directly.
05198   if (Subtarget->hasSSE41()) {
05199     for (unsigned i = 0; i < 16; ++i) {
05200       bool isNonZero = (NonZeros & (1 << i)) != 0;
05201       if (isNonZero) {
05202         if (First) {
05203           if (NumZero)
05204             V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
05205           else
05206             V = DAG.getUNDEF(MVT::v16i8);
05207           First = false;
05208         }
05209         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
05210                         MVT::v16i8, V, Op.getOperand(i),
05211                         DAG.getIntPtrConstant(i, dl));
05212       }
05213     }
05214 
05215     return V;
05216   }
05217 
05218   // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
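        // Each byte pair is combined as (zext(byte[i]) << 8) | zext(byte[i-1]) and
        // inserted as 16-bit element i/2 of a v8i16, which is bitcast back to v16i8
        // at the end.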
05219   for (unsigned i = 0; i < 16; ++i) {
05220     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
05221     if (ThisIsNonZero && First) {
05222       if (NumZero)
05223         V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
05224       else
05225         V = DAG.getUNDEF(MVT::v8i16);
05226       First = false;
05227     }
05228 
05229     if ((i & 1) != 0) {
05230       SDValue ThisElt, LastElt;
05231       bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
05232       if (LastIsNonZero) {
05233         LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
05234                               MVT::i16, Op.getOperand(i-1));
05235       }
05236       if (ThisIsNonZero) {
05237         ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
05238         ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
05239                               ThisElt, DAG.getConstant(8, dl, MVT::i8));
05240         if (LastIsNonZero)
05241           ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
05242       } else
05243         ThisElt = LastElt;
05244 
05245       if (ThisElt.getNode())
05246         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
05247                         DAG.getIntPtrConstant(i/2, dl));
05248     }
05249   }
05250 
05251   return DAG.getBitcast(MVT::v16i8, V);
05252 }
05253 
05254 /// Custom lower build_vector of v8i16.
05255 static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
05256                                      unsigned NumNonZero, unsigned NumZero,
05257                                      SelectionDAG &DAG,
05258                                      const X86Subtarget* Subtarget,
05259                                      const TargetLowering &TLI) {
05260   if (NumNonZero > 4)
05261     return SDValue();
05262 
05263   SDLoc dl(Op);
05264   SDValue V;
05265   bool First = true;
05266   for (unsigned i = 0; i < 8; ++i) {
05267     bool isNonZero = (NonZeros & (1 << i)) != 0;
05268     if (isNonZero) {
05269       if (First) {
05270         if (NumZero)
05271           V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
05272         else
05273           V = DAG.getUNDEF(MVT::v8i16);
05274         First = false;
05275       }
05276       V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
05277                       MVT::v8i16, V, Op.getOperand(i),
05278                       DAG.getIntPtrConstant(i, dl));
05279     }
05280   }
05281 
05282   return V;
05283 }
05284 
05285 /// Custom lower build_vector of v4i32 or v4f32.
05286 static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
05287                                      const X86Subtarget *Subtarget,
05288                                      const TargetLowering &TLI) {
05289   // Find all zeroable elements.
05290   std::bitset<4> Zeroable;
05291   for (int i=0; i < 4; ++i) {
05292     SDValue Elt = Op->getOperand(i);
05293     Zeroable[i] = (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt));
05294   }
05295   assert(Zeroable.size() - Zeroable.count() > 1 &&
05296          "We expect at least two non-zero elements!");
05297 
05298   // We only know how to deal with build_vector nodes where elements are either
05299   // zeroable or extract_vector_elt with constant index.
05300   SDValue FirstNonZero;
05301   unsigned FirstNonZeroIdx;
05302   for (unsigned i=0; i < 4; ++i) {
05303     if (Zeroable[i])
05304       continue;
05305     SDValue Elt = Op->getOperand(i);
05306     if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
05307         !isa<ConstantSDNode>(Elt.getOperand(1)))
05308       return SDValue();
05309     // Make sure that this node is extracting from a 128-bit vector.
05310     MVT VT = Elt.getOperand(0).getSimpleValueType();
05311     if (!VT.is128BitVector())
05312       return SDValue();
05313     if (!FirstNonZero.getNode()) {
05314       FirstNonZero = Elt;
05315       FirstNonZeroIdx = i;
05316     }
05317   }
05318 
05319   assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
05320   SDValue V1 = FirstNonZero.getOperand(0);
05321   MVT VT = V1.getSimpleValueType();
05322 
05323   // See if this build_vector can be lowered as a blend with zero.
05324   SDValue Elt;
05325   unsigned EltMaskIdx, EltIdx;
05326   int Mask[4];
05327   for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
05328     if (Zeroable[EltIdx]) {
05329       // The zero vector will be on the right hand side.
05330       Mask[EltIdx] = EltIdx+4;
05331       continue;
05332     }
05333 
05334     Elt = Op->getOperand(EltIdx);
05335     // By construction, Elt is an EXTRACT_VECTOR_ELT with constant index.
05336     EltMaskIdx = cast<ConstantSDNode>(Elt.getOperand(1))->getZExtValue();
05337     if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
05338       break;
05339     Mask[EltIdx] = EltIdx;
05340   }
05341 
05342   if (EltIdx == 4) {
05343     // Let the shuffle legalizer deal with blend operations.
05344     SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
05345     if (V1.getSimpleValueType() != VT)
05346       V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), VT, V1);
05347     return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, &Mask[0]);
05348   }
05349 
05350   // See if we can lower this build_vector to a INSERTPS.
05351   if (!Subtarget->hasSSE41())
05352     return SDValue();
05353 
05354   SDValue V2 = Elt.getOperand(0);
05355   if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
05356     V1 = SDValue();
05357 
05358   bool CanFold = true;
05359   for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
05360     if (Zeroable[i])
05361       continue;
05362 
05363     SDValue Current = Op->getOperand(i);
05364     SDValue SrcVector = Current->getOperand(0);
05365     if (!V1.getNode())
05366       V1 = SrcVector;
05367     CanFold = SrcVector == V1 &&
05368       cast<ConstantSDNode>(Current.getOperand(1))->getZExtValue() == i;
05369   }
05370 
05371   if (!CanFold)
05372     return SDValue();
05373 
05374   assert(V1.getNode() && "Expected at least two non-zero elements!");
05375   if (V1.getSimpleValueType() != MVT::v4f32)
05376     V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), MVT::v4f32, V1);
05377   if (V2.getSimpleValueType() != MVT::v4f32)
05378     V2 = DAG.getNode(ISD::BITCAST, SDLoc(V2), MVT::v4f32, V2);
05379 
05380   // Ok, we can emit an INSERTPS instruction.
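        // The INSERTPS immediate encodes the source element in bits [7:6], the
        // destination slot in bits [5:4], and the zero mask in bits [3:0].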
05381   unsigned ZMask = Zeroable.to_ulong();
05382 
05383   unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
05384   assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
05385   SDLoc DL(Op);
05386   SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
05387                                DAG.getIntPtrConstant(InsertPSMask, DL));
05388   return DAG.getBitcast(VT, Result);
05389 }
05390 
05391 /// Return a vector logical shift node.
05392 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
05393                          unsigned NumBits, SelectionDAG &DAG,
05394                          const TargetLowering &TLI, SDLoc dl) {
05395   assert(VT.is128BitVector() && "Unknown type for VShift");
05396   MVT ShVT = MVT::v2i64;
05397   unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
05398   SrcOp = DAG.getBitcast(ShVT, SrcOp);
05399   MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
05400   assert(NumBits % 8 == 0 && "Only support byte sized shifts");
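        // E.g. (illustrative) a 32-bit left shift becomes a byte shift by 4
        // (PSLLDQ-style) on the value bitcast to v2i64.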
05401   SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
05402   return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
05403 }
05404 
05405 static SDValue
05406 LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
05407 
05408   // Check if the scalar load can be widened into a vector load. And if
05409   // the address is "base + cst" see if the cst can be "absorbed" into
05410   // the shuffle mask.
05411   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
05412     SDValue Ptr = LD->getBasePtr();
05413     if (!ISD::isNormalLoad(LD) || LD->isVolatile())
05414       return SDValue();
05415     EVT PVT = LD->getValueType(0);
05416     if (PVT != MVT::i32 && PVT != MVT::f32)
05417       return SDValue();
05418 
05419     int FI = -1;
05420     int64_t Offset = 0;
05421     if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
05422       FI = FINode->getIndex();
05423       Offset = 0;
05424     } else if (DAG.isBaseWithConstantOffset(Ptr) &&
05425                isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
05426       FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
05427       Offset = Ptr.getConstantOperandVal(1);
05428       Ptr = Ptr.getOperand(0);
05429     } else {
05430       return SDValue();
05431     }
05432 
05433     // FIXME: 256-bit vector instructions don't require a strict alignment,
05434     // improve this code to support it better.
05435     unsigned RequiredAlign = VT.getSizeInBits()/8;
05436     SDValue Chain = LD->getChain();
05437     // Make sure the stack object alignment is at least 16 or 32.
05438     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
05439     if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
05440       if (MFI->isFixedObjectIndex(FI)) {
05441         // Can't change the alignment. FIXME: It's possible to compute
05442         // the exact stack offset and reference FI + adjust offset instead.
05443         // If someone *really* cares about this. That's the way to implement it.
05444         return SDValue();
05445       } else {
05446         MFI->setObjectAlignment(FI, RequiredAlign);
05447       }
05448     }
05449 
05450     // (Offset % RequiredAlign) must be a multiple of 4. The address is then
05451     // Ptr + (Offset & ~(RequiredAlign - 1)).
05452     if (Offset < 0)
05453       return SDValue();
05454     if ((Offset % RequiredAlign) & 3)
05455       return SDValue();
05456     int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
05457     if (StartOffset) {
05458       SDLoc DL(Ptr);
05459       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
05460                         DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
05461     }
05462 
05463     int EltNo = (Offset - StartOffset) >> 2;
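          // E.g. (illustrative) for a 16-byte vector with Offset == 20: StartOffset
          // is 16 and EltNo is 1, so the splat uses element 1 of the load at Ptr + 16.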
05464     unsigned NumElems = VT.getVectorNumElements();
05465 
05466     EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
05467     SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
05468                              LD->getPointerInfo().getWithOffset(StartOffset),
05469                              false, false, false, 0);
05470 
05471     SmallVector<int, 8> Mask(NumElems, EltNo);
05472 
05473     return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
05474   }
05475 
05476   return SDValue();
05477 }
05478 
05479 /// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
05480 /// elements can be replaced by a single large load which has the same value as
05481 /// a build_vector or insert_subvector whose loaded operands are 'Elts'.
05482 ///
05483 /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
05484 ///
05485 /// FIXME: we'd also like to handle the case where the last elements are zero
05486 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
05487 /// There's even a handy isZeroNode for that purpose.
05488 static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
05489                                         SDLoc &DL, SelectionDAG &DAG,
05490                                         bool isAfterLegalize) {
05491   unsigned NumElems = Elts.size();
05492 
05493   LoadSDNode *LDBase = nullptr;
05494   unsigned LastLoadedElt = -1U;
05495 
05496   // For each element in the initializer, see if we've found a load or an undef.
05497   // If we don't find an initial load element, or later load elements are
05498   // non-consecutive, bail out.
05499   for (unsigned i = 0; i < NumElems; ++i) {
05500     SDValue Elt = Elts[i];
05501     // Look through a bitcast.
05502     if (Elt.getNode() && Elt.getOpcode() == ISD::BITCAST)
05503       Elt = Elt.getOperand(0);
05504     if (!Elt.getNode() ||
05505         (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
05506       return SDValue();
05507     if (!LDBase) {
05508       if (Elt.getNode()->getOpcode() == ISD::UNDEF)
05509         return SDValue();
05510       LDBase = cast<LoadSDNode>(Elt.getNode());
05511       LastLoadedElt = i;
05512       continue;
05513     }
05514     if (Elt.getOpcode() == ISD::UNDEF)
05515       continue;
05516 
05517     LoadSDNode *LD = cast<LoadSDNode>(Elt);
05518     EVT LdVT = Elt.getValueType();
05519     // Each loaded element must be the correct fractional portion of the
05520     // requested vector load.
05521     if (LdVT.getSizeInBits() != VT.getSizeInBits() / NumElems)
05522       return SDValue();
05523     if (!DAG.isConsecutiveLoad(LD, LDBase, LdVT.getSizeInBits() / 8, i))
05524       return SDValue();
05525     LastLoadedElt = i;
05526   }
05527 
05528   // If we have found an entire vector of loads and undefs, then return a large
05529   // load of the entire vector width starting at the base pointer.  If we found
05530   // consecutive loads for the low half, generate a vzext_load node.
05531   if (LastLoadedElt == NumElems - 1) {
05532     assert(LDBase && "Did not find base load for merging consecutive loads");
05533     EVT EltVT = LDBase->getValueType(0);
05534     // Ensure that the input vector size for the merged loads matches the
05535     // cumulative size of the input elements.
05536     if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
05537       return SDValue();
05538 
05539     if (isAfterLegalize &&
05540         !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
05541       return SDValue();
05542 
05543     SDValue NewLd = SDValue();
05544 
05545     NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
05546                         LDBase->getPointerInfo(), LDBase->isVolatile(),
05547                         LDBase->isNonTemporal(), LDBase->isInvariant(),
05548                         LDBase->getAlignment());
05549 
05550     if (LDBase->hasAnyUseOfValue(1)) {
05551       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
05552                                      SDValue(LDBase, 1),
05553                                      SDValue(NewLd.getNode(), 1));
05554       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
05555       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
05556                              SDValue(NewLd.getNode(), 1));
05557     }
05558 
05559     return NewLd;
05560   }
05561 
05562   // TODO: The code below fires only for loading the low v2i32 / v2f32
05563   // of a v4i32 / v4f32. It's probably worth generalizing.
05564   EVT EltVT = VT.getVectorElementType();
05565   if (NumElems == 4 && LastLoadedElt == 1 && (EltVT.getSizeInBits() == 32) &&
05566       DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
05567     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
05568     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
05569     SDValue ResNode =
05570         DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64,
05571                                 LDBase->getPointerInfo(),
05572                                 LDBase->getAlignment(),
05573                                 false/*isVolatile*/, true/*ReadMem*/,
05574                                 false/*WriteMem*/);
05575 
05576     // Make sure the newly-created LOAD is in the same position as LDBase in
05577     // terms of dependency. We create a TokenFactor for LDBase and ResNode, and
05578     // update uses of LDBase's output chain to use the TokenFactor.
05579     if (LDBase->hasAnyUseOfValue(1)) {
05580       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
05581                              SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
05582       DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
05583       DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
05584                              SDValue(ResNode.getNode(), 1));
05585     }
05586 
05587     return DAG.getBitcast(VT, ResNode);
05588   }
05589   return SDValue();
05590 }
05591 
05592 /// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
05593 /// to generate a splat value for the following cases:
05594 /// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
05595 /// 2. A splat shuffle which uses a scalar_to_vector node which comes from
05596 /// a scalar load, or a constant.
05597 /// The VBROADCAST node is returned when a pattern is found,
05598 /// or SDValue() otherwise.
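      /// For example (illustrative), a BUILD_VECTOR whose lanes are all the same
      /// scalar load can become a single VBROADCAST of that load (e.g. vbroadcastss).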
05599 static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
05600                                     SelectionDAG &DAG) {
05601   // VBROADCAST requires AVX.
05602   // TODO: Splats could be generated for non-AVX CPUs using SSE
05603   // instructions, but there's less potential gain for only 128-bit vectors.
05604   if (!Subtarget->hasAVX())
05605     return SDValue();
05606 
05607   MVT VT = Op.getSimpleValueType();
05608   SDLoc dl(Op);
05609 
05610   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
05611          "Unsupported vector type for broadcast.");
05612 
05613   SDValue Ld;
05614   bool ConstSplatVal;
05615 
05616   switch (Op.getOpcode()) {
05617     default:
05618       // Unknown pattern found.
05619       return SDValue();
05620 
05621     case ISD::BUILD_VECTOR: {
05622       auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
05623       BitVector UndefElements;
05624       SDValue Splat = BVOp->getSplatValue(&UndefElements);
05625 
05626       // We need a splat of a single value to use broadcast, and it doesn't
05627       // make any sense if the value is only in one element of the vector.
05628       if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
05629         return SDValue();
05630 
05631       Ld = Splat;
05632       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
05633                        Ld.getOpcode() == ISD::ConstantFP);
05634 
05635       // Make sure that all of the users of a non-constant load are from the
05636       // BUILD_VECTOR node.
05637       if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
05638         return SDValue();
05639       break;
05640     }
05641 
05642     case ISD::VECTOR_SHUFFLE: {
05643       ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
05644 
05645       // Shuffles must have a splat mask where the first