LLVM API Documentation
00001 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines the interfaces that X86 uses to lower LLVM code into a 00011 // selection DAG. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #define DEBUG_TYPE "x86-isel" 00016 #include "X86ISelLowering.h" 00017 #include "Utils/X86ShuffleDecode.h" 00018 #include "X86.h" 00019 #include "X86InstrBuilder.h" 00020 #include "X86TargetMachine.h" 00021 #include "X86TargetObjectFile.h" 00022 #include "llvm/ADT/SmallSet.h" 00023 #include "llvm/ADT/Statistic.h" 00024 #include "llvm/ADT/StringExtras.h" 00025 #include "llvm/ADT/VariadicFunction.h" 00026 #include "llvm/CodeGen/IntrinsicLowering.h" 00027 #include "llvm/CodeGen/MachineFrameInfo.h" 00028 #include "llvm/CodeGen/MachineFunction.h" 00029 #include "llvm/CodeGen/MachineInstrBuilder.h" 00030 #include "llvm/CodeGen/MachineJumpTableInfo.h" 00031 #include "llvm/CodeGen/MachineModuleInfo.h" 00032 #include "llvm/CodeGen/MachineRegisterInfo.h" 00033 #include "llvm/IR/CallingConv.h" 00034 #include "llvm/IR/Constants.h" 00035 #include "llvm/IR/DerivedTypes.h" 00036 #include "llvm/IR/Function.h" 00037 #include "llvm/IR/GlobalAlias.h" 00038 #include "llvm/IR/GlobalVariable.h" 00039 #include "llvm/IR/Instructions.h" 00040 #include "llvm/IR/Intrinsics.h" 00041 #include "llvm/IR/LLVMContext.h" 00042 #include "llvm/MC/MCAsmInfo.h" 00043 #include "llvm/MC/MCContext.h" 00044 #include "llvm/MC/MCExpr.h" 00045 #include "llvm/MC/MCSymbol.h" 00046 #include "llvm/Support/CallSite.h" 00047 #include "llvm/Support/Debug.h" 00048 #include "llvm/Support/ErrorHandling.h" 00049 #include 
"llvm/Support/MathExtras.h" 00050 #include "llvm/Target/TargetOptions.h" 00051 #include <bitset> 00052 #include <cctype> 00053 using namespace llvm; 00054 00055 STATISTIC(NumTailCalls, "Number of tail calls"); 00056 00057 // Forward declarations. 00058 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, 00059 SDValue V2); 00060 00061 /// Generate a DAG to grab 128-bits from a vector > 128 bits. This 00062 /// sets things up to match to an AVX VEXTRACTF128 instruction or a 00063 /// simple subregister reference. Idx is an index in the 128 bits we 00064 /// want. It need not be aligned to a 128-bit bounday. That makes 00065 /// lowering EXTRACT_VECTOR_ELT operations easier. 00066 static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, 00067 SelectionDAG &DAG, SDLoc dl) { 00068 EVT VT = Vec.getValueType(); 00069 assert(VT.is256BitVector() && "Unexpected vector size!"); 00070 EVT ElVT = VT.getVectorElementType(); 00071 unsigned Factor = VT.getSizeInBits()/128; 00072 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, 00073 VT.getVectorNumElements()/Factor); 00074 00075 // Extract from UNDEF is UNDEF. 00076 if (Vec.getOpcode() == ISD::UNDEF) 00077 return DAG.getUNDEF(ResultVT); 00078 00079 // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR 00080 // we can match to VEXTRACTF128. 00081 unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits(); 00082 00083 // This is the index of the first element of the 128-bit chunk 00084 // we want. 00085 unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) 00086 * ElemsPerChunk); 00087 00088 // If the input is a buildvector just emit a smaller one. 
00089 if (Vec.getOpcode() == ISD::BUILD_VECTOR) 00090 return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, 00091 Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk); 00092 00093 SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); 00094 SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, 00095 VecIdx); 00096 00097 return Result; 00098 } 00099 00100 /// Generate a DAG to put 128-bits into a vector > 128 bits. This 00101 /// sets things up to match to an AVX VINSERTF128 instruction or a 00102 /// simple superregister reference. Idx is an index in the 128 bits 00103 /// we want. It need not be aligned to a 128-bit bounday. That makes 00104 /// lowering INSERT_VECTOR_ELT operations easier. 00105 static SDValue Insert128BitVector(SDValue Result, SDValue Vec, 00106 unsigned IdxVal, SelectionDAG &DAG, 00107 SDLoc dl) { 00108 // Inserting UNDEF is Result 00109 if (Vec.getOpcode() == ISD::UNDEF) 00110 return Result; 00111 00112 EVT VT = Vec.getValueType(); 00113 assert(VT.is128BitVector() && "Unexpected vector size!"); 00114 00115 EVT ElVT = VT.getVectorElementType(); 00116 EVT ResultVT = Result.getValueType(); 00117 00118 // Insert the relevant 128 bits. 00119 unsigned ElemsPerChunk = 128/ElVT.getSizeInBits(); 00120 00121 // This is the index of the first element of the 128-bit chunk 00122 // we want. 00123 unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128) 00124 * ElemsPerChunk); 00125 00126 SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); 00127 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, 00128 VecIdx); 00129 } 00130 00131 /// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128 00132 /// instructions. This is used because creating CONCAT_VECTOR nodes of 00133 /// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower 00134 /// large BUILD_VECTORS. 
00135 static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT, 00136 unsigned NumElems, SelectionDAG &DAG, 00137 SDLoc dl) { 00138 SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl); 00139 return Insert128BitVector(V, V2, NumElems/2, DAG, dl); 00140 } 00141 00142 static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { 00143 const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); 00144 bool is64Bit = Subtarget->is64Bit(); 00145 00146 if (Subtarget->isTargetEnvMacho()) { 00147 if (is64Bit) 00148 return new X86_64MachoTargetObjectFile(); 00149 return new TargetLoweringObjectFileMachO(); 00150 } 00151 00152 if (Subtarget->isTargetLinux()) 00153 return new X86LinuxTargetObjectFile(); 00154 if (Subtarget->isTargetELF()) 00155 return new TargetLoweringObjectFileELF(); 00156 if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) 00157 return new TargetLoweringObjectFileCOFF(); 00158 llvm_unreachable("unknown subtarget type"); 00159 } 00160 00161 X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) 00162 : TargetLowering(TM, createTLOF(TM)) { 00163 Subtarget = &TM.getSubtarget<X86Subtarget>(); 00164 X86ScalarSSEf64 = Subtarget->hasSSE2(); 00165 X86ScalarSSEf32 = Subtarget->hasSSE1(); 00166 RegInfo = TM.getRegisterInfo(); 00167 TD = getDataLayout(); 00168 00169 resetOperationActions(); 00170 } 00171 00172 void X86TargetLowering::resetOperationActions() { 00173 const TargetMachine &TM = getTargetMachine(); 00174 static bool FirstTimeThrough = true; 00175 00176 // If none of the target options have changed, then we don't need to reset the 00177 // operation actions. 00178 if (!FirstTimeThrough && TO == TM.Options) return; 00179 00180 if (!FirstTimeThrough) { 00181 // Reinitialize the actions. 00182 initActions(); 00183 FirstTimeThrough = false; 00184 } 00185 00186 TO = TM.Options; 00187 00188 // Set up the TargetLowering object. 
00189 static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; 00190 00191 // X86 is weird, it always uses i8 for shift amounts and setcc results. 00192 setBooleanContents(ZeroOrOneBooleanContent); 00193 // X86-SSE is even stranger. It uses -1 or 0 for vector masks. 00194 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 00195 00196 // For 64-bit since we have so many registers use the ILP scheduler, for 00197 // 32-bit code use the register pressure specific scheduling. 00198 // For Atom, always use ILP scheduling. 00199 if (Subtarget->isAtom()) 00200 setSchedulingPreference(Sched::ILP); 00201 else if (Subtarget->is64Bit()) 00202 setSchedulingPreference(Sched::ILP); 00203 else 00204 setSchedulingPreference(Sched::RegPressure); 00205 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); 00206 00207 // Bypass expensive divides on Atom when compiling with O2 00208 if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) { 00209 addBypassSlowDiv(32, 8); 00210 if (Subtarget->is64Bit()) 00211 addBypassSlowDiv(64, 16); 00212 } 00213 00214 if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { 00215 // Setup Windows compiler runtime calls. 00216 setLibcallName(RTLIB::SDIV_I64, "_alldiv"); 00217 setLibcallName(RTLIB::UDIV_I64, "_aulldiv"); 00218 setLibcallName(RTLIB::SREM_I64, "_allrem"); 00219 setLibcallName(RTLIB::UREM_I64, "_aullrem"); 00220 setLibcallName(RTLIB::MUL_I64, "_allmul"); 00221 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); 00222 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); 00223 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); 00224 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); 00225 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); 00226 00227 // The _ftol2 runtime function has an unusual calling conv, which 00228 // is modeled by a special pseudo-instruction. 
00229 setLibcallName(RTLIB::FPTOUINT_F64_I64, 0); 00230 setLibcallName(RTLIB::FPTOUINT_F32_I64, 0); 00231 setLibcallName(RTLIB::FPTOUINT_F64_I32, 0); 00232 setLibcallName(RTLIB::FPTOUINT_F32_I32, 0); 00233 } 00234 00235 if (Subtarget->isTargetDarwin()) { 00236 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. 00237 setUseUnderscoreSetJmp(false); 00238 setUseUnderscoreLongJmp(false); 00239 } else if (Subtarget->isTargetMingw()) { 00240 // MS runtime is weird: it exports _setjmp, but longjmp! 00241 setUseUnderscoreSetJmp(true); 00242 setUseUnderscoreLongJmp(false); 00243 } else { 00244 setUseUnderscoreSetJmp(true); 00245 setUseUnderscoreLongJmp(true); 00246 } 00247 00248 // Set up the register classes. 00249 addRegisterClass(MVT::i8, &X86::GR8RegClass); 00250 addRegisterClass(MVT::i16, &X86::GR16RegClass); 00251 addRegisterClass(MVT::i32, &X86::GR32RegClass); 00252 if (Subtarget->is64Bit()) 00253 addRegisterClass(MVT::i64, &X86::GR64RegClass); 00254 00255 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 00256 00257 // We don't accept any truncstore of integer registers. 00258 setTruncStoreAction(MVT::i64, MVT::i32, Expand); 00259 setTruncStoreAction(MVT::i64, MVT::i16, Expand); 00260 setTruncStoreAction(MVT::i64, MVT::i8 , Expand); 00261 setTruncStoreAction(MVT::i32, MVT::i16, Expand); 00262 setTruncStoreAction(MVT::i32, MVT::i8 , Expand); 00263 setTruncStoreAction(MVT::i16, MVT::i8, Expand); 00264 00265 // SETOEQ and SETUNE require checking two conditions. 00266 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); 00267 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand); 00268 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand); 00269 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand); 00270 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); 00271 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand); 00272 00273 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this 00274 // operation. 
00275 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); 00276 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); 00277 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); 00278 00279 if (Subtarget->is64Bit()) { 00280 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 00281 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); 00282 } else if (!TM.Options.UseSoftFloat) { 00283 // We have an algorithm for SSE2->double, and we turn this into a 00284 // 64-bit FILD followed by conditional FADD for other targets. 00285 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); 00286 // We have an algorithm for SSE2, and we turn this into a 64-bit 00287 // FILD for other targets. 00288 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); 00289 } 00290 00291 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have 00292 // this operation. 00293 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); 00294 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); 00295 00296 if (!TM.Options.UseSoftFloat) { 00297 // SSE has no i16 to fp conversion, only i32 00298 if (X86ScalarSSEf32) { 00299 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 00300 // f32 and f64 cases are Legal, f80 case is not 00301 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 00302 } else { 00303 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); 00304 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 00305 } 00306 } else { 00307 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 00308 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote); 00309 } 00310 00311 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 00312 // are Legal, f80 is custom lowered. 00313 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); 00314 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); 00315 00316 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have 00317 // this operation. 
00318 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); 00319 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); 00320 00321 if (X86ScalarSSEf32) { 00322 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); 00323 // f32 and f64 cases are Legal, f80 case is not 00324 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 00325 } else { 00326 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); 00327 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 00328 } 00329 00330 // Handle FP_TO_UINT by promoting the destination to a larger signed 00331 // conversion. 00332 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); 00333 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); 00334 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); 00335 00336 if (Subtarget->is64Bit()) { 00337 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); 00338 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 00339 } else if (!TM.Options.UseSoftFloat) { 00340 // Since AVX is a superset of SSE3, only check for SSE here. 00341 if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) 00342 // Expand FP_TO_UINT into a select. 00343 // FIXME: We would like to use a Custom expander here eventually to do 00344 // the optimal thing for SSE vs. the default expansion in the legalizer. 00345 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); 00346 else 00347 // With SSE3 we can use fisttpll to convert to a signed i64; without 00348 // SSE, we're stuck with a fistpll. 00349 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); 00350 } 00351 00352 if (isTargetFTOL()) { 00353 // Use the _ftol2 runtime function, which has a pseudo-instruction 00354 // to handle its weird calling convention. 00355 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); 00356 } 00357 00358 // TODO: when we have SSE, these could be more efficient, by using movd/movq. 
00359 if (!X86ScalarSSEf64) { 00360 setOperationAction(ISD::BITCAST , MVT::f32 , Expand); 00361 setOperationAction(ISD::BITCAST , MVT::i32 , Expand); 00362 if (Subtarget->is64Bit()) { 00363 setOperationAction(ISD::BITCAST , MVT::f64 , Expand); 00364 // Without SSE, i64->f64 goes through memory. 00365 setOperationAction(ISD::BITCAST , MVT::i64 , Expand); 00366 } 00367 } 00368 00369 // Scalar integer divide and remainder are lowered to use operations that 00370 // produce two results, to match the available instructions. This exposes 00371 // the two-result form to trivial CSE, which is able to combine x/y and x%y 00372 // into a single instruction. 00373 // 00374 // Scalar integer multiply-high is also lowered to use two-result 00375 // operations, to match the available instructions. However, plain multiply 00376 // (low) operations are left as Legal, as there are single-result 00377 // instructions for this in x86. Using the two-result multiply instructions 00378 // when both high and low results are needed must be arranged by dagcombine. 00379 for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { 00380 MVT VT = IntVTs[i]; 00381 setOperationAction(ISD::MULHS, VT, Expand); 00382 setOperationAction(ISD::MULHU, VT, Expand); 00383 setOperationAction(ISD::SDIV, VT, Expand); 00384 setOperationAction(ISD::UDIV, VT, Expand); 00385 setOperationAction(ISD::SREM, VT, Expand); 00386 setOperationAction(ISD::UREM, VT, Expand); 00387 00388 // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences. 
00389 setOperationAction(ISD::ADDC, VT, Custom); 00390 setOperationAction(ISD::ADDE, VT, Custom); 00391 setOperationAction(ISD::SUBC, VT, Custom); 00392 setOperationAction(ISD::SUBE, VT, Custom); 00393 } 00394 00395 setOperationAction(ISD::BR_JT , MVT::Other, Expand); 00396 setOperationAction(ISD::BRCOND , MVT::Other, Custom); 00397 setOperationAction(ISD::BR_CC , MVT::f32, Expand); 00398 setOperationAction(ISD::BR_CC , MVT::f64, Expand); 00399 setOperationAction(ISD::BR_CC , MVT::f80, Expand); 00400 setOperationAction(ISD::BR_CC , MVT::i8, Expand); 00401 setOperationAction(ISD::BR_CC , MVT::i16, Expand); 00402 setOperationAction(ISD::BR_CC , MVT::i32, Expand); 00403 setOperationAction(ISD::BR_CC , MVT::i64, Expand); 00404 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); 00405 if (Subtarget->is64Bit()) 00406 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); 00407 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); 00408 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); 00409 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); 00410 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); 00411 setOperationAction(ISD::FREM , MVT::f32 , Expand); 00412 setOperationAction(ISD::FREM , MVT::f64 , Expand); 00413 setOperationAction(ISD::FREM , MVT::f80 , Expand); 00414 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); 00415 00416 // Promote the i8 variants and force them on up to i32 which has a shorter 00417 // encoding. 
00418 setOperationAction(ISD::CTTZ , MVT::i8 , Promote); 00419 AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32); 00420 setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote); 00421 AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32); 00422 if (Subtarget->hasBMI()) { 00423 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand); 00424 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand); 00425 if (Subtarget->is64Bit()) 00426 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); 00427 } else { 00428 setOperationAction(ISD::CTTZ , MVT::i16 , Custom); 00429 setOperationAction(ISD::CTTZ , MVT::i32 , Custom); 00430 if (Subtarget->is64Bit()) 00431 setOperationAction(ISD::CTTZ , MVT::i64 , Custom); 00432 } 00433 00434 if (Subtarget->hasLZCNT()) { 00435 // When promoting the i8 variants, force them to i32 for a shorter 00436 // encoding. 00437 setOperationAction(ISD::CTLZ , MVT::i8 , Promote); 00438 AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32); 00439 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote); 00440 AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); 00441 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand); 00442 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand); 00443 if (Subtarget->is64Bit()) 00444 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); 00445 } else { 00446 setOperationAction(ISD::CTLZ , MVT::i8 , Custom); 00447 setOperationAction(ISD::CTLZ , MVT::i16 , Custom); 00448 setOperationAction(ISD::CTLZ , MVT::i32 , Custom); 00449 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom); 00450 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom); 00451 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom); 00452 if (Subtarget->is64Bit()) { 00453 setOperationAction(ISD::CTLZ , MVT::i64 , Custom); 00454 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); 00455 } 00456 } 00457 00458 if (Subtarget->hasPOPCNT()) { 00459 setOperationAction(ISD::CTPOP , 
MVT::i8 , Promote); 00460 } else { 00461 setOperationAction(ISD::CTPOP , MVT::i8 , Expand); 00462 setOperationAction(ISD::CTPOP , MVT::i16 , Expand); 00463 setOperationAction(ISD::CTPOP , MVT::i32 , Expand); 00464 if (Subtarget->is64Bit()) 00465 setOperationAction(ISD::CTPOP , MVT::i64 , Expand); 00466 } 00467 00468 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); 00469 setOperationAction(ISD::BSWAP , MVT::i16 , Expand); 00470 00471 // These should be promoted to a larger select which is supported. 00472 setOperationAction(ISD::SELECT , MVT::i1 , Promote); 00473 // X86 wants to expand cmov itself. 00474 setOperationAction(ISD::SELECT , MVT::i8 , Custom); 00475 setOperationAction(ISD::SELECT , MVT::i16 , Custom); 00476 setOperationAction(ISD::SELECT , MVT::i32 , Custom); 00477 setOperationAction(ISD::SELECT , MVT::f32 , Custom); 00478 setOperationAction(ISD::SELECT , MVT::f64 , Custom); 00479 setOperationAction(ISD::SELECT , MVT::f80 , Custom); 00480 setOperationAction(ISD::SETCC , MVT::i8 , Custom); 00481 setOperationAction(ISD::SETCC , MVT::i16 , Custom); 00482 setOperationAction(ISD::SETCC , MVT::i32 , Custom); 00483 setOperationAction(ISD::SETCC , MVT::f32 , Custom); 00484 setOperationAction(ISD::SETCC , MVT::f64 , Custom); 00485 setOperationAction(ISD::SETCC , MVT::f80 , Custom); 00486 if (Subtarget->is64Bit()) { 00487 setOperationAction(ISD::SELECT , MVT::i64 , Custom); 00488 setOperationAction(ISD::SETCC , MVT::i64 , Custom); 00489 } 00490 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); 00491 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support 00492 // SjLj exception handling but a light-weight setjmp/longjmp replacement to 00493 // support continuation, user-level threading, and etc.. As a result, no 00494 // other SjLj exception interfaces are implemented and please don't build 00495 // your own exception handling based on them. 00496 // LLVM/Clang supports zero-cost DWARF exception handling. 
00497 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 00498 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 00499 00500 // Darwin ABI issue. 00501 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); 00502 setOperationAction(ISD::JumpTable , MVT::i32 , Custom); 00503 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); 00504 setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom); 00505 if (Subtarget->is64Bit()) 00506 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); 00507 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); 00508 setOperationAction(ISD::BlockAddress , MVT::i32 , Custom); 00509 if (Subtarget->is64Bit()) { 00510 setOperationAction(ISD::ConstantPool , MVT::i64 , Custom); 00511 setOperationAction(ISD::JumpTable , MVT::i64 , Custom); 00512 setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); 00513 setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom); 00514 setOperationAction(ISD::BlockAddress , MVT::i64 , Custom); 00515 } 00516 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) 00517 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); 00518 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); 00519 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); 00520 if (Subtarget->is64Bit()) { 00521 setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom); 00522 setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom); 00523 setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); 00524 } 00525 00526 if (Subtarget->hasSSE1()) 00527 setOperationAction(ISD::PREFETCH , MVT::Other, Legal); 00528 00529 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); 00530 00531 // Expand certain atomics 00532 for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { 00533 MVT VT = IntVTs[i]; 00534 setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom); 00535 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); 00536 setOperationAction(ISD::ATOMIC_STORE, VT, Custom); 00537 } 00538 00539 if 
(!Subtarget->is64Bit()) { 00540 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); 00541 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); 00542 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); 00543 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); 00544 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); 00545 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); 00546 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom); 00547 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); 00548 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); 00549 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); 00550 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); 00551 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); 00552 } 00553 00554 if (Subtarget->hasCmpxchg16b()) { 00555 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); 00556 } 00557 00558 // FIXME - use subtarget debug flags 00559 if (!Subtarget->isTargetDarwin() && 00560 !Subtarget->isTargetELF() && 00561 !Subtarget->isTargetCygMing()) { 00562 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); 00563 } 00564 00565 setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); 00566 setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); 00567 setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 00568 setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 00569 if (Subtarget->is64Bit()) { 00570 setExceptionPointerRegister(X86::RAX); 00571 setExceptionSelectorRegister(X86::RDX); 00572 } else { 00573 setExceptionPointerRegister(X86::EAX); 00574 setExceptionSelectorRegister(X86::EDX); 00575 } 00576 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); 00577 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); 00578 00579 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); 00580 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); 00581 00582 setOperationAction(ISD::TRAP, 
MVT::Other, Legal); 00583 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 00584 00585 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 00586 setOperationAction(ISD::VASTART , MVT::Other, Custom); 00587 setOperationAction(ISD::VAEND , MVT::Other, Expand); 00588 if (Subtarget->is64Bit()) { 00589 setOperationAction(ISD::VAARG , MVT::Other, Custom); 00590 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 00591 } else { 00592 setOperationAction(ISD::VAARG , MVT::Other, Expand); 00593 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 00594 } 00595 00596 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 00597 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 00598 00599 if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) 00600 setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? 00601 MVT::i64 : MVT::i32, Custom); 00602 else if (TM.Options.EnableSegmentedStacks) 00603 setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? 00604 MVT::i64 : MVT::i32, Custom); 00605 else 00606 setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? 00607 MVT::i64 : MVT::i32, Expand); 00608 00609 if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) { 00610 // f32 and f64 use SSE. 00611 // Set up the FP register classes. 00612 addRegisterClass(MVT::f32, &X86::FR32RegClass); 00613 addRegisterClass(MVT::f64, &X86::FR64RegClass); 00614 00615 // Use ANDPD to simulate FABS. 00616 setOperationAction(ISD::FABS , MVT::f64, Custom); 00617 setOperationAction(ISD::FABS , MVT::f32, Custom); 00618 00619 // Use XORP to simulate FNEG. 00620 setOperationAction(ISD::FNEG , MVT::f64, Custom); 00621 setOperationAction(ISD::FNEG , MVT::f32, Custom); 00622 00623 // Use ANDPD and ORPD to simulate FCOPYSIGN. 00624 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); 00625 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 00626 00627 // Lower this to FGETSIGNx86 plus an AND. 
00628 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom); 00629 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); 00630 00631 // We don't support sin/cos/fmod 00632 setOperationAction(ISD::FSIN , MVT::f64, Expand); 00633 setOperationAction(ISD::FCOS , MVT::f64, Expand); 00634 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 00635 setOperationAction(ISD::FSIN , MVT::f32, Expand); 00636 setOperationAction(ISD::FCOS , MVT::f32, Expand); 00637 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 00638 00639 // Expand FP immediates into loads from the stack, except for the special 00640 // cases we handle. 00641 addLegalFPImmediate(APFloat(+0.0)); // xorpd 00642 addLegalFPImmediate(APFloat(+0.0f)); // xorps 00643 } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) { 00644 // Use SSE for f32, x87 for f64. 00645 // Set up the FP register classes. 00646 addRegisterClass(MVT::f32, &X86::FR32RegClass); 00647 addRegisterClass(MVT::f64, &X86::RFP64RegClass); 00648 00649 // Use ANDPS to simulate FABS. 00650 setOperationAction(ISD::FABS , MVT::f32, Custom); 00651 00652 // Use XORP to simulate FNEG. 00653 setOperationAction(ISD::FNEG , MVT::f32, Custom); 00654 00655 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 00656 00657 // Use ANDPS and ORPS to simulate FCOPYSIGN. 00658 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 00659 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 00660 00661 // We don't support sin/cos/fmod 00662 setOperationAction(ISD::FSIN , MVT::f32, Expand); 00663 setOperationAction(ISD::FCOS , MVT::f32, Expand); 00664 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 00665 00666 // Special cases we handle for FP constants. 
00667 addLegalFPImmediate(APFloat(+0.0f)); // xorps 00668 addLegalFPImmediate(APFloat(+0.0)); // FLD0 00669 addLegalFPImmediate(APFloat(+1.0)); // FLD1 00670 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS 00671 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS 00672 00673 if (!TM.Options.UnsafeFPMath) { 00674 setOperationAction(ISD::FSIN , MVT::f64, Expand); 00675 setOperationAction(ISD::FCOS , MVT::f64, Expand); 00676 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 00677 } 00678 } else if (!TM.Options.UseSoftFloat) { 00679 // f32 and f64 in x87. 00680 // Set up the FP register classes. 00681 addRegisterClass(MVT::f64, &X86::RFP64RegClass); 00682 addRegisterClass(MVT::f32, &X86::RFP32RegClass); 00683 00684 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 00685 setOperationAction(ISD::UNDEF, MVT::f32, Expand); 00686 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 00687 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 00688 00689 if (!TM.Options.UnsafeFPMath) { 00690 setOperationAction(ISD::FSIN , MVT::f64, Expand); 00691 setOperationAction(ISD::FSIN , MVT::f32, Expand); 00692 setOperationAction(ISD::FCOS , MVT::f64, Expand); 00693 setOperationAction(ISD::FCOS , MVT::f32, Expand); 00694 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 00695 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 00696 } 00697 addLegalFPImmediate(APFloat(+0.0)); // FLD0 00698 addLegalFPImmediate(APFloat(+1.0)); // FLD1 00699 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS 00700 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS 00701 addLegalFPImmediate(APFloat(+0.0f)); // FLD0 00702 addLegalFPImmediate(APFloat(+1.0f)); // FLD1 00703 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS 00704 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS 00705 } 00706 00707 // We don't support FMA. 00708 setOperationAction(ISD::FMA, MVT::f64, Expand); 00709 setOperationAction(ISD::FMA, MVT::f32, Expand); 00710 00711 // Long double always uses X87. 
00712 if (!TM.Options.UseSoftFloat) { 00713 addRegisterClass(MVT::f80, &X86::RFP80RegClass); 00714 setOperationAction(ISD::UNDEF, MVT::f80, Expand); 00715 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); 00716 { 00717 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended); 00718 addLegalFPImmediate(TmpFlt); // FLD0 00719 TmpFlt.changeSign(); 00720 addLegalFPImmediate(TmpFlt); // FLD0/FCHS 00721 00722 bool ignored; 00723 APFloat TmpFlt2(+1.0); 00724 TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, 00725 &ignored); 00726 addLegalFPImmediate(TmpFlt2); // FLD1 00727 TmpFlt2.changeSign(); 00728 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS 00729 } 00730 00731 if (!TM.Options.UnsafeFPMath) { 00732 setOperationAction(ISD::FSIN , MVT::f80, Expand); 00733 setOperationAction(ISD::FCOS , MVT::f80, Expand); 00734 setOperationAction(ISD::FSINCOS, MVT::f80, Expand); 00735 } 00736 00737 setOperationAction(ISD::FFLOOR, MVT::f80, Expand); 00738 setOperationAction(ISD::FCEIL, MVT::f80, Expand); 00739 setOperationAction(ISD::FTRUNC, MVT::f80, Expand); 00740 setOperationAction(ISD::FRINT, MVT::f80, Expand); 00741 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); 00742 setOperationAction(ISD::FMA, MVT::f80, Expand); 00743 } 00744 00745 // Always use a library call for pow. 00746 setOperationAction(ISD::FPOW , MVT::f32 , Expand); 00747 setOperationAction(ISD::FPOW , MVT::f64 , Expand); 00748 setOperationAction(ISD::FPOW , MVT::f80 , Expand); 00749 00750 setOperationAction(ISD::FLOG, MVT::f80, Expand); 00751 setOperationAction(ISD::FLOG2, MVT::f80, Expand); 00752 setOperationAction(ISD::FLOG10, MVT::f80, Expand); 00753 setOperationAction(ISD::FEXP, MVT::f80, Expand); 00754 setOperationAction(ISD::FEXP2, MVT::f80, Expand); 00755 00756 // First set operation action for all vector types to either promote 00757 // (for widening) or expand (for scalarization). Then we will selectively 00758 // turn on ones that can be effectively codegen'd. 
00759 for (int i = MVT::FIRST_VECTOR_VALUETYPE; 00760 i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { 00761 MVT VT = (MVT::SimpleValueType)i; 00762 setOperationAction(ISD::ADD , VT, Expand); 00763 setOperationAction(ISD::SUB , VT, Expand); 00764 setOperationAction(ISD::FADD, VT, Expand); 00765 setOperationAction(ISD::FNEG, VT, Expand); 00766 setOperationAction(ISD::FSUB, VT, Expand); 00767 setOperationAction(ISD::MUL , VT, Expand); 00768 setOperationAction(ISD::FMUL, VT, Expand); 00769 setOperationAction(ISD::SDIV, VT, Expand); 00770 setOperationAction(ISD::UDIV, VT, Expand); 00771 setOperationAction(ISD::FDIV, VT, Expand); 00772 setOperationAction(ISD::SREM, VT, Expand); 00773 setOperationAction(ISD::UREM, VT, Expand); 00774 setOperationAction(ISD::LOAD, VT, Expand); 00775 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); 00776 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand); 00777 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); 00778 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand); 00779 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); 00780 setOperationAction(ISD::FABS, VT, Expand); 00781 setOperationAction(ISD::FSIN, VT, Expand); 00782 setOperationAction(ISD::FSINCOS, VT, Expand); 00783 setOperationAction(ISD::FCOS, VT, Expand); 00784 setOperationAction(ISD::FSINCOS, VT, Expand); 00785 setOperationAction(ISD::FREM, VT, Expand); 00786 setOperationAction(ISD::FMA, VT, Expand); 00787 setOperationAction(ISD::FPOWI, VT, Expand); 00788 setOperationAction(ISD::FSQRT, VT, Expand); 00789 setOperationAction(ISD::FCOPYSIGN, VT, Expand); 00790 setOperationAction(ISD::FFLOOR, VT, Expand); 00791 setOperationAction(ISD::FCEIL, VT, Expand); 00792 setOperationAction(ISD::FTRUNC, VT, Expand); 00793 setOperationAction(ISD::FRINT, VT, Expand); 00794 setOperationAction(ISD::FNEARBYINT, VT, Expand); 00795 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 00796 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 00797 setOperationAction(ISD::SDIVREM, VT, Expand); 00798 
setOperationAction(ISD::UDIVREM, VT, Expand); 00799 setOperationAction(ISD::FPOW, VT, Expand); 00800 setOperationAction(ISD::CTPOP, VT, Expand); 00801 setOperationAction(ISD::CTTZ, VT, Expand); 00802 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); 00803 setOperationAction(ISD::CTLZ, VT, Expand); 00804 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); 00805 setOperationAction(ISD::SHL, VT, Expand); 00806 setOperationAction(ISD::SRA, VT, Expand); 00807 setOperationAction(ISD::SRL, VT, Expand); 00808 setOperationAction(ISD::ROTL, VT, Expand); 00809 setOperationAction(ISD::ROTR, VT, Expand); 00810 setOperationAction(ISD::BSWAP, VT, Expand); 00811 setOperationAction(ISD::SETCC, VT, Expand); 00812 setOperationAction(ISD::FLOG, VT, Expand); 00813 setOperationAction(ISD::FLOG2, VT, Expand); 00814 setOperationAction(ISD::FLOG10, VT, Expand); 00815 setOperationAction(ISD::FEXP, VT, Expand); 00816 setOperationAction(ISD::FEXP2, VT, Expand); 00817 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 00818 setOperationAction(ISD::FP_TO_SINT, VT, Expand); 00819 setOperationAction(ISD::UINT_TO_FP, VT, Expand); 00820 setOperationAction(ISD::SINT_TO_FP, VT, Expand); 00821 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand); 00822 setOperationAction(ISD::TRUNCATE, VT, Expand); 00823 setOperationAction(ISD::SIGN_EXTEND, VT, Expand); 00824 setOperationAction(ISD::ZERO_EXTEND, VT, Expand); 00825 setOperationAction(ISD::ANY_EXTEND, VT, Expand); 00826 setOperationAction(ISD::VSELECT, VT, Expand); 00827 for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE; 00828 InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) 00829 setTruncStoreAction(VT, 00830 (MVT::SimpleValueType)InnerVT, Expand); 00831 setLoadExtAction(ISD::SEXTLOAD, VT, Expand); 00832 setLoadExtAction(ISD::ZEXTLOAD, VT, Expand); 00833 setLoadExtAction(ISD::EXTLOAD, VT, Expand); 00834 } 00835 00836 // FIXME: In order to prevent SSE instructions being expanded to MMX ones 00837 // with -msoft-float, disable use of MMX as well. 
00838 if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) { 00839 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass); 00840 // No operations on x86mmx supported, everything uses intrinsics. 00841 } 00842 00843 // MMX-sized vectors (other than x86mmx) are expected to be expanded 00844 // into smaller operations. 00845 setOperationAction(ISD::MULHS, MVT::v8i8, Expand); 00846 setOperationAction(ISD::MULHS, MVT::v4i16, Expand); 00847 setOperationAction(ISD::MULHS, MVT::v2i32, Expand); 00848 setOperationAction(ISD::MULHS, MVT::v1i64, Expand); 00849 setOperationAction(ISD::AND, MVT::v8i8, Expand); 00850 setOperationAction(ISD::AND, MVT::v4i16, Expand); 00851 setOperationAction(ISD::AND, MVT::v2i32, Expand); 00852 setOperationAction(ISD::AND, MVT::v1i64, Expand); 00853 setOperationAction(ISD::OR, MVT::v8i8, Expand); 00854 setOperationAction(ISD::OR, MVT::v4i16, Expand); 00855 setOperationAction(ISD::OR, MVT::v2i32, Expand); 00856 setOperationAction(ISD::OR, MVT::v1i64, Expand); 00857 setOperationAction(ISD::XOR, MVT::v8i8, Expand); 00858 setOperationAction(ISD::XOR, MVT::v4i16, Expand); 00859 setOperationAction(ISD::XOR, MVT::v2i32, Expand); 00860 setOperationAction(ISD::XOR, MVT::v1i64, Expand); 00861 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand); 00862 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand); 00863 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand); 00864 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand); 00865 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand); 00866 setOperationAction(ISD::SELECT, MVT::v8i8, Expand); 00867 setOperationAction(ISD::SELECT, MVT::v4i16, Expand); 00868 setOperationAction(ISD::SELECT, MVT::v2i32, Expand); 00869 setOperationAction(ISD::SELECT, MVT::v1i64, Expand); 00870 setOperationAction(ISD::BITCAST, MVT::v8i8, Expand); 00871 setOperationAction(ISD::BITCAST, MVT::v4i16, Expand); 00872 setOperationAction(ISD::BITCAST, MVT::v2i32, Expand); 00873 
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand); 00874 00875 if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) { 00876 addRegisterClass(MVT::v4f32, &X86::VR128RegClass); 00877 00878 setOperationAction(ISD::FADD, MVT::v4f32, Legal); 00879 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 00880 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 00881 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 00882 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 00883 setOperationAction(ISD::FNEG, MVT::v4f32, Custom); 00884 setOperationAction(ISD::FABS, MVT::v4f32, Custom); 00885 setOperationAction(ISD::LOAD, MVT::v4f32, Legal); 00886 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 00887 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); 00888 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 00889 setOperationAction(ISD::SELECT, MVT::v4f32, Custom); 00890 } 00891 00892 if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) { 00893 addRegisterClass(MVT::v2f64, &X86::VR128RegClass); 00894 00895 // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM 00896 // registers cannot be used even for integer operations. 
00897 addRegisterClass(MVT::v16i8, &X86::VR128RegClass); 00898 addRegisterClass(MVT::v8i16, &X86::VR128RegClass); 00899 addRegisterClass(MVT::v4i32, &X86::VR128RegClass); 00900 addRegisterClass(MVT::v2i64, &X86::VR128RegClass); 00901 00902 setOperationAction(ISD::ADD, MVT::v16i8, Legal); 00903 setOperationAction(ISD::ADD, MVT::v8i16, Legal); 00904 setOperationAction(ISD::ADD, MVT::v4i32, Legal); 00905 setOperationAction(ISD::ADD, MVT::v2i64, Legal); 00906 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 00907 setOperationAction(ISD::MUL, MVT::v2i64, Custom); 00908 setOperationAction(ISD::SUB, MVT::v16i8, Legal); 00909 setOperationAction(ISD::SUB, MVT::v8i16, Legal); 00910 setOperationAction(ISD::SUB, MVT::v4i32, Legal); 00911 setOperationAction(ISD::SUB, MVT::v2i64, Legal); 00912 setOperationAction(ISD::MUL, MVT::v8i16, Legal); 00913 setOperationAction(ISD::FADD, MVT::v2f64, Legal); 00914 setOperationAction(ISD::FSUB, MVT::v2f64, Legal); 00915 setOperationAction(ISD::FMUL, MVT::v2f64, Legal); 00916 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 00917 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 00918 setOperationAction(ISD::FNEG, MVT::v2f64, Custom); 00919 setOperationAction(ISD::FABS, MVT::v2f64, Custom); 00920 00921 setOperationAction(ISD::SETCC, MVT::v2i64, Custom); 00922 setOperationAction(ISD::SETCC, MVT::v16i8, Custom); 00923 setOperationAction(ISD::SETCC, MVT::v8i16, Custom); 00924 setOperationAction(ISD::SETCC, MVT::v4i32, Custom); 00925 00926 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); 00927 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); 00928 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 00929 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 00930 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 00931 00932 // Custom lower build_vector, vector_shuffle, and extract_vector_elt. 
00933 for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { 00934 MVT VT = (MVT::SimpleValueType)i; 00935 // Do not attempt to custom lower non-power-of-2 vectors 00936 if (!isPowerOf2_32(VT.getVectorNumElements())) 00937 continue; 00938 // Do not attempt to custom lower non-128-bit vectors 00939 if (!VT.is128BitVector()) 00940 continue; 00941 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 00942 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 00943 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 00944 } 00945 00946 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 00947 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 00948 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); 00949 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); 00950 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); 00951 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 00952 00953 if (Subtarget->is64Bit()) { 00954 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); 00955 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); 00956 } 00957 00958 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
00959 for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { 00960 MVT VT = (MVT::SimpleValueType)i; 00961 00962 // Do not attempt to promote non-128-bit vectors 00963 if (!VT.is128BitVector()) 00964 continue; 00965 00966 setOperationAction(ISD::AND, VT, Promote); 00967 AddPromotedToType (ISD::AND, VT, MVT::v2i64); 00968 setOperationAction(ISD::OR, VT, Promote); 00969 AddPromotedToType (ISD::OR, VT, MVT::v2i64); 00970 setOperationAction(ISD::XOR, VT, Promote); 00971 AddPromotedToType (ISD::XOR, VT, MVT::v2i64); 00972 setOperationAction(ISD::LOAD, VT, Promote); 00973 AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); 00974 setOperationAction(ISD::SELECT, VT, Promote); 00975 AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); 00976 } 00977 00978 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 00979 00980 // Custom lower v2i64 and v2f64 selects. 00981 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 00982 setOperationAction(ISD::LOAD, MVT::v2i64, Legal); 00983 setOperationAction(ISD::SELECT, MVT::v2f64, Custom); 00984 setOperationAction(ISD::SELECT, MVT::v2i64, Custom); 00985 00986 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 00987 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 00988 00989 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); 00990 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); 00991 // As there is no 64-bit GPR available, we need build a special custom 00992 // sequence to convert from v2i32 to v2f32. 
00993 if (!Subtarget->is64Bit()) 00994 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); 00995 00996 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); 00997 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); 00998 00999 setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal); 01000 } 01001 01002 if (Subtarget->hasSSE41()) { 01003 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 01004 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 01005 setOperationAction(ISD::FTRUNC, MVT::f32, Legal); 01006 setOperationAction(ISD::FRINT, MVT::f32, Legal); 01007 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); 01008 setOperationAction(ISD::FFLOOR, MVT::f64, Legal); 01009 setOperationAction(ISD::FCEIL, MVT::f64, Legal); 01010 setOperationAction(ISD::FTRUNC, MVT::f64, Legal); 01011 setOperationAction(ISD::FRINT, MVT::f64, Legal); 01012 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); 01013 01014 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 01015 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 01016 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 01017 setOperationAction(ISD::FRINT, MVT::v4f32, Legal); 01018 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 01019 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 01020 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 01021 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 01022 setOperationAction(ISD::FRINT, MVT::v2f64, Legal); 01023 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 01024 01025 // FIXME: Do we need to handle scalar-to-vector here? 
01026 setOperationAction(ISD::MUL, MVT::v4i32, Legal); 01027 01028 setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); 01029 setOperationAction(ISD::VSELECT, MVT::v2i64, Legal); 01030 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); 01031 setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); 01032 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 01033 01034 // i8 and i16 vectors are custom , because the source register and source 01035 // source memory operand types are not the same width. f32 vectors are 01036 // custom since the immediate controlling the insert encodes additional 01037 // information. 01038 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); 01039 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 01040 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 01041 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 01042 01043 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom); 01044 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom); 01045 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); 01046 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 01047 01048 // FIXME: these should be Legal but thats only for the case where 01049 // the index is constant. For now custom expand to deal with that. 
01050 if (Subtarget->is64Bit()) { 01051 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); 01052 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); 01053 } 01054 } 01055 01056 if (Subtarget->hasSSE2()) { 01057 setOperationAction(ISD::SRL, MVT::v8i16, Custom); 01058 setOperationAction(ISD::SRL, MVT::v16i8, Custom); 01059 01060 setOperationAction(ISD::SHL, MVT::v8i16, Custom); 01061 setOperationAction(ISD::SHL, MVT::v16i8, Custom); 01062 01063 setOperationAction(ISD::SRA, MVT::v8i16, Custom); 01064 setOperationAction(ISD::SRA, MVT::v16i8, Custom); 01065 01066 // In the customized shift lowering, the legal cases in AVX2 will be 01067 // recognized. 01068 setOperationAction(ISD::SRL, MVT::v2i64, Custom); 01069 setOperationAction(ISD::SRL, MVT::v4i32, Custom); 01070 01071 setOperationAction(ISD::SHL, MVT::v2i64, Custom); 01072 setOperationAction(ISD::SHL, MVT::v4i32, Custom); 01073 01074 setOperationAction(ISD::SRA, MVT::v4i32, Custom); 01075 01076 setOperationAction(ISD::SDIV, MVT::v8i16, Custom); 01077 setOperationAction(ISD::SDIV, MVT::v4i32, Custom); 01078 } 01079 01080 if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) { 01081 addRegisterClass(MVT::v32i8, &X86::VR256RegClass); 01082 addRegisterClass(MVT::v16i16, &X86::VR256RegClass); 01083 addRegisterClass(MVT::v8i32, &X86::VR256RegClass); 01084 addRegisterClass(MVT::v8f32, &X86::VR256RegClass); 01085 addRegisterClass(MVT::v4i64, &X86::VR256RegClass); 01086 addRegisterClass(MVT::v4f64, &X86::VR256RegClass); 01087 01088 setOperationAction(ISD::LOAD, MVT::v8f32, Legal); 01089 setOperationAction(ISD::LOAD, MVT::v4f64, Legal); 01090 setOperationAction(ISD::LOAD, MVT::v4i64, Legal); 01091 01092 setOperationAction(ISD::FADD, MVT::v8f32, Legal); 01093 setOperationAction(ISD::FSUB, MVT::v8f32, Legal); 01094 setOperationAction(ISD::FMUL, MVT::v8f32, Legal); 01095 setOperationAction(ISD::FDIV, MVT::v8f32, Legal); 01096 setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); 01097 
setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal); 01098 setOperationAction(ISD::FCEIL, MVT::v8f32, Legal); 01099 setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal); 01100 setOperationAction(ISD::FRINT, MVT::v8f32, Legal); 01101 setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal); 01102 setOperationAction(ISD::FNEG, MVT::v8f32, Custom); 01103 setOperationAction(ISD::FABS, MVT::v8f32, Custom); 01104 01105 setOperationAction(ISD::FADD, MVT::v4f64, Legal); 01106 setOperationAction(ISD::FSUB, MVT::v4f64, Legal); 01107 setOperationAction(ISD::FMUL, MVT::v4f64, Legal); 01108 setOperationAction(ISD::FDIV, MVT::v4f64, Legal); 01109 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); 01110 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); 01111 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); 01112 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); 01113 setOperationAction(ISD::FRINT, MVT::v4f64, Legal); 01114 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal); 01115 setOperationAction(ISD::FNEG, MVT::v4f64, Custom); 01116 setOperationAction(ISD::FABS, MVT::v4f64, Custom); 01117 01118 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); 01119 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); 01120 01121 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); 01122 01123 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); 01124 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote); 01125 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); 01126 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); 01127 01128 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); 01129 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom); 01130 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); 01131 01132 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal); 01133 01134 setOperationAction(ISD::SRL, MVT::v16i16, Custom); 01135 setOperationAction(ISD::SRL, MVT::v32i8, Custom); 01136 01137 setOperationAction(ISD::SHL, MVT::v16i16, Custom); 01138 
setOperationAction(ISD::SHL, MVT::v32i8, Custom); 01139 01140 setOperationAction(ISD::SRA, MVT::v16i16, Custom); 01141 setOperationAction(ISD::SRA, MVT::v32i8, Custom); 01142 01143 setOperationAction(ISD::SDIV, MVT::v16i16, Custom); 01144 01145 setOperationAction(ISD::SETCC, MVT::v32i8, Custom); 01146 setOperationAction(ISD::SETCC, MVT::v16i16, Custom); 01147 setOperationAction(ISD::SETCC, MVT::v8i32, Custom); 01148 setOperationAction(ISD::SETCC, MVT::v4i64, Custom); 01149 01150 setOperationAction(ISD::SELECT, MVT::v4f64, Custom); 01151 setOperationAction(ISD::SELECT, MVT::v4i64, Custom); 01152 setOperationAction(ISD::SELECT, MVT::v8f32, Custom); 01153 01154 setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); 01155 setOperationAction(ISD::VSELECT, MVT::v4i64, Legal); 01156 setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); 01157 setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); 01158 01159 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); 01160 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); 01161 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); 01162 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); 01163 setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom); 01164 setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom); 01165 01166 if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { 01167 setOperationAction(ISD::FMA, MVT::v8f32, Legal); 01168 setOperationAction(ISD::FMA, MVT::v4f64, Legal); 01169 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 01170 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 01171 setOperationAction(ISD::FMA, MVT::f32, Legal); 01172 setOperationAction(ISD::FMA, MVT::f64, Legal); 01173 } 01174 01175 if (Subtarget->hasInt256()) { 01176 setOperationAction(ISD::ADD, MVT::v4i64, Legal); 01177 setOperationAction(ISD::ADD, MVT::v8i32, Legal); 01178 setOperationAction(ISD::ADD, MVT::v16i16, Legal); 01179 setOperationAction(ISD::ADD, MVT::v32i8, Legal); 01180 01181 setOperationAction(ISD::SUB, MVT::v4i64, 
Legal); 01182 setOperationAction(ISD::SUB, MVT::v8i32, Legal); 01183 setOperationAction(ISD::SUB, MVT::v16i16, Legal); 01184 setOperationAction(ISD::SUB, MVT::v32i8, Legal); 01185 01186 setOperationAction(ISD::MUL, MVT::v4i64, Custom); 01187 setOperationAction(ISD::MUL, MVT::v8i32, Legal); 01188 setOperationAction(ISD::MUL, MVT::v16i16, Legal); 01189 // Don't lower v32i8 because there is no 128-bit byte mul 01190 01191 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); 01192 01193 setOperationAction(ISD::SDIV, MVT::v8i32, Custom); 01194 } else { 01195 setOperationAction(ISD::ADD, MVT::v4i64, Custom); 01196 setOperationAction(ISD::ADD, MVT::v8i32, Custom); 01197 setOperationAction(ISD::ADD, MVT::v16i16, Custom); 01198 setOperationAction(ISD::ADD, MVT::v32i8, Custom); 01199 01200 setOperationAction(ISD::SUB, MVT::v4i64, Custom); 01201 setOperationAction(ISD::SUB, MVT::v8i32, Custom); 01202 setOperationAction(ISD::SUB, MVT::v16i16, Custom); 01203 setOperationAction(ISD::SUB, MVT::v32i8, Custom); 01204 01205 setOperationAction(ISD::MUL, MVT::v4i64, Custom); 01206 setOperationAction(ISD::MUL, MVT::v8i32, Custom); 01207 setOperationAction(ISD::MUL, MVT::v16i16, Custom); 01208 // Don't lower v32i8 because there is no 128-bit byte mul 01209 } 01210 01211 // In the customized shift lowering, the legal cases in AVX2 will be 01212 // recognized. 01213 setOperationAction(ISD::SRL, MVT::v4i64, Custom); 01214 setOperationAction(ISD::SRL, MVT::v8i32, Custom); 01215 01216 setOperationAction(ISD::SHL, MVT::v4i64, Custom); 01217 setOperationAction(ISD::SHL, MVT::v8i32, Custom); 01218 01219 setOperationAction(ISD::SRA, MVT::v8i32, Custom); 01220 01221 // Custom lower several nodes for 256-bit types. 01222 for (int i = MVT::FIRST_VECTOR_VALUETYPE; 01223 i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { 01224 MVT VT = (MVT::SimpleValueType)i; 01225 01226 // Extract subvector is special because the value type 01227 // (result) is 128-bit but the source is 256-bit wide. 
01228 if (VT.is128BitVector()) 01229 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 01230 01231 // Do not attempt to custom lower other non-256-bit vectors 01232 if (!VT.is256BitVector()) 01233 continue; 01234 01235 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 01236 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 01237 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 01238 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 01239 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 01240 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); 01241 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); 01242 } 01243 01244 // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. 01245 for (int i = MVT::v32i8; i != MVT::v4i64; ++i) { 01246 MVT VT = (MVT::SimpleValueType)i; 01247 01248 // Do not attempt to promote non-256-bit vectors 01249 if (!VT.is256BitVector()) 01250 continue; 01251 01252 setOperationAction(ISD::AND, VT, Promote); 01253 AddPromotedToType (ISD::AND, VT, MVT::v4i64); 01254 setOperationAction(ISD::OR, VT, Promote); 01255 AddPromotedToType (ISD::OR, VT, MVT::v4i64); 01256 setOperationAction(ISD::XOR, VT, Promote); 01257 AddPromotedToType (ISD::XOR, VT, MVT::v4i64); 01258 setOperationAction(ISD::LOAD, VT, Promote); 01259 AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); 01260 setOperationAction(ISD::SELECT, VT, Promote); 01261 AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); 01262 } 01263 } 01264 01265 // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion 01266 // of this type with custom code. 01267 for (int VT = MVT::FIRST_VECTOR_VALUETYPE; 01268 VT != MVT::LAST_VECTOR_VALUETYPE; VT++) { 01269 setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, 01270 Custom); 01271 } 01272 01273 // We want to custom lower some of our intrinsics. 
01274 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 01275 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 01276 01277 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't 01278 // handle type legalization for these operations here. 01279 // 01280 // FIXME: We really should do custom legalization for addition and 01281 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better 01282 // than generic legalization for 64-bit multiplication-with-overflow, though. 01283 for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) { 01284 // Add/Sub/Mul with overflow operations are custom lowered. 01285 MVT VT = IntVTs[i]; 01286 setOperationAction(ISD::SADDO, VT, Custom); 01287 setOperationAction(ISD::UADDO, VT, Custom); 01288 setOperationAction(ISD::SSUBO, VT, Custom); 01289 setOperationAction(ISD::USUBO, VT, Custom); 01290 setOperationAction(ISD::SMULO, VT, Custom); 01291 setOperationAction(ISD::UMULO, VT, Custom); 01292 } 01293 01294 // There are no 8-bit 3-address imul/mul instructions 01295 setOperationAction(ISD::SMULO, MVT::i8, Expand); 01296 setOperationAction(ISD::UMULO, MVT::i8, Expand); 01297 01298 if (!Subtarget->is64Bit()) { 01299 // These libcalls are not available in 32-bit. 01300 setLibcallName(RTLIB::SHL_I128, 0); 01301 setLibcallName(RTLIB::SRL_I128, 0); 01302 setLibcallName(RTLIB::SRA_I128, 0); 01303 } 01304 01305 // Combine sin / cos into one node or libcall if possible. 01306 if (Subtarget->hasSinCos()) { 01307 setLibcallName(RTLIB::SINCOS_F32, "sincosf"); 01308 setLibcallName(RTLIB::SINCOS_F64, "sincos"); 01309 if (Subtarget->isTargetDarwin()) { 01310 // For MacOSX, we don't want to the normal expansion of a libcall to 01311 // sincos. We want to issue a libcall to __sincos_stret to avoid memory 01312 // traffic. 
01313 setOperationAction(ISD::FSINCOS, MVT::f64, Custom); 01314 setOperationAction(ISD::FSINCOS, MVT::f32, Custom); 01315 } 01316 } 01317 01318 // We have target-specific dag combine patterns for the following nodes: 01319 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 01320 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); 01321 setTargetDAGCombine(ISD::VSELECT); 01322 setTargetDAGCombine(ISD::SELECT); 01323 setTargetDAGCombine(ISD::SHL); 01324 setTargetDAGCombine(ISD::SRA); 01325 setTargetDAGCombine(ISD::SRL); 01326 setTargetDAGCombine(ISD::OR); 01327 setTargetDAGCombine(ISD::AND); 01328 setTargetDAGCombine(ISD::ADD); 01329 setTargetDAGCombine(ISD::FADD); 01330 setTargetDAGCombine(ISD::FSUB); 01331 setTargetDAGCombine(ISD::FMA); 01332 setTargetDAGCombine(ISD::SUB); 01333 setTargetDAGCombine(ISD::LOAD); 01334 setTargetDAGCombine(ISD::STORE); 01335 setTargetDAGCombine(ISD::ZERO_EXTEND); 01336 setTargetDAGCombine(ISD::ANY_EXTEND); 01337 setTargetDAGCombine(ISD::SIGN_EXTEND); 01338 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); 01339 setTargetDAGCombine(ISD::TRUNCATE); 01340 setTargetDAGCombine(ISD::SINT_TO_FP); 01341 setTargetDAGCombine(ISD::SETCC); 01342 if (Subtarget->is64Bit()) 01343 setTargetDAGCombine(ISD::MUL); 01344 setTargetDAGCombine(ISD::XOR); 01345 01346 computeRegisterProperties(); 01347 01348 // On Darwin, -Os means optimize for size without hurting performance, 01349 // do not reduce the limit. 01350 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores 01351 MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; 01352 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores 01353 MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; 01354 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores 01355 MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; 01356 setPrefLoopAlignment(4); // 2^4 bytes. 01357 01358 // Predictable cmov don't hurt on atom because it's in-order. 
01359 PredictableSelectIsExpensive = !Subtarget->isAtom(); 01360 01361 setPrefFunctionAlignment(4); // 2^4 bytes. 01362 } 01363 01364 EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { 01365 if (!VT.isVector()) return MVT::i8; 01366 return VT.changeVectorElementTypeToInteger(); 01367 } 01368 01369 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine 01370 /// the desired ByVal argument alignment. 01371 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { 01372 if (MaxAlign == 16) 01373 return; 01374 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { 01375 if (VTy->getBitWidth() == 128) 01376 MaxAlign = 16; 01377 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 01378 unsigned EltAlign = 0; 01379 getMaxByValAlign(ATy->getElementType(), EltAlign); 01380 if (EltAlign > MaxAlign) 01381 MaxAlign = EltAlign; 01382 } else if (StructType *STy = dyn_cast<StructType>(Ty)) { 01383 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 01384 unsigned EltAlign = 0; 01385 getMaxByValAlign(STy->getElementType(i), EltAlign); 01386 if (EltAlign > MaxAlign) 01387 MaxAlign = EltAlign; 01388 if (MaxAlign == 16) 01389 break; 01390 } 01391 } 01392 } 01393 01394 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 01395 /// function arguments in the caller parameter area. For X86, aggregates 01396 /// that contain SSE vectors are placed at 16-byte boundaries while the rest 01397 /// are at 4-byte boundaries. 01398 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { 01399 if (Subtarget->is64Bit()) { 01400 // Max of 8 and alignment of type. 
01401 unsigned TyAlign = TD->getABITypeAlignment(Ty); 01402 if (TyAlign > 8) 01403 return TyAlign; 01404 return 8; 01405 } 01406 01407 unsigned Align = 4; 01408 if (Subtarget->hasSSE1()) 01409 getMaxByValAlign(Ty, Align); 01410 return Align; 01411 } 01412 01413 /// getOptimalMemOpType - Returns the target specific optimal type for load 01414 /// and store operations as a result of memset, memcpy, and memmove 01415 /// lowering. If DstAlign is zero that means it's safe to destination 01416 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 01417 /// means there isn't a need to check it against alignment requirement, 01418 /// probably because the source does not need to be loaded. If 'IsMemset' is 01419 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 01420 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 01421 /// source is constant so it does not need to be loaded. 01422 /// It returns EVT::Other if the type should be determined using generic 01423 /// target-independent logic. 
01424 EVT 01425 X86TargetLowering::getOptimalMemOpType(uint64_t Size, 01426 unsigned DstAlign, unsigned SrcAlign, 01427 bool IsMemset, bool ZeroMemset, 01428 bool MemcpyStrSrc, 01429 MachineFunction &MF) const { 01430 const Function *F = MF.getFunction(); 01431 if ((!IsMemset || ZeroMemset) && 01432 !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, 01433 Attribute::NoImplicitFloat)) { 01434 if (Size >= 16 && 01435 (Subtarget->isUnalignedMemAccessFast() || 01436 ((DstAlign == 0 || DstAlign >= 16) && 01437 (SrcAlign == 0 || SrcAlign >= 16)))) { 01438 if (Size >= 32) { 01439 if (Subtarget->hasInt256()) 01440 return MVT::v8i32; 01441 if (Subtarget->hasFp256()) 01442 return MVT::v8f32; 01443 } 01444 if (Subtarget->hasSSE2()) 01445 return MVT::v4i32; 01446 if (Subtarget->hasSSE1()) 01447 return MVT::v4f32; 01448 } else if (!MemcpyStrSrc && Size >= 8 && 01449 !Subtarget->is64Bit() && 01450 Subtarget->hasSSE2()) { 01451 // Do not use f64 to lower memcpy if source is string constant. It's 01452 // better to use i32 to avoid the loads. 01453 return MVT::f64; 01454 } 01455 } 01456 if (Subtarget->is64Bit() && Size >= 8) 01457 return MVT::i64; 01458 return MVT::i32; 01459 } 01460 01461 bool X86TargetLowering::isSafeMemOpType(MVT VT) const { 01462 if (VT == MVT::f32) 01463 return X86ScalarSSEf32; 01464 else if (VT == MVT::f64) 01465 return X86ScalarSSEf64; 01466 return true; 01467 } 01468 01469 bool 01470 X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { 01471 if (Fast) 01472 *Fast = Subtarget->isUnalignedMemAccessFast(); 01473 return true; 01474 } 01475 01476 /// getJumpTableEncoding - Return the entry encoding for a jump table in the 01477 /// current function. The returned value is a member of the 01478 /// MachineJumpTableInfo::JTEntryKind enum. 01479 unsigned X86TargetLowering::getJumpTableEncoding() const { 01480 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF 01481 // symbol. 
01482 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 01483 Subtarget->isPICStyleGOT()) 01484 return MachineJumpTableInfo::EK_Custom32; 01485 01486 // Otherwise, use the normal jump table encoding heuristics. 01487 return TargetLowering::getJumpTableEncoding(); 01488 } 01489 01490 const MCExpr * 01491 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, 01492 const MachineBasicBlock *MBB, 01493 unsigned uid,MCContext &Ctx) const{ 01494 assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ && 01495 Subtarget->isPICStyleGOT()); 01496 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF 01497 // entries. 01498 return MCSymbolRefExpr::Create(MBB->getSymbol(), 01499 MCSymbolRefExpr::VK_GOTOFF, Ctx); 01500 } 01501 01502 /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC 01503 /// jumptable. 01504 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, 01505 SelectionDAG &DAG) const { 01506 if (!Subtarget->is64Bit()) 01507 // This doesn't have SDLoc associated with it, but is not really the 01508 // same as a Register. 01509 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy()); 01510 return Table; 01511 } 01512 01513 /// getPICJumpTableRelocBaseExpr - This returns the relocation base for the 01514 /// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an 01515 /// MCExpr. 01516 const MCExpr *X86TargetLowering:: 01517 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, 01518 MCContext &Ctx) const { 01519 // X86-64 uses RIP relative addressing based on the jump table label. 01520 if (Subtarget->isPICStyleRIPRel()) 01521 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); 01522 01523 // Otherwise, the reference is relative to the PIC base. 01524 return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx); 01525 } 01526 01527 // FIXME: Why this routine is here? Move to RegInfo! 
/// findRepresentativeClass - Pick the representative register class (and its
/// cost, always 1 here) for the given value type.  Scalar integer types map
/// to GR64 or GR32 depending on whether the subtarget is 64-bit, x86mmx maps
/// to VR64, and the listed FP / vector types map to VR128.  Any other type
/// is delegated to the TargetLowering base implementation.
std::pair<const TargetRegisterClass*, uint8_t>
X86TargetLowering::findRepresentativeClass(MVT VT) const{
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
    RRC = Subtarget->is64Bit() ?
      (const TargetRegisterClass*)&X86::GR64RegClass :
      (const TargetRegisterClass*)&X86::GR32RegClass;
    break;
  case MVT::x86mmx:
    RRC = &X86::VR64RegClass;
    break;
  case MVT::f32: case MVT::f64:
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
  case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
  case MVT::v4f64:
    RRC = &X86::VR128RegClass;
    break;
  }
  return std::make_pair(RRC, Cost);
}

/// getStackCookieLocation - Report where the stack protector cookie lives.
/// Returns false (leaving both outputs untouched) unless the subtarget
/// targets Linux; otherwise sets AddressSpace / Offset and returns true.
/// Per the comments below, address space 256 selects %gs and 257 selects %fs.
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
                                               unsigned &Offset) const {
  if (!Subtarget->isTargetLinux())
    return false;

  if (Subtarget->is64Bit()) {
    // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
    Offset = 0x28;
    if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
      AddressSpace = 256;
    else
      AddressSpace = 257;
  } else {
    // %gs:0x14 on i386
    Offset = 0x14;
    AddressSpace = 256;
  }
  return true;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// CanLowerReturn - Check the outgoing return values against RetCC_X86 and
/// report whether CCState::CheckReturn accepts them.
bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
                 RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_X86);
}

/// LowerReturn - Build the X86ISD::RET_FLAG node for a function return.
/// The node's operands are, in order: the chain, the number of bytes to pop
/// on return, one entry per returned value (a register operand, or the value
/// itself for ST0/ST1 returns), the sret pointer register when applicable,
/// and finally the glue value if any CopyToReg was emitted.
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               SDLoc dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  // Assign a location to every returned value.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_X86);

  // Flag carries the glue result from one CopyToReg to the next.
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  // Operand #1 = Bytes To Pop
  RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
                   MVT::i16));

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue ValToCopy = OutVals[i];
    // Note: ValVT is the type of the value *before* any promotion below.
    EVT ValVT = ValToCopy.getValueType();

    // Promote values to the appropriate types
    if (VA.getLocInfo() == CCValAssign::SExt)
      ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::ZExt)
      ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::AExt)
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::BCvt)
      ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);

    // If this is x86-64, and we disabled SSE, we can't return FP values,
    // or SSE or MMX vectors.
    if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
         VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
          (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
      report_fatal_error("SSE register return with SSE disabled");
    }
    // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
    // llvm-gcc has never done it right and no one has noticed, so this
    // should be OK for now.
    if (ValVT == MVT::f64 &&
        (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
      report_fatal_error("SSE2 register return with SSE2 disabled");

    // Returns in ST0/ST1 are handled specially: these are pushed as operands to
    // the RET instruction and handled by the FP Stackifier.
    if (VA.getLocReg() == X86::ST0 ||
        VA.getLocReg() == X86::ST1) {
      // If this is a copy from an xmm register to ST(0), use an FPExtend to
      // change the value to the FP stack register class.
      if (isScalarFPTypeInSSEReg(VA.getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
      RetOps.push_back(ValToCopy);
      // Don't emit a copytoreg.
      continue;
    }

    // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
    // which is returned in RAX / RDX.
    if (Subtarget->is64Bit()) {
      if (ValVT == MVT::x86mmx) {
        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
          ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                  ValToCopy);
          // If we don't have SSE2 available, convert to v4f32 so the generated
          // register is legal.
          if (!Subtarget->hasSSE2())
            ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
        }
      }
    }

    // Thread the chain and glue through each copy so the copies stay ordered
    // and attached to the return node.
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  // The x86-64 ABIs require that for returning structs by value we copy
  // the sret argument into %rax/%eax (depending on ABI) for the return.
  // Win32 requires us to put the sret argument to %eax as well.
  // We saved the argument into a virtual register in the entry block,
  // so now we copy the value out and into %rax/%eax.
  if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
      (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
    // NOTE(review): MF and FuncInfo here shadow the identically-initialized
    // locals declared at the top of the function.
    MachineFunction &MF = DAG.getMachineFunction();
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    unsigned Reg = FuncInfo->getSRetReturnReg();
    assert(Reg &&
           "SRetReturnReg should have been set in LowerFormalArguments().");
    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());

    // RAX for 64-bit targets that are not 64-bit ILP32; EAX otherwise.
    unsigned RetValReg
        = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
          X86::RAX : X86::EAX;
    Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
    Flag = Chain.getValue(1);

    // RAX/EAX now acts like a return value.
    RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(X86ISD::RET_FLAG, dl,
                     MVT::Other, &RetOps[0], RetOps.size());
}

/// isUsedByReturnOnly - Return true when N produces a single value with
/// exactly one use, that use is either a glue-free CopyToReg or an FP_EXTEND,
/// and every user of that node is an X86ISD::RET_FLAG (with at least one such
/// user).  On success Chain is set to the CopyToReg's incoming chain; for the
/// FP_EXTEND case it is left at the value passed in.
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDValue TCChain = Chain;
  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() == ISD::CopyToReg) {
    // If the copy has a glue operand, we conservatively assume it isn't safe to
    // perform a tail call.
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
    return false;

  // Every user of the copy / extend must be a return node.
  bool HasRet = false;
  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
       UI != UE; ++UI) {
    if (UI->getOpcode() != X86ISD::RET_FLAG)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = TCChain;
  return true;
}

/// getTypeForExtArgOrReturn - Return the type an extended argument or return
/// value of type VT should be widened to.  The minimum is the register type
/// for i8 when zero-extending an i1 on a 64-bit subtarget, and the register
/// type for i32 otherwise; VT is returned unchanged when it is already at
/// least that wide.
MVT
X86TargetLowering::getTypeForExtArgOrReturn(MVT VT,
                                            ISD::NodeType ExtendKind) const {
  MVT ReturnMVT;
  // TODO: Is this also valid on 32-bit?
  if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
    ReturnMVT = MVT::i8;
  else
    ReturnMVT = MVT::i32;

  MVT MinVT = getRegisterType(ReturnMVT);
  return VT.bitsLT(MinVT) ? MinVT : VT;
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
/// Each lowered value is appended to InVals; the updated chain is returned.
///
SDValue
X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   SDLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool Is64Bit = Subtarget->is64Bit();
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_X86);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    EVT CopyVT = VA.getValVT();

    // If this is x86-64, and we disabled SSE, we can't return FP values
    // (the same applies to values flagged inreg on any target).
    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
      report_fatal_error("SSE register return with SSE disabled");
    }

    SDValue Val;

    // If this is a call to a function that returns an fp value on the floating
    // point stack, we must guarantee the value is popped from the stack, so
    // a CopyFromReg is not good enough - the copy instruction may be eliminated
    // if the return value is not used. We use the FpPOP_RETVAL instruction
    // instead.
    if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
      // If we prefer to use the value in xmm registers, copy it out as f80 and
      // use a truncate to move it from fp stack reg to xmm reg.
      if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
      SDValue Ops[] = { Chain, InFlag };
      // Result 0 is the popped value, result 1 the chain, result 2 the glue.
      Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
                                         MVT::Other, MVT::Glue, Ops), 1);
      Val = Chain.getValue(0);

      // Round the f80 to the right size, which also moves it to the appropriate
      // xmm register.
      if (CopyVT != VA.getValVT())
        Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
                          // This truncation won't change the value.
                          DAG.getIntPtrConstant(1));
    } else {
      // Result 0 is the copied value, result 1 the chain, result 2 the glue.
      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                 CopyVT, InFlag).getValue(1);
      Val = Chain.getValue(0);
    }
    // Feed this node's glue result into the next iteration.
    InFlag = Chain.getValue(2);
    InVals.push_back(Val);
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  StdCall calling convention seems to be standard for many Windows' API
//  routines and around. It differs from C calling convention just a little:
//  callee should clean up the stack, not caller. Symbols should be also
//  decorated in some fancy way :) It doesn't support any vector arguments.
//  For info on fast calling convention see Fast Calling Convention (tail call)
//  implementation LowerX86_32FastCCCallTo.

/// StructReturnType - Classification of struct-return (sret) usage: none,
/// sret pointer flagged inreg, or sret pointer not flagged inreg.
enum StructReturnType {
  NotStructReturn,
  RegStructReturn,
  StackStructReturn
};

/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics, judged from the flags of the first outgoing argument.
static StructReturnType
callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  if (Outs.empty())
    return NotStructReturn;

  const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
  if (!Flags.isSRet())
    return NotStructReturn;
  if (Flags.isInReg())
    return RegStructReturn;
  return StackStructReturn;
}

/// ArgsAreStructReturn - Determines whether a function uses struct
/// return semantics.
01851 static StructReturnType 01852 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { 01853 if (Ins.empty()) 01854 return NotStructReturn; 01855 01856 const ISD::ArgFlagsTy &Flags = Ins[0].Flags; 01857 if (!Flags.isSRet()) 01858 return NotStructReturn; 01859 if (Flags.isInReg()) 01860 return RegStructReturn; 01861 return StackStructReturn; 01862 } 01863 01864 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 01865 /// by "Src" to address "Dst" with size and alignment information specified by 01866 /// the specific parameter attribute. The copy will be passed as a byval 01867 /// function parameter. 01868 static SDValue 01869 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 01870 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 01871 SDLoc dl) { 01872 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 01873 01874 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 01875 /*isVolatile*/false, /*AlwaysInline=*/true, 01876 MachinePointerInfo(), MachinePointerInfo()); 01877 } 01878 01879 /// IsTailCallConvention - Return true if the calling convention is one that 01880 /// supports tail call optimization. 01881 static bool IsTailCallConvention(CallingConv::ID CC) { 01882 return (CC == CallingConv::Fast || CC == CallingConv::GHC || 01883 CC == CallingConv::HiPE); 01884 } 01885 01886 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 01887 if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls) 01888 return false; 01889 01890 CallSite CS(CI); 01891 CallingConv::ID CalleeCC = CS.getCallingConv(); 01892 if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) 01893 return false; 01894 01895 return true; 01896 } 01897 01898 /// FuncIsMadeTailCallSafe - Return true if the function is being made into 01899 /// a tailcall target by changing its ABI. 
01900 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC, 01901 bool GuaranteedTailCallOpt) { 01902 return GuaranteedTailCallOpt && IsTailCallConvention(CC); 01903 } 01904 01905 SDValue 01906 X86TargetLowering::LowerMemArgument(SDValue Chain, 01907 CallingConv::ID CallConv, 01908 const SmallVectorImpl<ISD::InputArg> &Ins, 01909 SDLoc dl, SelectionDAG &DAG, 01910 const CCValAssign &VA, 01911 MachineFrameInfo *MFI, 01912 unsigned i) const { 01913 // Create the nodes corresponding to a load from this parameter slot. 01914 ISD::ArgFlagsTy Flags = Ins[i].Flags; 01915 bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv, 01916 getTargetMachine().Options.GuaranteedTailCallOpt); 01917 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); 01918 EVT ValVT; 01919 01920 // If value is passed by pointer we have address passed instead of the value 01921 // itself. 01922 if (VA.getLocInfo() == CCValAssign::Indirect) 01923 ValVT = VA.getLocVT(); 01924 else 01925 ValVT = VA.getValVT(); 01926 01927 // FIXME: For now, all byval parameter objects are marked mutable. This can be 01928 // changed with more analysis. 01929 // In case of tail call optimization mark all arguments mutable. Since they 01930 // could be overwritten by lowering of arguments in case of a tail call. 01931 if (Flags.isByVal()) { 01932 unsigned Bytes = Flags.getByValSize(); 01933 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. 
01934 int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable); 01935 return DAG.getFrameIndex(FI, getPointerTy()); 01936 } else { 01937 int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, 01938 VA.getLocMemOffset(), isImmutable); 01939 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 01940 return DAG.getLoad(ValVT, dl, Chain, FIN, 01941 MachinePointerInfo::getFixedStack(FI), 01942 false, false, false, 0); 01943 } 01944 } 01945 01946 SDValue 01947 X86TargetLowering::LowerFormalArguments(SDValue Chain, 01948 CallingConv::ID CallConv, 01949 bool isVarArg, 01950 const SmallVectorImpl<ISD::InputArg> &Ins, 01951 SDLoc dl, 01952 SelectionDAG &DAG, 01953 SmallVectorImpl<SDValue> &InVals) 01954 const { 01955 MachineFunction &MF = DAG.getMachineFunction(); 01956 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 01957 01958 const Function* Fn = MF.getFunction(); 01959 if (Fn->hasExternalLinkage() && 01960 Subtarget->isTargetCygMing() && 01961 Fn->getName() == "main") 01962 FuncInfo->setForceFramePointer(true); 01963 01964 MachineFrameInfo *MFI = MF.getFrameInfo(); 01965 bool Is64Bit = Subtarget->is64Bit(); 01966 bool IsWindows = Subtarget->isTargetWindows(); 01967 bool IsWin64 = Subtarget->isTargetWin64(); 01968 01969 assert(!(isVarArg && IsTailCallConvention(CallConv)) && 01970 "Var args not supported with calling convention fastcc, ghc or hipe"); 01971 01972 // Assign locations to all of the incoming arguments. 01973 SmallVector<CCValAssign, 16> ArgLocs; 01974 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), 01975 ArgLocs, *DAG.getContext()); 01976 01977 // Allocate shadow area for Win64 01978 if (IsWin64) { 01979 CCInfo.AllocateStack(32, 8); 01980 } 01981 01982 CCInfo.AnalyzeFormalArguments(Ins, CC_X86); 01983 01984 unsigned LastVal = ~0U; 01985 SDValue ArgValue; 01986 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 01987 CCValAssign &VA = ArgLocs[i]; 01988 // TODO: If an arg is passed in two places (e.g. 
reg and stack), skip later 01989 // places. 01990 assert(VA.getValNo() != LastVal && 01991 "Don't support value assigned to multiple locs yet"); 01992 (void)LastVal; 01993 LastVal = VA.getValNo(); 01994 01995 if (VA.isRegLoc()) { 01996 EVT RegVT = VA.getLocVT(); 01997 const TargetRegisterClass *RC; 01998 if (RegVT == MVT::i32) 01999 RC = &X86::GR32RegClass; 02000 else if (Is64Bit && RegVT == MVT::i64) 02001 RC = &X86::GR64RegClass; 02002 else if (RegVT == MVT::f32) 02003 RC = &X86::FR32RegClass; 02004 else if (RegVT == MVT::f64) 02005 RC = &X86::FR64RegClass; 02006 else if (RegVT.is256BitVector()) 02007 RC = &X86::VR256RegClass; 02008 else if (RegVT.is128BitVector()) 02009 RC = &X86::VR128RegClass; 02010 else if (RegVT == MVT::x86mmx) 02011 RC = &X86::VR64RegClass; 02012 else 02013 llvm_unreachable("Unknown argument type!"); 02014 02015 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 02016 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 02017 02018 // If this is an 8 or 16-bit value, it is really passed promoted to 32 02019 // bits. Insert an assert[sz]ext to capture this, then truncate to the 02020 // right size. 02021 if (VA.getLocInfo() == CCValAssign::SExt) 02022 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 02023 DAG.getValueType(VA.getValVT())); 02024 else if (VA.getLocInfo() == CCValAssign::ZExt) 02025 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 02026 DAG.getValueType(VA.getValVT())); 02027 else if (VA.getLocInfo() == CCValAssign::BCvt) 02028 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 02029 02030 if (VA.isExtInLoc()) { 02031 // Handle MMX values passed in XMM regs. 
02032 if (RegVT.isVector()) 02033 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); 02034 else 02035 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 02036 } 02037 } else { 02038 assert(VA.isMemLoc()); 02039 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i); 02040 } 02041 02042 // If value is passed via pointer - do a load. 02043 if (VA.getLocInfo() == CCValAssign::Indirect) 02044 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, 02045 MachinePointerInfo(), false, false, false, 0); 02046 02047 InVals.push_back(ArgValue); 02048 } 02049 02050 // The x86-64 ABIs require that for returning structs by value we copy 02051 // the sret argument into %rax/%eax (depending on ABI) for the return. 02052 // Win32 requires us to put the sret argument to %eax as well. 02053 // Save the argument into a virtual register so that we can access it 02054 // from the return points. 02055 if (MF.getFunction()->hasStructRetAttr() && 02056 (Subtarget->is64Bit() || Subtarget->isTargetWindows())) { 02057 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 02058 unsigned Reg = FuncInfo->getSRetReturnReg(); 02059 if (!Reg) { 02060 MVT PtrTy = getPointerTy(); 02061 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); 02062 FuncInfo->setSRetReturnReg(Reg); 02063 } 02064 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); 02065 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); 02066 } 02067 02068 unsigned StackSize = CCInfo.getNextStackOffset(); 02069 // Align stack specially for tail calls. 02070 if (FuncIsMadeTailCallSafe(CallConv, 02071 MF.getTarget().Options.GuaranteedTailCallOpt)) 02072 StackSize = GetAlignedArgumentStackSize(StackSize, DAG); 02073 02074 // If the function takes variable number of arguments, make a frame index for 02075 // the start of the first vararg value... for expansion of llvm.va_start. 
02076 if (isVarArg) { 02077 if (Is64Bit || (CallConv != CallingConv::X86_FastCall && 02078 CallConv != CallingConv::X86_ThisCall)) { 02079 FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true)); 02080 } 02081 if (Is64Bit) { 02082 unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; 02083 02084 // FIXME: We should really autogenerate these arrays 02085 static const uint16_t GPR64ArgRegsWin64[] = { 02086 X86::RCX, X86::RDX, X86::R8, X86::R9 02087 }; 02088 static const uint16_t GPR64ArgRegs64Bit[] = { 02089 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 02090 }; 02091 static const uint16_t XMMArgRegs64Bit[] = { 02092 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 02093 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 02094 }; 02095 const uint16_t *GPR64ArgRegs; 02096 unsigned NumXMMRegs = 0; 02097 02098 if (IsWin64) { 02099 // The XMM registers which might contain var arg parameters are shadowed 02100 // in their paired GPR. So we only need to save the GPR to their home 02101 // slots. 02102 TotalNumIntRegs = 4; 02103 GPR64ArgRegs = GPR64ArgRegsWin64; 02104 } else { 02105 TotalNumIntRegs = 6; TotalNumXMMRegs = 8; 02106 GPR64ArgRegs = GPR64ArgRegs64Bit; 02107 02108 NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, 02109 TotalNumXMMRegs); 02110 } 02111 unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 02112 TotalNumIntRegs); 02113 02114 bool NoImplicitFloatOps = Fn->getAttributes(). 02115 hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); 02116 assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && 02117 "SSE register cannot be used when SSE is disabled!"); 02118 assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat && 02119 NoImplicitFloatOps) && 02120 "SSE register cannot be used when SSE is disabled!"); 02121 if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps || 02122 !Subtarget->hasSSE1()) 02123 // Kernel mode asks for SSE to be disabled, so don't push them 02124 // on the stack. 
02125 TotalNumXMMRegs = 0; 02126 02127 if (IsWin64) { 02128 const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering(); 02129 // Get to the caller-allocated home save location. Add 8 to account 02130 // for the return address. 02131 int HomeOffset = TFI.getOffsetOfLocalArea() + 8; 02132 FuncInfo->setRegSaveFrameIndex( 02133 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); 02134 // Fixup to set vararg frame on shadow area (4 x i64). 02135 if (NumIntRegs < 4) 02136 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); 02137 } else { 02138 // For X86-64, if there are vararg parameters that are passed via 02139 // registers, then we must store them to their spots on the stack so 02140 // they may be loaded by deferencing the result of va_next. 02141 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); 02142 FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16); 02143 FuncInfo->setRegSaveFrameIndex( 02144 MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16, 02145 false)); 02146 } 02147 02148 // Store the integer parameter registers. 
02149 SmallVector<SDValue, 8> MemOps; 02150 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), 02151 getPointerTy()); 02152 unsigned Offset = FuncInfo->getVarArgsGPOffset(); 02153 for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) { 02154 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, 02155 DAG.getIntPtrConstant(Offset)); 02156 unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs], 02157 &X86::GR64RegClass); 02158 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 02159 SDValue Store = 02160 DAG.getStore(Val.getValue(1), dl, Val, FIN, 02161 MachinePointerInfo::getFixedStack( 02162 FuncInfo->getRegSaveFrameIndex(), Offset), 02163 false, false, 0); 02164 MemOps.push_back(Store); 02165 Offset += 8; 02166 } 02167 02168 if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) { 02169 // Now store the XMM (fp + vector) parameter registers. 02170 SmallVector<SDValue, 11> SaveXMMOps; 02171 SaveXMMOps.push_back(Chain); 02172 02173 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass); 02174 SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); 02175 SaveXMMOps.push_back(ALVal); 02176 02177 SaveXMMOps.push_back(DAG.getIntPtrConstant( 02178 FuncInfo->getRegSaveFrameIndex())); 02179 SaveXMMOps.push_back(DAG.getIntPtrConstant( 02180 FuncInfo->getVarArgsFPOffset())); 02181 02182 for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { 02183 unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs], 02184 &X86::VR128RegClass); 02185 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32); 02186 SaveXMMOps.push_back(Val); 02187 } 02188 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, 02189 MVT::Other, 02190 &SaveXMMOps[0], SaveXMMOps.size())); 02191 } 02192 02193 if (!MemOps.empty()) 02194 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 02195 &MemOps[0], MemOps.size()); 02196 } 02197 } 02198 02199 // Some CCs need callee pop. 
02200 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, 02201 MF.getTarget().Options.GuaranteedTailCallOpt)) { 02202 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. 02203 } else { 02204 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. 02205 // If this is an sret function, the return should pop the hidden pointer. 02206 if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && 02207 argsAreStructReturn(Ins) == StackStructReturn) 02208 FuncInfo->setBytesToPopOnReturn(4); 02209 } 02210 02211 if (!Is64Bit) { 02212 // RegSaveFrameIndex is X86-64 only. 02213 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); 02214 if (CallConv == CallingConv::X86_FastCall || 02215 CallConv == CallingConv::X86_ThisCall) 02216 // fastcc functions can't have varargs. 02217 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); 02218 } 02219 02220 FuncInfo->setArgumentStackSize(StackSize); 02221 02222 return Chain; 02223 } 02224 02225 SDValue 02226 X86TargetLowering::LowerMemOpCallTo(SDValue Chain, 02227 SDValue StackPtr, SDValue Arg, 02228 SDLoc dl, SelectionDAG &DAG, 02229 const CCValAssign &VA, 02230 ISD::ArgFlagsTy Flags) const { 02231 unsigned LocMemOffset = VA.getLocMemOffset(); 02232 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 02233 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 02234 if (Flags.isByVal()) 02235 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); 02236 02237 return DAG.getStore(Chain, dl, Arg, PtrOff, 02238 MachinePointerInfo::getStack(LocMemOffset), 02239 false, false, 0); 02240 } 02241 02242 /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call 02243 /// optimization is performed and it is required. 02244 SDValue 02245 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, 02246 SDValue &OutRetAddr, SDValue Chain, 02247 bool IsTailCall, bool Is64Bit, 02248 int FPDiff, SDLoc dl) const { 02249 // Adjust the Return address stack slot. 
02250 EVT VT = getPointerTy(); 02251 OutRetAddr = getReturnAddressFrameIndex(DAG); 02252 02253 // Load the "old" Return address. 02254 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(), 02255 false, false, false, 0); 02256 return SDValue(OutRetAddr.getNode(), 1); 02257 } 02258 02259 /// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call 02260 /// optimization is performed and it is required (FPDiff!=0). 02261 static SDValue 02262 EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, 02263 SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, 02264 unsigned SlotSize, int FPDiff, SDLoc dl) { 02265 // Store the return address to the appropriate stack slot. 02266 if (!FPDiff) return Chain; 02267 // Calculate the new stack slot for the return address. 02268 int NewReturnAddrFI = 02269 MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false); 02270 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); 02271 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, 02272 MachinePointerInfo::getFixedStack(NewReturnAddrFI), 02273 false, false, 0); 02274 return Chain; 02275 } 02276 02277 SDValue 02278 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 02279 SmallVectorImpl<SDValue> &InVals) const { 02280 SelectionDAG &DAG = CLI.DAG; 02281 SDLoc &dl = CLI.DL; 02282 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; 02283 SmallVector<SDValue, 32> &OutVals = CLI.OutVals; 02284 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; 02285 SDValue Chain = CLI.Chain; 02286 SDValue Callee = CLI.Callee; 02287 CallingConv::ID CallConv = CLI.CallConv; 02288 bool &isTailCall = CLI.IsTailCall; 02289 bool isVarArg = CLI.IsVarArg; 02290 02291 MachineFunction &MF = DAG.getMachineFunction(); 02292 bool Is64Bit = Subtarget->is64Bit(); 02293 bool IsWin64 = Subtarget->isTargetWin64(); 02294 bool IsWindows = Subtarget->isTargetWindows(); 02295 StructReturnType SR = callIsStructReturn(Outs); 02296 bool 
IsSibcall = false; 02297 02298 if (MF.getTarget().Options.DisableTailCalls) 02299 isTailCall = false; 02300 02301 if (isTailCall) { 02302 // Check if it's really possible to do a tail call. 02303 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 02304 isVarArg, SR != NotStructReturn, 02305 MF.getFunction()->hasStructRetAttr(), CLI.RetTy, 02306 Outs, OutVals, Ins, DAG); 02307 02308 // Sibcalls are automatically detected tailcalls which do not require 02309 // ABI changes. 02310 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall) 02311 IsSibcall = true; 02312 02313 if (isTailCall) 02314 ++NumTailCalls; 02315 } 02316 02317 assert(!(isVarArg && IsTailCallConvention(CallConv)) && 02318 "Var args not supported with calling convention fastcc, ghc or hipe"); 02319 02320 // Analyze operands of the call, assigning locations to each operand. 02321 SmallVector<CCValAssign, 16> ArgLocs; 02322 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), 02323 ArgLocs, *DAG.getContext()); 02324 02325 // Allocate shadow area for Win64 02326 if (IsWin64) { 02327 CCInfo.AllocateStack(32, 8); 02328 } 02329 02330 CCInfo.AnalyzeCallOperands(Outs, CC_X86); 02331 02332 // Get a count of how many bytes are to be pushed on the stack. 02333 unsigned NumBytes = CCInfo.getNextStackOffset(); 02334 if (IsSibcall) 02335 // This is a sibcall. The memory operands are available in caller's 02336 // own caller's stack. 02337 NumBytes = 0; 02338 else if (getTargetMachine().Options.GuaranteedTailCallOpt && 02339 IsTailCallConvention(CallConv)) 02340 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); 02341 02342 int FPDiff = 0; 02343 if (isTailCall && !IsSibcall) { 02344 // Lower arguments at fp - stackoffset + fpdiff. 
02345 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); 02346 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); 02347 02348 FPDiff = NumBytesCallerPushed - NumBytes; 02349 02350 // Set the delta of movement of the returnaddr stackslot. 02351 // But only set if delta is greater than previous delta. 02352 if (FPDiff < X86Info->getTCReturnAddrDelta()) 02353 X86Info->setTCReturnAddrDelta(FPDiff); 02354 } 02355 02356 if (!IsSibcall) 02357 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 02358 02359 SDValue RetAddrFrIdx; 02360 // Load return address for tail calls. 02361 if (isTailCall && FPDiff) 02362 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, 02363 Is64Bit, FPDiff, dl); 02364 02365 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 02366 SmallVector<SDValue, 8> MemOpChains; 02367 SDValue StackPtr; 02368 02369 // Walk the register/memloc assignments, inserting copies/loads. In the case 02370 // of tail call optimization arguments are handle later. 02371 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 02372 CCValAssign &VA = ArgLocs[i]; 02373 EVT RegVT = VA.getLocVT(); 02374 SDValue Arg = OutVals[i]; 02375 ISD::ArgFlagsTy Flags = Outs[i].Flags; 02376 bool isByVal = Flags.isByVal(); 02377 02378 // Promote the value if needed. 02379 switch (VA.getLocInfo()) { 02380 default: llvm_unreachable("Unknown loc info!"); 02381 case CCValAssign::Full: break; 02382 case CCValAssign::SExt: 02383 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); 02384 break; 02385 case CCValAssign::ZExt: 02386 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); 02387 break; 02388 case CCValAssign::AExt: 02389 if (RegVT.is128BitVector()) { 02390 // Special case: passing MMX values in XMM registers. 
02391 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 02392 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); 02393 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); 02394 } else 02395 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); 02396 break; 02397 case CCValAssign::BCvt: 02398 Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg); 02399 break; 02400 case CCValAssign::Indirect: { 02401 // Store the argument. 02402 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); 02403 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 02404 Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, 02405 MachinePointerInfo::getFixedStack(FI), 02406 false, false, 0); 02407 Arg = SpillSlot; 02408 break; 02409 } 02410 } 02411 02412 if (VA.isRegLoc()) { 02413 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 02414 if (isVarArg && IsWin64) { 02415 // Win64 ABI requires argument XMM reg to be copied to the corresponding 02416 // shadow reg if callee is a varargs function. 02417 unsigned ShadowReg = 0; 02418 switch (VA.getLocReg()) { 02419 case X86::XMM0: ShadowReg = X86::RCX; break; 02420 case X86::XMM1: ShadowReg = X86::RDX; break; 02421 case X86::XMM2: ShadowReg = X86::R8; break; 02422 case X86::XMM3: ShadowReg = X86::R9; break; 02423 } 02424 if (ShadowReg) 02425 RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); 02426 } 02427 } else if (!IsSibcall && (!isTailCall || isByVal)) { 02428 assert(VA.isMemLoc()); 02429 if (StackPtr.getNode() == 0) 02430 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), 02431 getPointerTy()); 02432 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 02433 dl, DAG, VA, Flags)); 02434 } 02435 } 02436 02437 if (!MemOpChains.empty()) 02438 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 02439 &MemOpChains[0], MemOpChains.size()); 02440 02441 if (Subtarget->isPICStyleGOT()) { 02442 // ELF / PIC requires GOT in the EBX register before function calls via PLT 02443 // GOT pointer. 
02444 if (!isTailCall) { 02445 RegsToPass.push_back(std::make_pair(unsigned(X86::EBX), 02446 DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy()))); 02447 } else { 02448 // If we are tail calling and generating PIC/GOT style code load the 02449 // address of the callee into ECX. The value in ecx is used as target of 02450 // the tail jump. This is done to circumvent the ebx/callee-saved problem 02451 // for tail calls on PIC/GOT architectures. Normally we would just put the 02452 // address of GOT into ebx and then call target@PLT. But for tail calls 02453 // ebx would be restored (since ebx is callee saved) before jumping to the 02454 // target@PLT. 02455 02456 // Note: The actual moving to ECX is done further down. 02457 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); 02458 if (G && !G->getGlobal()->hasHiddenVisibility() && 02459 !G->getGlobal()->hasProtectedVisibility()) 02460 Callee = LowerGlobalAddress(Callee, DAG); 02461 else if (isa<ExternalSymbolSDNode>(Callee)) 02462 Callee = LowerExternalSymbol(Callee, DAG); 02463 } 02464 } 02465 02466 if (Is64Bit && isVarArg && !IsWin64) { 02467 // From AMD64 ABI document: 02468 // For calls that may call functions that use varargs or stdargs 02469 // (prototype-less calls or calls to functions containing ellipsis (...) in 02470 // the declaration) %al is used as hidden argument to specify the number 02471 // of SSE registers used. The contents of %al do not need to match exactly 02472 // the number of registers, but must be an ubound on the number of SSE 02473 // registers used and is in the range 0 - 8 inclusive. 02474 02475 // Count the number of XMM registers allocated. 
02476 static const uint16_t XMMArgRegs[] = { 02477 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 02478 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 02479 }; 02480 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 02481 assert((Subtarget->hasSSE1() || !NumXMMRegs) 02482 && "SSE registers cannot be used when SSE is disabled"); 02483 02484 RegsToPass.push_back(std::make_pair(unsigned(X86::AL), 02485 DAG.getConstant(NumXMMRegs, MVT::i8))); 02486 } 02487 02488 // For tail calls lower the arguments to the 'real' stack slot. 02489 if (isTailCall) { 02490 // Force all the incoming stack arguments to be loaded from the stack 02491 // before any new outgoing arguments are stored to the stack, because the 02492 // outgoing stack slots may alias the incoming argument stack slots, and 02493 // the alias isn't otherwise explicit. This is slightly more conservative 02494 // than necessary, because it means that each store effectively depends 02495 // on every argument instead of just those arguments it would clobber. 02496 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain); 02497 02498 SmallVector<SDValue, 8> MemOpChains2; 02499 SDValue FIN; 02500 int FI = 0; 02501 if (getTargetMachine().Options.GuaranteedTailCallOpt) { 02502 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 02503 CCValAssign &VA = ArgLocs[i]; 02504 if (VA.isRegLoc()) 02505 continue; 02506 assert(VA.isMemLoc()); 02507 SDValue Arg = OutVals[i]; 02508 ISD::ArgFlagsTy Flags = Outs[i].Flags; 02509 // Create frame index. 02510 int32_t Offset = VA.getLocMemOffset()+FPDiff; 02511 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; 02512 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 02513 FIN = DAG.getFrameIndex(FI, getPointerTy()); 02514 02515 if (Flags.isByVal()) { 02516 // Copy relative to framepointer. 
02517 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); 02518 if (StackPtr.getNode() == 0) 02519 StackPtr = DAG.getCopyFromReg(Chain, dl, 02520 RegInfo->getStackRegister(), 02521 getPointerTy()); 02522 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source); 02523 02524 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, 02525 ArgChain, 02526 Flags, DAG, dl)); 02527 } else { 02528 // Store relative to framepointer. 02529 MemOpChains2.push_back( 02530 DAG.getStore(ArgChain, dl, Arg, FIN, 02531 MachinePointerInfo::getFixedStack(FI), 02532 false, false, 0)); 02533 } 02534 } 02535 } 02536 02537 if (!MemOpChains2.empty()) 02538 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 02539 &MemOpChains2[0], MemOpChains2.size()); 02540 02541 // Store the return address to the appropriate stack slot. 02542 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, 02543 getPointerTy(), RegInfo->getSlotSize(), 02544 FPDiff, dl); 02545 } 02546 02547 // Build a sequence of copy-to-reg nodes chained together with token chain 02548 // and flag operands which copy the outgoing args into registers. 02549 SDValue InFlag; 02550 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 02551 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 02552 RegsToPass[i].second, InFlag); 02553 InFlag = Chain.getValue(1); 02554 } 02555 02556 if (getTargetMachine().getCodeModel() == CodeModel::Large) { 02557 assert(Is64Bit && "Large code model is only legal in 64-bit mode."); 02558 // In the 64-bit large code model, we have to make all calls 02559 // through a register, since the call instruction's 32-bit 02560 // pc-relative offset may not be large enough to hold the whole 02561 // address. 02562 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 02563 // If the callee is a GlobalAddress node (quite common, every direct call 02564 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack 02565 // it. 
02566 02567 // We should use extra load for direct calls to dllimported functions in 02568 // non-JIT mode. 02569 const GlobalValue *GV = G->getGlobal(); 02570 if (!GV->hasDLLImportLinkage()) { 02571 unsigned char OpFlags = 0; 02572 bool ExtraLoad = false; 02573 unsigned WrapperKind = ISD::DELETED_NODE; 02574 02575 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to 02576 // external symbols most go through the PLT in PIC mode. If the symbol 02577 // has hidden or protected visibility, or if it is static or local, then 02578 // we don't need to use the PLT - we can directly call it. 02579 if (Subtarget->isTargetELF() && 02580 getTargetMachine().getRelocationModel() == Reloc::PIC_ && 02581 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { 02582 OpFlags = X86II::MO_PLT; 02583 } else if (Subtarget->isPICStyleStubAny() && 02584 (GV->isDeclaration() || GV->isWeakForLinker()) && 02585 (!Subtarget->getTargetTriple().isMacOSX() || 02586 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { 02587 // PC-relative references to external symbols should go through $stub, 02588 // unless we're building with the leopard linker or later, which 02589 // automatically synthesizes these stubs. 02590 OpFlags = X86II::MO_DARWIN_STUB; 02591 } else if (Subtarget->isPICStyleRIPRel() && 02592 isa<Function>(GV) && 02593 cast<Function>(GV)->getAttributes(). 02594 hasAttribute(AttributeSet::FunctionIndex, 02595 Attribute::NonLazyBind)) { 02596 // If the function is marked as non-lazy, generate an indirect call 02597 // which loads from the GOT directly. This avoids runtime overhead 02598 // at the cost of eager binding (and one extra byte of encoding). 02599 OpFlags = X86II::MO_GOTPCREL; 02600 WrapperKind = X86ISD::WrapperRIP; 02601 ExtraLoad = true; 02602 } 02603 02604 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 02605 G->getOffset(), OpFlags); 02606 02607 // Add a wrapper if needed. 
02608 if (WrapperKind != ISD::DELETED_NODE) 02609 Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee); 02610 // Add extra indirection if needed. 02611 if (ExtraLoad) 02612 Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, 02613 MachinePointerInfo::getGOT(), 02614 false, false, false, 0); 02615 } 02616 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 02617 unsigned char OpFlags = 0; 02618 02619 // On ELF targets, in either X86-64 or X86-32 mode, direct calls to 02620 // external symbols should go through the PLT. 02621 if (Subtarget->isTargetELF() && 02622 getTargetMachine().getRelocationModel() == Reloc::PIC_) { 02623 OpFlags = X86II::MO_PLT; 02624 } else if (Subtarget->isPICStyleStubAny() && 02625 (!Subtarget->getTargetTriple().isMacOSX() || 02626 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { 02627 // PC-relative references to external symbols should go through $stub, 02628 // unless we're building with the leopard linker or later, which 02629 // automatically synthesizes these stubs. 02630 OpFlags = X86II::MO_DARWIN_STUB; 02631 } 02632 02633 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), 02634 OpFlags); 02635 } 02636 02637 // Returns a chain & a flag for retval copy to use. 02638 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 02639 SmallVector<SDValue, 8> Ops; 02640 02641 if (!IsSibcall && isTailCall) { 02642 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 02643 DAG.getIntPtrConstant(0, true), InFlag); 02644 InFlag = Chain.getValue(1); 02645 } 02646 02647 Ops.push_back(Chain); 02648 Ops.push_back(Callee); 02649 02650 if (isTailCall) 02651 Ops.push_back(DAG.getConstant(FPDiff, MVT::i32)); 02652 02653 // Add argument registers to the end of the list so that they are known live 02654 // into the call. 
02655 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 02656 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 02657 RegsToPass[i].second.getValueType())); 02658 02659 // Add a register mask operand representing the call-preserved registers. 02660 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 02661 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 02662 assert(Mask && "Missing call preserved mask for calling convention"); 02663 Ops.push_back(DAG.getRegisterMask(Mask)); 02664 02665 if (InFlag.getNode()) 02666 Ops.push_back(InFlag); 02667 02668 if (isTailCall) { 02669 // We used to do: 02670 //// If this is the first return lowered for this function, add the regs 02671 //// to the liveout set for the function. 02672 // This isn't right, although it's probably harmless on x86; liveouts 02673 // should be computed from returns not tail calls. Consider a void 02674 // function making a tail call to a function returning int. 02675 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 02676 } 02677 02678 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); 02679 InFlag = Chain.getValue(1); 02680 02681 // Create the CALLSEQ_END node. 02682 unsigned NumBytesForCalleeToPush; 02683 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, 02684 getTargetMachine().Options.GuaranteedTailCallOpt)) 02685 NumBytesForCalleeToPush = NumBytes; // Callee pops everything 02686 else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && 02687 SR == StackStructReturn) 02688 // If this is a call to a struct-return function, the callee 02689 // pops the hidden struct pointer, so we have to push it back. 02690 // This is common for Darwin/X86, Linux & Mingw32 targets. 02691 // For MSVC Win32 targets, the caller pops the hidden struct pointer. 02692 NumBytesForCalleeToPush = 4; 02693 else 02694 NumBytesForCalleeToPush = 0; // Callee pops nothing. 02695 02696 // Returns a flag for retval copy to use. 
02697 if (!IsSibcall) { 02698 Chain = DAG.getCALLSEQ_END(Chain, 02699 DAG.getIntPtrConstant(NumBytes, true), 02700 DAG.getIntPtrConstant(NumBytesForCalleeToPush, 02701 true), 02702 InFlag); 02703 InFlag = Chain.getValue(1); 02704 } 02705 02706 // Handle result values, copying them out of physregs into vregs that we 02707 // return. 02708 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 02709 Ins, dl, DAG, InVals); 02710 } 02711 02712 //===----------------------------------------------------------------------===// 02713 // Fast Calling Convention (tail call) implementation 02714 //===----------------------------------------------------------------------===// 02715 02716 // Like std call, callee cleans arguments, convention except that ECX is 02717 // reserved for storing the tail called function address. Only 2 registers are 02718 // free for argument passing (inreg). Tail call optimization is performed 02719 // provided: 02720 // * tailcallopt is enabled 02721 // * caller/callee are fastcc 02722 // On X86_64 architecture with GOT-style position independent code only local 02723 // (within module) calls are supported at the moment. 02724 // To keep the stack aligned according to platform abi the function 02725 // GetAlignedArgumentStackSize ensures that argument delta is always multiples 02726 // of stack alignment. (Dynamic linkers need this - darwin's dyld for example) 02727 // If a tail called function callee has more arguments than the caller the 02728 // caller needs to make sure that there is room to move the RETADDR to. This is 02729 // achieved by reserving an area the size of the argument delta right after the 02730 // original REtADDR, but before the saved framepointer or the spilled registers 02731 // e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4) 02732 // stack layout: 02733 // arg1 02734 // arg2 02735 // RETADDR 02736 // [ new RETADDR 02737 // move area ] 02738 // (possible EBP) 02739 // ESI 02740 // EDI 02741 // local1 .. 
02742 02743 /// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned 02744 /// for a 16 byte align requirement. 02745 unsigned 02746 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, 02747 SelectionDAG& DAG) const { 02748 MachineFunction &MF = DAG.getMachineFunction(); 02749 const TargetMachine &TM = MF.getTarget(); 02750 const TargetFrameLowering &TFI = *TM.getFrameLowering(); 02751 unsigned StackAlignment = TFI.getStackAlignment(); 02752 uint64_t AlignMask = StackAlignment - 1; 02753 int64_t Offset = StackSize; 02754 unsigned SlotSize = RegInfo->getSlotSize(); 02755 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) { 02756 // Number smaller than 12 so just add the difference. 02757 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask)); 02758 } else { 02759 // Mask out lower bits, add stackalignment once plus the 12 bytes. 02760 Offset = ((~AlignMask) & Offset) + StackAlignment + 02761 (StackAlignment-SlotSize); 02762 } 02763 return Offset; 02764 } 02765 02766 /// MatchingStackOffset - Return true if the given stack call argument is 02767 /// already available in the same position (relatively) of the caller's 02768 /// incoming argument stack. 
02769 static 02770 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 02771 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 02772 const X86InstrInfo *TII) { 02773 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 02774 int FI = INT_MAX; 02775 if (Arg.getOpcode() == ISD::CopyFromReg) { 02776 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 02777 if (!TargetRegisterInfo::isVirtualRegister(VR)) 02778 return false; 02779 MachineInstr *Def = MRI->getVRegDef(VR); 02780 if (!Def) 02781 return false; 02782 if (!Flags.isByVal()) { 02783 if (!TII->isLoadFromStackSlot(Def, FI)) 02784 return false; 02785 } else { 02786 unsigned Opcode = Def->getOpcode(); 02787 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) && 02788 Def->getOperand(1).isFI()) { 02789 FI = Def->getOperand(1).getIndex(); 02790 Bytes = Flags.getByValSize(); 02791 } else 02792 return false; 02793 } 02794 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 02795 if (Flags.isByVal()) 02796 // ByVal argument is passed in as a pointer but it's now being 02797 // dereferenced. e.g. 02798 // define @foo(%struct.X* %A) { 02799 // tail call @bar(%struct.X* byval %A) 02800 // } 02801 return false; 02802 SDValue Ptr = Ld->getBasePtr(); 02803 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 02804 if (!FINode) 02805 return false; 02806 FI = FINode->getIndex(); 02807 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { 02808 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg); 02809 FI = FINode->getIndex(); 02810 Bytes = Flags.getByValSize(); 02811 } else 02812 return false; 02813 02814 assert(FI != INT_MAX); 02815 if (!MFI->isFixedObjectIndex(FI)) 02816 return false; 02817 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 02818 } 02819 02820 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 02821 /// for tail call optimization. 
Targets which want to do tail call 02822 /// optimization should implement this function. 02823 bool 02824 X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 02825 CallingConv::ID CalleeCC, 02826 bool isVarArg, 02827 bool isCalleeStructRet, 02828 bool isCallerStructRet, 02829 Type *RetTy, 02830 const SmallVectorImpl<ISD::OutputArg> &Outs, 02831 const SmallVectorImpl<SDValue> &OutVals, 02832 const SmallVectorImpl<ISD::InputArg> &Ins, 02833 SelectionDAG &DAG) const { 02834 if (!IsTailCallConvention(CalleeCC) && 02835 CalleeCC != CallingConv::C) 02836 return false; 02837 02838 // If -tailcallopt is specified, make fastcc functions tail-callable. 02839 const MachineFunction &MF = DAG.getMachineFunction(); 02840 const Function *CallerF = DAG.getMachineFunction().getFunction(); 02841 02842 // If the function return type is x86_fp80 and the callee return type is not, 02843 // then the FP_EXTEND of the call result is not a nop. It's not safe to 02844 // perform a tailcall optimization here. 02845 if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) 02846 return false; 02847 02848 CallingConv::ID CallerCC = CallerF->getCallingConv(); 02849 bool CCMatch = CallerCC == CalleeCC; 02850 02851 if (getTargetMachine().Options.GuaranteedTailCallOpt) { 02852 if (IsTailCallConvention(CalleeCC) && CCMatch) 02853 return true; 02854 return false; 02855 } 02856 02857 // Look for obvious safe cases to perform tail call optimization that do not 02858 // require ABI changes. This is what gcc calls sibcall. 02859 02860 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to 02861 // emit a special epilogue. 02862 if (RegInfo->needsStackRealignment(MF)) 02863 return false; 02864 02865 // Also avoid sibcall optimization if either caller or callee uses struct 02866 // return semantics. 
02867 if (isCalleeStructRet || isCallerStructRet) 02868 return false; 02869 02870 // An stdcall caller is expected to clean up its arguments; the callee 02871 // isn't going to do that. 02872 if (!CCMatch && CallerCC == CallingConv::X86_StdCall) 02873 return false; 02874 02875 // Do not sibcall optimize vararg calls unless all arguments are passed via 02876 // registers. 02877 if (isVarArg && !Outs.empty()) { 02878 02879 // Optimizing for varargs on Win64 is unlikely to be safe without 02880 // additional testing. 02881 if (Subtarget->isTargetWin64()) 02882 return false; 02883 02884 SmallVector<CCValAssign, 16> ArgLocs; 02885 CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), 02886 getTargetMachine(), ArgLocs, *DAG.getContext()); 02887 02888 CCInfo.AnalyzeCallOperands(Outs, CC_X86); 02889 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) 02890 if (!ArgLocs[i].isRegLoc()) 02891 return false; 02892 } 02893 02894 // If the call result is in ST0 / ST1, it needs to be popped off the x87 02895 // stack. Therefore, if it's not used by the call it is not safe to optimize 02896 // this into a sibcall. 02897 bool Unused = false; 02898 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 02899 if (!Ins[i].Used) { 02900 Unused = true; 02901 break; 02902 } 02903 } 02904 if (Unused) { 02905 SmallVector<CCValAssign, 16> RVLocs; 02906 CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), 02907 getTargetMachine(), RVLocs, *DAG.getContext()); 02908 CCInfo.AnalyzeCallResult(Ins, RetCC_X86); 02909 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 02910 CCValAssign &VA = RVLocs[i]; 02911 if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) 02912 return false; 02913 } 02914 } 02915 02916 // If the calling conventions do not match, then we'd better make sure the 02917 // results are returned in the same way as what the caller expects. 
02918 if (!CCMatch) { 02919 SmallVector<CCValAssign, 16> RVLocs1; 02920 CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), 02921 getTargetMachine(), RVLocs1, *DAG.getContext()); 02922 CCInfo1.AnalyzeCallResult(Ins, RetCC_X86); 02923 02924 SmallVector<CCValAssign, 16> RVLocs2; 02925 CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), 02926 getTargetMachine(), RVLocs2, *DAG.getContext()); 02927 CCInfo2.AnalyzeCallResult(Ins, RetCC_X86); 02928 02929 if (RVLocs1.size() != RVLocs2.size()) 02930 return false; 02931 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 02932 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 02933 return false; 02934 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 02935 return false; 02936 if (RVLocs1[i].isRegLoc()) { 02937 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 02938 return false; 02939 } else { 02940 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 02941 return false; 02942 } 02943 } 02944 } 02945 02946 // If the callee takes no arguments then go on to check the results of the 02947 // call. 02948 if (!Outs.empty()) { 02949 // Check if stack adjustment is needed. For now, do not do this if any 02950 // argument is passed on the stack. 02951 SmallVector<CCValAssign, 16> ArgLocs; 02952 CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), 02953 getTargetMachine(), ArgLocs, *DAG.getContext()); 02954 02955 // Allocate shadow area for Win64 02956 if (Subtarget->isTargetWin64()) { 02957 CCInfo.AllocateStack(32, 8); 02958 } 02959 02960 CCInfo.AnalyzeCallOperands(Outs, CC_X86); 02961 if (CCInfo.getNextStackOffset()) { 02962 MachineFunction &MF = DAG.getMachineFunction(); 02963 if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) 02964 return false; 02965 02966 // Check if the arguments are already laid out in the right way as 02967 // the caller's fixed stack objects. 
02968 MachineFrameInfo *MFI = MF.getFrameInfo(); 02969 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 02970 const X86InstrInfo *TII = 02971 ((const X86TargetMachine&)getTargetMachine()).getInstrInfo(); 02972 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 02973 CCValAssign &VA = ArgLocs[i]; 02974 SDValue Arg = OutVals[i]; 02975 ISD::ArgFlagsTy Flags = Outs[i].Flags; 02976 if (VA.getLocInfo() == CCValAssign::Indirect) 02977 return false; 02978 if (!VA.isRegLoc()) { 02979 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 02980 MFI, MRI, TII)) 02981 return false; 02982 } 02983 } 02984 } 02985 02986 // If the tailcall address may be in a register, then make sure it's 02987 // possible to register allocate for it. In 32-bit, the call address can 02988 // only target EAX, EDX, or ECX since the tail call must be scheduled after 02989 // callee-saved registers are restored. These happen to be the same 02990 // registers used to pass 'inreg' arguments so watch out for those. 02991 if (!Subtarget->is64Bit() && 02992 ((!isa<GlobalAddressSDNode>(Callee) && 02993 !isa<ExternalSymbolSDNode>(Callee)) || 02994 getTargetMachine().getRelocationModel() == Reloc::PIC_)) { 02995 unsigned NumInRegs = 0; 02996 // In PIC we need an extra register to formulate the address computation 02997 // for the callee. 02998 unsigned MaxInRegs = 02999 (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 
2 : 3; 03000 03001 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 03002 CCValAssign &VA = ArgLocs[i]; 03003 if (!VA.isRegLoc()) 03004 continue; 03005 unsigned Reg = VA.getLocReg(); 03006 switch (Reg) { 03007 default: break; 03008 case X86::EAX: case X86::EDX: case X86::ECX: 03009 if (++NumInRegs == MaxInRegs) 03010 return false; 03011 break; 03012 } 03013 } 03014 } 03015 } 03016 03017 return true; 03018 } 03019 03020 FastISel * 03021 X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, 03022 const TargetLibraryInfo *libInfo) const { 03023 return X86::createFastISel(funcInfo, libInfo); 03024 } 03025 03026 //===----------------------------------------------------------------------===// 03027 // Other Lowering Hooks 03028 //===----------------------------------------------------------------------===// 03029 03030 static bool MayFoldLoad(SDValue Op) { 03031 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); 03032 } 03033 03034 static bool MayFoldIntoStore(SDValue Op) { 03035 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); 03036 } 03037 03038 static bool isTargetShuffle(unsigned Opcode) { 03039 switch(Opcode) { 03040 default: return false; 03041 case X86ISD::PSHUFD: 03042 case X86ISD::PSHUFHW: 03043 case X86ISD::PSHUFLW: 03044 case X86ISD::SHUFP: 03045 case X86ISD::PALIGNR: 03046 case X86ISD::MOVLHPS: 03047 case X86ISD::MOVLHPD: 03048 case X86ISD::MOVHLPS: 03049 case X86ISD::MOVLPS: 03050 case X86ISD::MOVLPD: 03051 case X86ISD::MOVSHDUP: 03052 case X86ISD::MOVSLDUP: 03053 case X86ISD::MOVDDUP: 03054 case X86ISD::MOVSS: 03055 case X86ISD::MOVSD: 03056 case X86ISD::UNPCKL: 03057 case X86ISD::UNPCKH: 03058 case X86ISD::VPERMILP: 03059 case X86ISD::VPERM2X128: 03060 case X86ISD::VPERMI: 03061 return true; 03062 } 03063 } 03064 03065 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, 03066 SDValue V1, SelectionDAG &DAG) { 03067 switch(Opc) { 03068 default: llvm_unreachable("Unknown x86 shuffle node"); 
03069 case X86ISD::MOVSHDUP: 03070 case X86ISD::MOVSLDUP: 03071 case X86ISD::MOVDDUP: 03072 return DAG.getNode(Opc, dl, VT, V1); 03073 } 03074 } 03075 03076 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, 03077 SDValue V1, unsigned TargetMask, 03078 SelectionDAG &DAG) { 03079 switch(Opc) { 03080 default: llvm_unreachable("Unknown x86 shuffle node"); 03081 case X86ISD::PSHUFD: 03082 case X86ISD::PSHUFHW: 03083 case X86ISD::PSHUFLW: 03084 case X86ISD::VPERMILP: 03085 case X86ISD::VPERMI: 03086 return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); 03087 } 03088 } 03089 03090 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, 03091 SDValue V1, SDValue V2, unsigned TargetMask, 03092 SelectionDAG &DAG) { 03093 switch(Opc) { 03094 default: llvm_unreachable("Unknown x86 shuffle node"); 03095 case X86ISD::PALIGNR: 03096 case X86ISD::SHUFP: 03097 case X86ISD::VPERM2X128: 03098 return DAG.getNode(Opc, dl, VT, V1, V2, 03099 DAG.getConstant(TargetMask, MVT::i8)); 03100 } 03101 } 03102 03103 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, 03104 SDValue V1, SDValue V2, SelectionDAG &DAG) { 03105 switch(Opc) { 03106 default: llvm_unreachable("Unknown x86 shuffle node"); 03107 case X86ISD::MOVLHPS: 03108 case X86ISD::MOVLHPD: 03109 case X86ISD::MOVHLPS: 03110 case X86ISD::MOVLPS: 03111 case X86ISD::MOVLPD: 03112 case X86ISD::MOVSS: 03113 case X86ISD::MOVSD: 03114 case X86ISD::UNPCKL: 03115 case X86ISD::UNPCKH: 03116 return DAG.getNode(Opc, dl, VT, V1, V2); 03117 } 03118 } 03119 03120 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { 03121 MachineFunction &MF = DAG.getMachineFunction(); 03122 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 03123 int ReturnAddrIndex = FuncInfo->getRAIndex(); 03124 03125 if (ReturnAddrIndex == 0) { 03126 // Set up a frame object for the return address. 
03127 unsigned SlotSize = RegInfo->getSlotSize(); 03128 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, 03129 false); 03130 FuncInfo->setRAIndex(ReturnAddrIndex); 03131 } 03132 03133 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 03134 } 03135 03136 bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, 03137 bool hasSymbolicDisplacement) { 03138 // Offset should fit into 32 bit immediate field. 03139 if (!isInt<32>(Offset)) 03140 return false; 03141 03142 // If we don't have a symbolic displacement - we don't have any extra 03143 // restrictions. 03144 if (!hasSymbolicDisplacement) 03145 return true; 03146 03147 // FIXME: Some tweaks might be needed for medium code model. 03148 if (M != CodeModel::Small && M != CodeModel::Kernel) 03149 return false; 03150 03151 // For small code model we assume that latest object is 16MB before end of 31 03152 // bits boundary. We may also accept pretty large negative constants knowing 03153 // that all objects are in the positive half of address space. 03154 if (M == CodeModel::Small && Offset < 16*1024*1024) 03155 return true; 03156 03157 // For kernel code model we know that all objects reside in the negative half 03158 // of the 32-bit address space. We may not accept negative offsets, since they may 03159 // be just off and we may accept pretty large positive ones. 03160 if (M == CodeModel::Kernel && Offset > 0) 03161 return true; 03162 03163 return false; 03164 } 03165 03166 /// isCalleePop - Determines whether the callee is required to pop its 03167 /// own arguments. Callee pop is necessary to support tail calls.
03168 bool X86::isCalleePop(CallingConv::ID CallingConv, 03169 bool is64Bit, bool IsVarArg, bool TailCallOpt) { 03170 if (IsVarArg) 03171 return false; 03172 03173 switch (CallingConv) { 03174 default: 03175 return false; 03176 case CallingConv::X86_StdCall: 03177 return !is64Bit; 03178 case CallingConv::X86_FastCall: 03179 return !is64Bit; 03180 case CallingConv::X86_ThisCall: 03181 return !is64Bit; 03182 case CallingConv::Fast: 03183 return TailCallOpt; 03184 case CallingConv::GHC: 03185 return TailCallOpt; 03186 case CallingConv::HiPE: 03187 return TailCallOpt; 03188 } 03189 } 03190 03191 /// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86 03192 /// specific condition code, returning the condition code and the LHS/RHS of the 03193 /// comparison to make. 03194 static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 03195 SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) { 03196 if (!isFP) { 03197 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 03198 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { 03199 // X > -1 -> X == 0, jump !sign. 03200 RHS = DAG.getConstant(0, RHS.getValueType()); 03201 return X86::COND_NS; 03202 } 03203 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { 03204 // X < 0 -> X == 0, jump on sign. 
03205 return X86::COND_S; 03206 } 03207 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { 03208 // X < 1 -> X <= 0 03209 RHS = DAG.getConstant(0, RHS.getValueType()); 03210 return X86::COND_LE; 03211 } 03212 } 03213 03214 switch (SetCCOpcode) { 03215 default: llvm_unreachable("Invalid integer condition!"); 03216 case ISD::SETEQ: return X86::COND_E; 03217 case ISD::SETGT: return X86::COND_G; 03218 case ISD::SETGE: return X86::COND_GE; 03219 case ISD::SETLT: return X86::COND_L; 03220 case ISD::SETLE: return X86::COND_LE; 03221 case ISD::SETNE: return X86::COND_NE; 03222 case ISD::SETULT: return X86::COND_B; 03223 case ISD::SETUGT: return X86::COND_A; 03224 case ISD::SETULE: return X86::COND_BE; 03225 case ISD::SETUGE: return X86::COND_AE; 03226 } 03227 } 03228 03229 // First determine if it is required or is profitable to flip the operands. 03230 03231 // If LHS is a foldable load, but RHS is not, flip the condition. 03232 if (ISD::isNON_EXTLoad(LHS.getNode()) && 03233 !ISD::isNON_EXTLoad(RHS.getNode())) { 03234 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode); 03235 std::swap(LHS, RHS); 03236 } 03237 03238 switch (SetCCOpcode) { 03239 default: break; 03240 case ISD::SETOLT: 03241 case ISD::SETOLE: 03242 case ISD::SETUGT: 03243 case ISD::SETUGE: 03244 std::swap(LHS, RHS); 03245 break; 03246 } 03247 03248 // On a floating point condition, the flags are set as follows: 03249 // ZF PF CF op 03250 // 0 | 0 | 0 | X > Y 03251 // 0 | 0 | 1 | X < Y 03252 // 1 | 0 | 0 | X == Y 03253 // 1 | 1 | 1 | unordered 03254 switch (SetCCOpcode) { 03255 default: llvm_unreachable("Condcode should be pre-legalized away"); 03256 case ISD::SETUEQ: 03257 case ISD::SETEQ: return X86::COND_E; 03258 case ISD::SETOLT: // flipped 03259 case ISD::SETOGT: 03260 case ISD::SETGT: return X86::COND_A; 03261 case ISD::SETOLE: // flipped 03262 case ISD::SETOGE: 03263 case ISD::SETGE: return X86::COND_AE; 03264 case ISD::SETUGT: // flipped 03265 case ISD::SETULT: 03266 case ISD::SETLT: return 
X86::COND_B; 03267 case ISD::SETUGE: // flipped 03268 case ISD::SETULE: 03269 case ISD::SETLE: return X86::COND_BE; 03270 case ISD::SETONE: 03271 case ISD::SETNE: return X86::COND_NE; 03272 case ISD::SETUO: return X86::COND_P; 03273 case ISD::SETO: return X86::COND_NP; 03274 case ISD::SETOEQ: 03275 case ISD::SETUNE: return X86::COND_INVALID; 03276 } 03277 } 03278 03279 /// hasFPCMov - is there a floating point cmov for the specific X86 condition 03280 /// code. Current x86 isa includes the following FP cmov instructions: 03281 /// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 03282 static bool hasFPCMov(unsigned X86CC) { 03283 switch (X86CC) { 03284 default: 03285 return false; 03286 case X86::COND_B: 03287 case X86::COND_BE: 03288 case X86::COND_E: 03289 case X86::COND_P: 03290 case X86::COND_A: 03291 case X86::COND_AE: 03292 case X86::COND_NE: 03293 case X86::COND_NP: 03294 return true; 03295 } 03296 } 03297 03298 /// isFPImmLegal - Returns true if the target can instruction select the 03299 /// specified FP immediate natively. If false, the legalizer will 03300 /// materialize the FP immediate as a load from a constant pool. 03301 bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 03302 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) { 03303 if (Imm.bitwiseIsEqual(LegalFPImmediates[i])) 03304 return true; 03305 } 03306 return false; 03307 } 03308 03309 /// isUndefOrInRange - Return true if Val is undef or if its value falls within 03310 /// the specified range (L, H]. 03311 static bool isUndefOrInRange(int Val, int Low, int Hi) { 03312 return (Val < 0) || (Val >= Low && Val < Hi); 03313 } 03314 03315 /// isUndefOrEqual - Val is either less than zero (undef) or equal to the 03316 /// specified value. 
03317 static bool isUndefOrEqual(int Val, int CmpVal) { 03318 return (Val < 0 || Val == CmpVal); 03319 } 03320 03321 /// isSequentialOrUndefInRange - Return true if every element in Mask, beginning 03322 /// from position Pos and ending in Pos+Size, falls within the specified 03323 /// sequential range (L, L+Pos]. or is undef. 03324 static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, 03325 unsigned Pos, unsigned Size, int Low) { 03326 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) 03327 if (!isUndefOrEqual(Mask[i], Low)) 03328 return false; 03329 return true; 03330 } 03331 03332 /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that 03333 /// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference 03334 /// the second operand. 03335 static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) { 03336 if (VT == MVT::v4f32 || VT == MVT::v4i32 ) 03337 return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); 03338 if (VT == MVT::v2f64 || VT == MVT::v2i64) 03339 return (Mask[0] < 2 && Mask[1] < 2); 03340 return false; 03341 } 03342 03343 /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that 03344 /// is suitable for input to PSHUFHW. 03345 static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) { 03346 if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16)) 03347 return false; 03348 03349 // Lower quadword copied in order or undef. 03350 if (!isSequentialOrUndefInRange(Mask, 0, 4, 0)) 03351 return false; 03352 03353 // Upper quadword shuffled. 03354 for (unsigned i = 4; i != 8; ++i) 03355 if (!isUndefOrInRange(Mask[i], 4, 8)) 03356 return false; 03357 03358 if (VT == MVT::v16i16) { 03359 // Lower quadword copied in order or undef. 03360 if (!isSequentialOrUndefInRange(Mask, 8, 4, 8)) 03361 return false; 03362 03363 // Upper quadword shuffled. 
03364 for (unsigned i = 12; i != 16; ++i) 03365 if (!isUndefOrInRange(Mask[i], 12, 16)) 03366 return false; 03367 } 03368 03369 return true; 03370 } 03371 03372 /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that 03373 /// is suitable for input to PSHUFLW. 03374 static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) { 03375 if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16)) 03376 return false; 03377 03378 // Upper quadword copied in order. 03379 if (!isSequentialOrUndefInRange(Mask, 4, 4, 4)) 03380 return false; 03381 03382 // Lower quadword shuffled. 03383 for (unsigned i = 0; i != 4; ++i) 03384 if (!isUndefOrInRange(Mask[i], 0, 4)) 03385 return false; 03386 03387 if (VT == MVT::v16i16) { 03388 // Upper quadword copied in order. 03389 if (!isSequentialOrUndefInRange(Mask, 12, 4, 12)) 03390 return false; 03391 03392 // Lower quadword shuffled. 03393 for (unsigned i = 8; i != 12; ++i) 03394 if (!isUndefOrInRange(Mask[i], 8, 12)) 03395 return false; 03396 } 03397 03398 return true; 03399 } 03400 03401 /// isPALIGNRMask - Return true if the node specifies a shuffle of elements that 03402 /// is suitable for input to PALIGNR. 03403 static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT, 03404 const X86Subtarget *Subtarget) { 03405 if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) || 03406 (VT.is256BitVector() && !Subtarget->hasInt256())) 03407 return false; 03408 03409 unsigned NumElts = VT.getVectorNumElements(); 03410 unsigned NumLanes = VT.getSizeInBits()/128; 03411 unsigned NumLaneElts = NumElts/NumLanes; 03412 03413 // Do not handle 64-bit element shuffles with palignr. 
03414 if (NumLaneElts == 2) 03415 return false; 03416 03417 for (unsigned l = 0; l != NumElts; l+=NumLaneElts) { 03418 unsigned i; 03419 for (i = 0; i != NumLaneElts; ++i) { 03420 if (Mask[i+l] >= 0) 03421 break; 03422 } 03423 03424 // Lane is all undef, go to next lane 03425 if (i == NumLaneElts) 03426 continue; 03427 03428 int Start = Mask[i+l]; 03429 03430 // Make sure its in this lane in one of the sources 03431 if (!isUndefOrInRange(Start, l, l+NumLaneElts) && 03432 !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts)) 03433 return false; 03434 03435 // If not lane 0, then we must match lane 0 03436 if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l)) 03437 return false; 03438 03439 // Correct second source to be contiguous with first source 03440 if (Start >= (int)NumElts) 03441 Start -= NumElts - NumLaneElts; 03442 03443 // Make sure we're shifting in the right direction. 03444 if (Start <= (int)(i+l)) 03445 return false; 03446 03447 Start -= i; 03448 03449 // Check the rest of the elements to see if they are consecutive. 03450 for (++i; i != NumLaneElts; ++i) { 03451 int Idx = Mask[i+l]; 03452 03453 // Make sure its in this lane 03454 if (!isUndefOrInRange(Idx, l, l+NumLaneElts) && 03455 !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts)) 03456 return false; 03457 03458 // If not lane 0, then we must match lane 0 03459 if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l)) 03460 return false; 03461 03462 if (Idx >= (int)NumElts) 03463 Idx -= NumElts - NumLaneElts; 03464 03465 if (!isUndefOrEqual(Idx, Start+i)) 03466 return false; 03467 03468 } 03469 } 03470 03471 return true; 03472 } 03473 03474 /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming 03475 /// the two vector operands have swapped position. 
03476 static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, 03477 unsigned NumElems) { 03478 for (unsigned i = 0; i != NumElems; ++i) { 03479 int idx = Mask[i]; 03480 if (idx < 0) 03481 continue; 03482 else if (idx < (int)NumElems) 03483 Mask[i] = idx + NumElems; 03484 else 03485 Mask[i] = idx - NumElems; 03486 } 03487 } 03488 03489 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 03490 /// specifies a shuffle of elements that is suitable for input to 128/256-bit 03491 /// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be 03492 /// reverse of what x86 shuffles want. 03493 static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256, 03494 bool Commuted = false) { 03495 if (!HasFp256 && VT.is256BitVector()) 03496 return false; 03497 03498 unsigned NumElems = VT.getVectorNumElements(); 03499 unsigned NumLanes = VT.getSizeInBits()/128; 03500 unsigned NumLaneElems = NumElems/NumLanes; 03501 03502 if (NumLaneElems != 2 && NumLaneElems != 4) 03503 return false; 03504 03505 // VSHUFPSY divides the resulting vector into 4 chunks. 03506 // The sources are also splitted into 4 chunks, and each destination 03507 // chunk must come from a different source chunk. 03508 // 03509 // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0 03510 // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y9 03511 // 03512 // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4, 03513 // Y3..Y0, Y3..Y0, X3..X0, X3..X0 03514 // 03515 // VSHUFPDY divides the resulting vector into 4 chunks. 03516 // The sources are also splitted into 4 chunks, and each destination 03517 // chunk must come from a different source chunk. 
03518 // 03519 // SRC1 => X3 X2 X1 X0 03520 // SRC2 => Y3 Y2 Y1 Y0 03521 // 03522 // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0 03523 // 03524 unsigned HalfLaneElems = NumLaneElems/2; 03525 for (unsigned l = 0; l != NumElems; l += NumLaneElems) { 03526 for (unsigned i = 0; i != NumLaneElems; ++i) { 03527 int Idx = Mask[i+l]; 03528 unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0); 03529 if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems)) 03530 return false; 03531 // For VSHUFPSY, the mask of the second half must be the same as the 03532 // first but with the appropriate offsets. This works in the same way as 03533 // VPERMILPS works with masks. 03534 if (NumElems != 8 || l == 0 || Mask[i] < 0) 03535 continue; 03536 if (!isUndefOrEqual(Idx, Mask[i]+l)) 03537 return false; 03538 } 03539 } 03540 03541 return true; 03542 } 03543 03544 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 03545 /// specifies a shuffle of elements that is suitable for input to MOVHLPS. 03546 static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) { 03547 if (!VT.is128BitVector()) 03548 return false; 03549 03550 unsigned NumElems = VT.getVectorNumElements(); 03551 03552 if (NumElems != 4) 03553 return false; 03554 03555 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 03556 return isUndefOrEqual(Mask[0], 6) && 03557 isUndefOrEqual(Mask[1], 7) && 03558 isUndefOrEqual(Mask[2], 2) && 03559 isUndefOrEqual(Mask[3], 3); 03560 } 03561 03562 /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 03563 /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. 
vector_shuffle v, undef, 03564 /// <2, 3, 2, 3> 03565 static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) { 03566 if (!VT.is128BitVector()) 03567 return false; 03568 03569 unsigned NumElems = VT.getVectorNumElements(); 03570 03571 if (NumElems != 4) 03572 return false; 03573 03574 return isUndefOrEqual(Mask[0], 2) && 03575 isUndefOrEqual(Mask[1], 3) && 03576 isUndefOrEqual(Mask[2], 2) && 03577 isUndefOrEqual(Mask[3], 3); 03578 } 03579 03580 /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 03581 /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 03582 static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) { 03583 if (!VT.is128BitVector()) 03584 return false; 03585 03586 unsigned NumElems = VT.getVectorNumElements(); 03587 03588 if (NumElems != 2 && NumElems != 4) 03589 return false; 03590 03591 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 03592 if (!isUndefOrEqual(Mask[i], i + NumElems)) 03593 return false; 03594 03595 for (unsigned i = NumElems/2, e = NumElems; i != e; ++i) 03596 if (!isUndefOrEqual(Mask[i], i)) 03597 return false; 03598 03599 return true; 03600 } 03601 03602 /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand 03603 /// specifies a shuffle of elements that is suitable for input to MOVLHPS. 03604 static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) { 03605 if (!VT.is128BitVector()) 03606 return false; 03607 03608 unsigned NumElems = VT.getVectorNumElements(); 03609 03610 if (NumElems != 2 && NumElems != 4) 03611 return false; 03612 03613 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 03614 if (!isUndefOrEqual(Mask[i], i)) 03615 return false; 03616 03617 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 03618 if (!isUndefOrEqual(Mask[i + e], i + NumElems)) 03619 return false; 03620 03621 return true; 03622 } 03623 03624 // 03625 // Some special combinations that can be optimized. 
03626 // 03627 static 03628 SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, 03629 SelectionDAG &DAG) { 03630 MVT VT = SVOp->getValueType(0).getSimpleVT(); 03631 SDLoc dl(SVOp); 03632 03633 if (VT != MVT::v8i32 && VT != MVT::v8f32) 03634 return SDValue(); 03635 03636 ArrayRef<int> Mask = SVOp->getMask(); 03637 03638 // These are the special masks that may be optimized. 03639 static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14}; 03640 static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15}; 03641 bool MatchEvenMask = true; 03642 bool MatchOddMask = true; 03643 for (int i=0; i<8; ++i) { 03644 if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i])) 03645 MatchEvenMask = false; 03646 if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i])) 03647 MatchOddMask = false; 03648 } 03649 03650 if (!MatchEvenMask && !MatchOddMask) 03651 return SDValue(); 03652 03653 SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT); 03654 03655 SDValue Op0 = SVOp->getOperand(0); 03656 SDValue Op1 = SVOp->getOperand(1); 03657 03658 if (MatchEvenMask) { 03659 // Shift the second operand right to 32 bits. 03660 static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 }; 03661 Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask); 03662 } else { 03663 // Shift the first operand left to 32 bits. 03664 static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 }; 03665 Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask); 03666 } 03667 static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15}; 03668 return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask); 03669 } 03670 03671 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 03672 /// specifies a shuffle of elements that is suitable for input to UNPCKL. 
03673 static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT, 03674 bool HasInt256, bool V2IsSplat = false) { 03675 unsigned NumElts = VT.getVectorNumElements(); 03676 03677 assert((VT.is128BitVector() || VT.is256BitVector()) && 03678 "Unsupported vector type for unpckh"); 03679 03680 if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && 03681 (!HasInt256 || (NumElts != 16 && NumElts != 32))) 03682 return false; 03683 03684 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 03685 // independently on 128-bit lanes. 03686 unsigned NumLanes = VT.getSizeInBits()/128; 03687 unsigned NumLaneElts = NumElts/NumLanes; 03688 03689 for (unsigned l = 0; l != NumLanes; ++l) { 03690 for (unsigned i = l*NumLaneElts, j = l*NumLaneElts; 03691 i != (l+1)*NumLaneElts; 03692 i += 2, ++j) { 03693 int BitI = Mask[i]; 03694 int BitI1 = Mask[i+1]; 03695 if (!isUndefOrEqual(BitI, j)) 03696 return false; 03697 if (V2IsSplat) { 03698 if (!isUndefOrEqual(BitI1, NumElts)) 03699 return false; 03700 } else { 03701 if (!isUndefOrEqual(BitI1, j + NumElts)) 03702 return false; 03703 } 03704 } 03705 } 03706 03707 return true; 03708 } 03709 03710 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 03711 /// specifies a shuffle of elements that is suitable for input to UNPCKH. 03712 static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT, 03713 bool HasInt256, bool V2IsSplat = false) { 03714 unsigned NumElts = VT.getVectorNumElements(); 03715 03716 assert((VT.is128BitVector() || VT.is256BitVector()) && 03717 "Unsupported vector type for unpckh"); 03718 03719 if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && 03720 (!HasInt256 || (NumElts != 16 && NumElts != 32))) 03721 return false; 03722 03723 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 03724 // independently on 128-bit lanes. 
03725 unsigned NumLanes = VT.getSizeInBits()/128; 03726 unsigned NumLaneElts = NumElts/NumLanes; 03727 03728 for (unsigned l = 0; l != NumLanes; ++l) { 03729 for (unsigned i = l*NumLaneElts, j = (l*NumLaneElts)+NumLaneElts/2; 03730 i != (l+1)*NumLaneElts; i += 2, ++j) { 03731 int BitI = Mask[i]; 03732 int BitI1 = Mask[i+1]; 03733 if (!isUndefOrEqual(BitI, j)) 03734 return false; 03735 if (V2IsSplat) { 03736 if (isUndefOrEqual(BitI1, NumElts)) 03737 return false; 03738 } else { 03739 if (!isUndefOrEqual(BitI1, j+NumElts)) 03740 return false; 03741 } 03742 } 03743 } 03744 return true; 03745 } 03746 03747 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 03748 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 03749 /// <0, 0, 1, 1> 03750 static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) { 03751 unsigned NumElts = VT.getVectorNumElements(); 03752 bool Is256BitVec = VT.is256BitVector(); 03753 03754 assert((VT.is128BitVector() || VT.is256BitVector()) && 03755 "Unsupported vector type for unpckh"); 03756 03757 if (Is256BitVec && NumElts != 4 && NumElts != 8 && 03758 (!HasInt256 || (NumElts != 16 && NumElts != 32))) 03759 return false; 03760 03761 // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern 03762 // FIXME: Need a better way to get rid of this, there's no latency difference 03763 // between UNPCKLPD and MOVDDUP, the later should always be checked first and 03764 // the former later. We should also remove the "_undef" special mask. 03765 if (NumElts == 4 && Is256BitVec) 03766 return false; 03767 03768 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 03769 // independently on 128-bit lanes. 
03770 unsigned NumLanes = VT.getSizeInBits()/128; 03771 unsigned NumLaneElts = NumElts/NumLanes; 03772 03773 for (unsigned l = 0; l != NumLanes; ++l) { 03774 for (unsigned i = l*NumLaneElts, j = l*NumLaneElts; 03775 i != (l+1)*NumLaneElts; 03776 i += 2, ++j) { 03777 int BitI = Mask[i]; 03778 int BitI1 = Mask[i+1]; 03779 03780 if (!isUndefOrEqual(BitI, j)) 03781 return false; 03782 if (!isUndefOrEqual(BitI1, j)) 03783 return false; 03784 } 03785 } 03786 03787 return true; 03788 } 03789 03790 /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 03791 /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, 03792 /// <2, 2, 3, 3> 03793 static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) { 03794 unsigned NumElts = VT.getVectorNumElements(); 03795 03796 assert((VT.is128BitVector() || VT.is256BitVector()) && 03797 "Unsupported vector type for unpckh"); 03798 03799 if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && 03800 (!HasInt256 || (NumElts != 16 && NumElts != 32))) 03801 return false; 03802 03803 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 03804 // independently on 128-bit lanes. 03805 unsigned NumLanes = VT.getSizeInBits()/128; 03806 unsigned NumLaneElts = NumElts/NumLanes; 03807 03808 for (unsigned l = 0; l != NumLanes; ++l) { 03809 for (unsigned i = l*NumLaneElts, j = (l*NumLaneElts)+NumLaneElts/2; 03810 i != (l+1)*NumLaneElts; i += 2, ++j) { 03811 int BitI = Mask[i]; 03812 int BitI1 = Mask[i+1]; 03813 if (!isUndefOrEqual(BitI, j)) 03814 return false; 03815 if (!isUndefOrEqual(BitI1, j)) 03816 return false; 03817 } 03818 } 03819 return true; 03820 } 03821 03822 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 03823 /// specifies a shuffle of elements that is suitable for input to MOVSS, 03824 /// MOVSD, and MOVD, i.e. setting the lowest element. 
03825 static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) { 03826 if (VT.getVectorElementType().getSizeInBits() < 32) 03827 return false; 03828 if (!VT.is128BitVector()) 03829 return false; 03830 03831 unsigned NumElts = VT.getVectorNumElements(); 03832 03833 if (!isUndefOrEqual(Mask[0], NumElts)) 03834 return false; 03835 03836 for (unsigned i = 1; i != NumElts; ++i) 03837 if (!isUndefOrEqual(Mask[i], i)) 03838 return false; 03839 03840 return true; 03841 } 03842 03843 /// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered 03844 /// as permutations between 128-bit chunks or halves. As an example: this 03845 /// shuffle bellow: 03846 /// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15> 03847 /// The first half comes from the second half of V1 and the second half from the 03848 /// the second half of V2. 03849 static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) { 03850 if (!HasFp256 || !VT.is256BitVector()) 03851 return false; 03852 03853 // The shuffle result is divided into half A and half B. In total the two 03854 // sources have 4 halves, namely: C, D, E, F. The final values of A and 03855 // B must come from C, D, E or F. 03856 unsigned HalfSize = VT.getVectorNumElements()/2; 03857 bool MatchA = false, MatchB = false; 03858 03859 // Check if A comes from one of C, D, E, F. 03860 for (unsigned Half = 0; Half != 4; ++Half) { 03861 if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) { 03862 MatchA = true; 03863 break; 03864 } 03865 } 03866 03867 // Check if B comes from one of C, D, E, F. 03868 for (unsigned Half = 0; Half != 4; ++Half) { 03869 if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) { 03870 MatchB = true; 03871 break; 03872 } 03873 } 03874 03875 return MatchA && MatchB; 03876 } 03877 03878 /// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle 03879 /// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions. 
03880 static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { 03881 MVT VT = SVOp->getValueType(0).getSimpleVT(); 03882 03883 unsigned HalfSize = VT.getVectorNumElements()/2; 03884 03885 unsigned FstHalf = 0, SndHalf = 0; 03886 for (unsigned i = 0; i < HalfSize; ++i) { 03887 if (SVOp->getMaskElt(i) > 0) { 03888 FstHalf = SVOp->getMaskElt(i)/HalfSize; 03889 break; 03890 } 03891 } 03892 for (unsigned i = HalfSize; i < HalfSize*2; ++i) { 03893 if (SVOp->getMaskElt(i) > 0) { 03894 SndHalf = SVOp->getMaskElt(i)/HalfSize; 03895 break; 03896 } 03897 } 03898 03899 return (FstHalf | (SndHalf << 4)); 03900 } 03901 03902 /// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand 03903 /// specifies a shuffle of elements that is suitable for input to VPERMILPD*. 03904 /// Note that VPERMIL mask matching is different depending whether theunderlying 03905 /// type is 32 or 64. In the VPERMILPS the high half of the mask should point 03906 /// to the same elements of the low, but to the higher half of the source. 03907 /// In VPERMILPD the two lanes could be shuffled independently of each other 03908 /// with the same restriction that lanes can't be crossed. Also handles PSHUFDY. 
03909 static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) { 03910 if (!HasFp256) 03911 return false; 03912 03913 unsigned NumElts = VT.getVectorNumElements(); 03914 // Only match 256-bit with 32/64-bit types 03915 if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8)) 03916 return false; 03917 03918 unsigned NumLanes = VT.getSizeInBits()/128; 03919 unsigned LaneSize = NumElts/NumLanes; 03920 for (unsigned l = 0; l != NumElts; l += LaneSize) { 03921 for (unsigned i = 0; i != LaneSize; ++i) { 03922 if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize)) 03923 return false; 03924 if (NumElts != 8 || l == 0) 03925 continue; 03926 // VPERMILPS handling 03927 if (Mask[i] < 0) 03928 continue; 03929 if (!isUndefOrEqual(Mask[i+l], Mask[i]+l)) 03930 return false; 03931 } 03932 } 03933 03934 return true; 03935 } 03936 03937 /// isCommutedMOVLMask - Returns true if the shuffle mask is except the reverse 03938 /// of what x86 movss want. X86 movs requires the lowest element to be lowest 03939 /// element of vector 2 and the other elements to come from vector 1 in order. 03940 static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT, 03941 bool V2IsSplat = false, bool V2IsUndef = false) { 03942 if (!VT.is128BitVector()) 03943 return false; 03944 03945 unsigned NumOps = VT.getVectorNumElements(); 03946 if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) 03947 return false; 03948 03949 if (!isUndefOrEqual(Mask[0], 0)) 03950 return false; 03951 03952 for (unsigned i = 1; i != NumOps; ++i) 03953 if (!(isUndefOrEqual(Mask[i], i+NumOps) || 03954 (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) || 03955 (V2IsSplat && isUndefOrEqual(Mask[i], NumOps)))) 03956 return false; 03957 03958 return true; 03959 } 03960 03961 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 03962 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 
03963 /// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7> 03964 static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT, 03965 const X86Subtarget *Subtarget) { 03966 if (!Subtarget->hasSSE3()) 03967 return false; 03968 03969 unsigned NumElems = VT.getVectorNumElements(); 03970 03971 if ((VT.is128BitVector() && NumElems != 4) || 03972 (VT.is256BitVector() && NumElems != 8)) 03973 return false; 03974 03975 // "i+1" is the value the indexed mask element must have 03976 for (unsigned i = 0; i != NumElems; i += 2) 03977 if (!isUndefOrEqual(Mask[i], i+1) || 03978 !isUndefOrEqual(Mask[i+1], i+1)) 03979 return false; 03980 03981 return true; 03982 } 03983 03984 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 03985 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 03986 /// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6> 03987 static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT, 03988 const X86Subtarget *Subtarget) { 03989 if (!Subtarget->hasSSE3()) 03990 return false; 03991 03992 unsigned NumElems = VT.getVectorNumElements(); 03993 03994 if ((VT.is128BitVector() && NumElems != 4) || 03995 (VT.is256BitVector() && NumElems != 8)) 03996 return false; 03997 03998 // "i" is the value the indexed mask element must have 03999 for (unsigned i = 0; i != NumElems; i += 2) 04000 if (!isUndefOrEqual(Mask[i], i) || 04001 !isUndefOrEqual(Mask[i+1], i)) 04002 return false; 04003 04004 return true; 04005 } 04006 04007 /// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand 04008 /// specifies a shuffle of elements that is suitable for input to 256-bit 04009 /// version of MOVDDUP. 
04010 static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) { 04011 if (!HasFp256 || !VT.is256BitVector()) 04012 return false; 04013 04014 unsigned NumElts = VT.getVectorNumElements(); 04015 if (NumElts != 4) 04016 return false; 04017 04018 for (unsigned i = 0; i != NumElts/2; ++i) 04019 if (!isUndefOrEqual(Mask[i], 0)) 04020 return false; 04021 for (unsigned i = NumElts/2; i != NumElts; ++i) 04022 if (!isUndefOrEqual(Mask[i], NumElts/2)) 04023 return false; 04024 return true; 04025 } 04026 04027 /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand 04028 /// specifies a shuffle of elements that is suitable for input to 128-bit 04029 /// version of MOVDDUP. 04030 static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) { 04031 if (!VT.is128BitVector()) 04032 return false; 04033 04034 unsigned e = VT.getVectorNumElements() / 2; 04035 for (unsigned i = 0; i != e; ++i) 04036 if (!isUndefOrEqual(Mask[i], i)) 04037 return false; 04038 for (unsigned i = 0; i != e; ++i) 04039 if (!isUndefOrEqual(Mask[e+i], i)) 04040 return false; 04041 return true; 04042 } 04043 04044 /// isVEXTRACTF128Index - Return true if the specified 04045 /// EXTRACT_SUBVECTOR operand specifies a vector extract that is 04046 /// suitable for input to VEXTRACTF128. 04047 bool X86::isVEXTRACTF128Index(SDNode *N) { 04048 if (!isa<ConstantSDNode>(N->getOperand(1).getNode())) 04049 return false; 04050 04051 // The index should be aligned on a 128-bit boundary. 04052 uint64_t Index = 04053 cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 04054 04055 MVT VT = N->getValueType(0).getSimpleVT(); 04056 unsigned ElSize = VT.getVectorElementType().getSizeInBits(); 04057 bool Result = (Index * ElSize) % 128 == 0; 04058 04059 return Result; 04060 } 04061 04062 /// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR 04063 /// operand specifies a subvector insert that is suitable for input to 04064 /// VINSERTF128. 
04065 bool X86::isVINSERTF128Index(SDNode *N) { 04066 if (!isa<ConstantSDNode>(N->getOperand(2).getNode())) 04067 return false; 04068 04069 // The index should be aligned on a 128-bit boundary. 04070 uint64_t Index = 04071 cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 04072 04073 MVT VT = N->getValueType(0).getSimpleVT(); 04074 unsigned ElSize = VT.getVectorElementType().getSizeInBits(); 04075 bool Result = (Index * ElSize) % 128 == 0; 04076 04077 return Result; 04078 } 04079 04080 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 04081 /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions. 04082 /// Handles 128-bit and 256-bit. 04083 static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { 04084 MVT VT = N->getValueType(0).getSimpleVT(); 04085 04086 assert((VT.is128BitVector() || VT.is256BitVector()) && 04087 "Unsupported vector type for PSHUF/SHUFP"); 04088 04089 // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate 04090 // independently on 128-bit lanes. 04091 unsigned NumElts = VT.getVectorNumElements(); 04092 unsigned NumLanes = VT.getSizeInBits()/128; 04093 unsigned NumLaneElts = NumElts/NumLanes; 04094 04095 assert((NumLaneElts == 2 || NumLaneElts == 4) && 04096 "Only supports 2 or 4 elements per lane"); 04097 04098 unsigned Shift = (NumLaneElts == 4) ? 1 : 0; 04099 unsigned Mask = 0; 04100 for (unsigned i = 0; i != NumElts; ++i) { 04101 int Elt = N->getMaskElt(i); 04102 if (Elt < 0) continue; 04103 Elt &= NumLaneElts - 1; 04104 unsigned ShAmt = (i << Shift) % 8; 04105 Mask |= Elt << ShAmt; 04106 } 04107 04108 return Mask; 04109 } 04110 04111 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 04112 /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction. 
04113 static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { 04114 MVT VT = N->getValueType(0).getSimpleVT(); 04115 04116 assert((VT == MVT::v8i16 || VT == MVT::v16i16) && 04117 "Unsupported vector type for PSHUFHW"); 04118 04119 unsigned NumElts = VT.getVectorNumElements(); 04120 04121 unsigned Mask = 0; 04122 for (unsigned l = 0; l != NumElts; l += 8) { 04123 // 8 nodes per lane, but we only care about the last 4. 04124 for (unsigned i = 0; i < 4; ++i) { 04125 int Elt = N->getMaskElt(l+i+4); 04126 if (Elt < 0) continue; 04127 Elt &= 0x3; // only 2-bits. 04128 Mask |= Elt << (i * 2); 04129 } 04130 } 04131 04132 return Mask; 04133 } 04134 04135 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 04136 /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction. 04137 static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { 04138 MVT VT = N->getValueType(0).getSimpleVT(); 04139 04140 assert((VT == MVT::v8i16 || VT == MVT::v16i16) && 04141 "Unsupported vector type for PSHUFHW"); 04142 04143 unsigned NumElts = VT.getVectorNumElements(); 04144 04145 unsigned Mask = 0; 04146 for (unsigned l = 0; l != NumElts; l += 8) { 04147 // 8 nodes per lane, but we only care about the first 4. 04148 for (unsigned i = 0; i < 4; ++i) { 04149 int Elt = N->getMaskElt(l+i); 04150 if (Elt < 0) continue; 04151 Elt &= 0x3; // only 2-bits 04152 Mask |= Elt << (i * 2); 04153 } 04154 } 04155 04156 return Mask; 04157 } 04158 04159 /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle 04160 /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. 
04161 static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) { 04162 MVT VT = SVOp->getValueType(0).getSimpleVT(); 04163 unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3; 04164 04165 unsigned NumElts = VT.getVectorNumElements(); 04166 unsigned NumLanes = VT.getSizeInBits()/128; 04167 unsigned NumLaneElts = NumElts/NumLanes; 04168 04169 int Val = 0; 04170 unsigned i; 04171 for (i = 0; i != NumElts; ++i) { 04172 Val = SVOp->getMaskElt(i); 04173 if (Val >= 0) 04174 break; 04175 } 04176 if (Val >= (int)NumElts) 04177 Val -= NumElts - NumLaneElts; 04178 04179 assert(Val - i > 0 && "PALIGNR imm should be positive"); 04180 return (Val - i) * EltSize; 04181 } 04182 04183 /// getExtractVEXTRACTF128Immediate - Return the appropriate immediate 04184 /// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128 04185 /// instructions. 04186 unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) { 04187 if (!isa<ConstantSDNode>(N->getOperand(1).getNode())) 04188 llvm_unreachable("Illegal extract subvector for VEXTRACTF128"); 04189 04190 uint64_t Index = 04191 cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 04192 04193 MVT VecVT = N->getOperand(0).getValueType().getSimpleVT(); 04194 MVT ElVT = VecVT.getVectorElementType(); 04195 04196 unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); 04197 return Index / NumElemsPerChunk; 04198 } 04199 04200 /// getInsertVINSERTF128Immediate - Return the appropriate immediate 04201 /// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128 04202 /// instructions. 
04203 unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { 04204 if (!isa<ConstantSDNode>(N->getOperand(2).getNode())) 04205 llvm_unreachable("Illegal insert subvector for VINSERTF128"); 04206 04207 uint64_t Index = 04208 cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 04209 04210 MVT VecVT = N->getValueType(0).getSimpleVT(); 04211 MVT ElVT = VecVT.getVectorElementType(); 04212 04213 unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); 04214 return Index / NumElemsPerChunk; 04215 } 04216 04217 /// getShuffleCLImmediate - Return the appropriate immediate to shuffle 04218 /// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions. 04219 /// Handles 256-bit. 04220 static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { 04221 MVT VT = N->getValueType(0).getSimpleVT(); 04222 04223 unsigned NumElts = VT.getVectorNumElements(); 04224 04225 assert((VT.is256BitVector() && NumElts == 4) && 04226 "Unsupported vector type for VPERMQ/VPERMPD"); 04227 04228 unsigned Mask = 0; 04229 for (unsigned i = 0; i != NumElts; ++i) { 04230 int Elt = N->getMaskElt(i); 04231 if (Elt < 0) 04232 continue; 04233 Mask |= Elt << (i*2); 04234 } 04235 04236 return Mask; 04237 } 04238 /// isZeroNode - Returns true if Elt is a constant zero or a floating point 04239 /// constant +0.0. 04240 bool X86::isZeroNode(SDValue Elt) { 04241 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt)) 04242 return CN->isNullValue(); 04243 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt)) 04244 return CFP->getValueAPF().isPosZero(); 04245 return false; 04246 } 04247 04248 /// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in 04249 /// their permute mask. 
04250 static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, 04251 SelectionDAG &DAG) { 04252 MVT VT = SVOp->getValueType(0).getSimpleVT(); 04253 unsigned NumElems = VT.getVectorNumElements(); 04254 SmallVector<int, 8> MaskVec; 04255 04256 for (unsigned i = 0; i != NumElems; ++i) { 04257 int Idx = SVOp->getMaskElt(i); 04258 if (Idx >= 0) { 04259 if (Idx < (int)NumElems) 04260 Idx += NumElems; 04261 else 04262 Idx -= NumElems; 04263 } 04264 MaskVec.push_back(Idx); 04265 } 04266 return DAG.getVectorShuffle(VT, SDLoc(SVOp), SVOp->getOperand(1), 04267 SVOp->getOperand(0), &MaskVec[0]); 04268 } 04269 04270 /// ShouldXformToMOVHLPS - Return true if the node should be transformed to 04271 /// match movhlps. The lower half elements should come from upper half of 04272 /// V1 (and in order), and the upper half elements should come from the upper 04273 /// half of V2 (and in order). 04274 static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) { 04275 if (!VT.is128BitVector()) 04276 return false; 04277 if (VT.getVectorNumElements() != 4) 04278 return false; 04279 for (unsigned i = 0, e = 2; i != e; ++i) 04280 if (!isUndefOrEqual(Mask[i], i+2)) 04281 return false; 04282 for (unsigned i = 2; i != 4; ++i) 04283 if (!isUndefOrEqual(Mask[i], i+4)) 04284 return false; 04285 return true; 04286 } 04287 04288 /// isScalarLoadToVector - Returns true if the node is a scalar load that 04289 /// is promoted to a vector. It also returns the LoadSDNode by reference if 04290 /// required. 04291 static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { 04292 if (N->getOpcode() != ISD::SCALAR_TO_VECTOR) 04293 return false; 04294 N = N->getOperand(0).getNode(); 04295 if (!ISD::isNON_EXTLoad(N)) 04296 return false; 04297 if (LD) 04298 *LD = cast<LoadSDNode>(N); 04299 return true; 04300 } 04301 04302 // Test whether the given value is a vector value which will be legalized 04303 // into a load. 
04304 static bool WillBeConstantPoolLoad(SDNode *N) { 04305 if (N->getOpcode() != ISD::BUILD_VECTOR) 04306 return false; 04307 04308 // Check for any non-constant elements. 04309 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 04310 switch (N->getOperand(i).getNode()->getOpcode()) { 04311 case ISD::UNDEF: 04312 case ISD::ConstantFP: 04313 case ISD::Constant: 04314 break; 04315 default: 04316 return false; 04317 } 04318 04319 // Vectors of all-zeros and all-ones are materialized with special 04320 // instructions rather than being loaded. 04321 return !ISD::isBuildVectorAllZeros(N) && 04322 !ISD::isBuildVectorAllOnes(N); 04323 } 04324 04325 /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 04326 /// match movlp{s|d}. The lower half elements should come from lower half of 04327 /// V1 (and in order), and the upper half elements should come from the upper 04328 /// half of V2 (and in order). And since V1 will become the source of the 04329 /// MOVLP, it must be either a vector load or a scalar load to vector. 04330 static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, 04331 ArrayRef<int> Mask, EVT VT) { 04332 if (!VT.is128BitVector()) 04333 return false; 04334 04335 if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) 04336 return false; 04337 // Is V2 is a vector load, don't do this transformation. We will try to use 04338 // load folding shufps op. 
04339 if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2)) 04340 return false; 04341 04342 unsigned NumElems = VT.getVectorNumElements(); 04343 04344 if (NumElems != 2 && NumElems != 4) 04345 return false; 04346 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 04347 if (!isUndefOrEqual(Mask[i], i)) 04348 return false; 04349 for (unsigned i = NumElems/2, e = NumElems; i != e; ++i) 04350 if (!isUndefOrEqual(Mask[i], i+NumElems)) 04351 return false; 04352 return true; 04353 } 04354 04355 /// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 04356 /// all the same. 04357 static bool isSplatVector(SDNode *N) { 04358 if (N->getOpcode() != ISD::BUILD_VECTOR) 04359 return false; 04360 04361 SDValue SplatValue = N->getOperand(0); 04362 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 04363 if (N->getOperand(i) != SplatValue) 04364 return false; 04365 return true; 04366 } 04367 04368 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 04369 /// to an zero vector. 
04370 /// FIXME: move to dag combiner / method on ShuffleVectorSDNode 04371 static bool isZeroShuffle(ShuffleVectorSDNode *N) { 04372 SDValue V1 = N->getOperand(0); 04373 SDValue V2 = N->getOperand(1); 04374 unsigned NumElems = N->getValueType(0).getVectorNumElements(); 04375 for (unsigned i = 0; i != NumElems; ++i) { 04376 int Idx = N->getMaskElt(i); 04377 if (Idx >= (int)NumElems) { 04378 unsigned Opc = V2.getOpcode(); 04379 if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) 04380 continue; 04381 if (Opc != ISD::BUILD_VECTOR || 04382 !X86::isZeroNode(V2.getOperand(Idx-NumElems))) 04383 return false; 04384 } else if (Idx >= 0) { 04385 unsigned Opc = V1.getOpcode(); 04386 if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) 04387 continue; 04388 if (Opc != ISD::BUILD_VECTOR || 04389 !X86::isZeroNode(V1.getOperand(Idx))) 04390 return false; 04391 } 04392 } 04393 return true; 04394 } 04395 04396 /// getZeroVector - Returns a vector of specified type with all zero elements. 04397 /// 04398 static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, 04399 SelectionDAG &DAG, SDLoc dl) { 04400 assert(VT.isVector() && "Expected a vector type"); 04401 04402 // Always build SSE zero vectors as <4 x i32> bitcasted 04403 // to their dest type. This ensures they get CSE'd. 
04404 SDValue Vec; 04405 if (VT.is128BitVector()) { // SSE 04406 if (Subtarget->hasSSE2()) { // SSE2 04407 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); 04408 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); 04409 } else { // SSE1 04410 SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); 04411 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); 04412 } 04413 } else if (VT.is256BitVector()) { // AVX 04414 if (Subtarget->hasInt256()) { // AVX2 04415 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); 04416 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; 04417 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 04418 array_lengthof(Ops)); 04419 } else { 04420 // 256-bit logic and arithmetic instructions in AVX are all 04421 // floating-point, no support for integer ops. Emit fp zeroed vectors. 04422 SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); 04423 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; 04424 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 04425 array_lengthof(Ops)); 04426 } 04427 } else 04428 llvm_unreachable("Unexpected vector type"); 04429 04430 return DAG.getNode(ISD::BITCAST, dl, VT, Vec); 04431 } 04432 04433 /// getOnesVector - Returns a vector of specified type with all bits set. 04434 /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with 04435 /// no AVX2 supprt, use two <4 x i32> inserted in a <8 x i32> appropriately. 04436 /// Then bitcast to their original type, ensuring they get CSE'd. 
04437 static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, 04438 SDLoc dl) { 04439 assert(VT.isVector() && "Expected a vector type"); 04440 04441 SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); 04442 SDValue Vec; 04443 if (VT.is256BitVector()) { 04444 if (HasInt256) { // AVX2 04445 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; 04446 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 04447 array_lengthof(Ops)); 04448 } else { // AVX 04449 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); 04450 Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); 04451 } 04452 } else if (VT.is128BitVector()) { 04453 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); 04454 } else 04455 llvm_unreachable("Unexpected vector type"); 04456 04457 return DAG.getNode(ISD::BITCAST, dl, VT, Vec); 04458 } 04459 04460 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 04461 /// that point to V2 points to its first element. 04462 static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) { 04463 for (unsigned i = 0; i != NumElems; ++i) { 04464 if (Mask[i] > (int)NumElems) { 04465 Mask[i] = NumElems; 04466 } 04467 } 04468 } 04469 04470 /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 04471 /// operation of specified width. 04472 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, 04473 SDValue V2) { 04474 unsigned NumElems = VT.getVectorNumElements(); 04475 SmallVector<int, 8> Mask; 04476 Mask.push_back(NumElems); 04477 for (unsigned i = 1; i != NumElems; ++i) 04478 Mask.push_back(i); 04479 return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); 04480 } 04481 04482 /// getUnpackl - Returns a vector_shuffle node for an unpackl operation. 
04483 static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, 04484 SDValue V2) { 04485 unsigned NumElems = VT.getVectorNumElements(); 04486 SmallVector<int, 8> Mask; 04487 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 04488 Mask.push_back(i); 04489 Mask.push_back(i + NumElems); 04490 } 04491 return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); 04492 } 04493 04494 /// getUnpackh - Returns a vector_shuffle node for an unpackh operation. 04495 static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, 04496 SDValue V2) { 04497 unsigned NumElems = VT.getVectorNumElements(); 04498 SmallVector<int, 8> Mask; 04499 for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) { 04500 Mask.push_back(i + Half); 04501 Mask.push_back(i + NumElems + Half); 04502 } 04503 return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); 04504 } 04505 04506 // PromoteSplati8i16 - All i16 and i8 vector types can't be used directly by 04507 // a generic shuffle instruction because the target has no such instructions. 04508 // Generate shuffles which repeat i16 and i8 several times until they can be 04509 // represented by v4f32 and then be manipulated by target suported shuffles. 
04510 static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) { 04511 EVT VT = V.getValueType(); 04512 int NumElems = VT.getVectorNumElements(); 04513 SDLoc dl(V); 04514 04515 while (NumElems > 4) { 04516 if (EltNo < NumElems/2) { 04517 V = getUnpackl(DAG, dl, VT, V, V); 04518 } else { 04519 V = getUnpackh(DAG, dl, VT, V, V); 04520 EltNo -= NumElems/2; 04521 } 04522 NumElems >>= 1; 04523 } 04524 return V; 04525 } 04526 04527 /// getLegalSplat - Generate a legal splat with supported x86 shuffles 04528 static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { 04529 EVT VT = V.getValueType(); 04530 SDLoc dl(V); 04531 04532 if (VT.is128BitVector()) { 04533 V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V); 04534 int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; 04535 V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32), 04536 &SplatMask[0]); 04537 } else if (VT.is256BitVector()) { 04538 // To use VPERMILPS to splat scalars, the second half of indicies must 04539 // refer to the higher part, which is a duplication of the lower one, 04540 // because VPERMILPS can only handle in-lane permutations. 04541 int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo, 04542 EltNo+4, EltNo+4, EltNo+4, EltNo+4 }; 04543 04544 V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V); 04545 V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32), 04546 &SplatMask[0]); 04547 } else 04548 llvm_unreachable("Vector size not supported"); 04549 04550 return DAG.getNode(ISD::BITCAST, dl, VT, V); 04551 } 04552 04553 /// PromoteSplat - Splat is promoted to target supported vector shuffles. 
04554 static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { 04555 EVT SrcVT = SV->getValueType(0); 04556 SDValue V1 = SV->getOperand(0); 04557 SDLoc dl(SV); 04558 04559 int EltNo = SV->getSplatIndex(); 04560 int NumElems = SrcVT.getVectorNumElements(); 04561 bool Is256BitVec = SrcVT.is256BitVector(); 04562 04563 assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) && 04564 "Unknown how to promote splat for type"); 04565 04566 // Extract the 128-bit part containing the splat element and update 04567 // the splat element index when it refers to the higher register. 04568 if (Is256BitVec) { 04569 V1 = Extract128BitVector(V1, EltNo, DAG, dl); 04570 if (EltNo >= NumElems/2) 04571 EltNo -= NumElems/2; 04572 } 04573 04574 // All i16 and i8 vector types can't be used directly by a generic shuffle 04575 // instruction because the target has no such instruction. Generate shuffles 04576 // which repeat i16 and i8 several times until they fit in i32, and then can 04577 // be manipulated by target suported shuffles. 04578 EVT EltVT = SrcVT.getVectorElementType(); 04579 if (EltVT == MVT::i8 || EltVT == MVT::i16) 04580 V1 = PromoteSplati8i16(V1, DAG, EltNo); 04581 04582 // Recreate the 256-bit vector and place the same 128-bit vector 04583 // into the low and high part. This is necessary because we want 04584 // to use VPERM* to shuffle the vectors 04585 if (Is256BitVec) { 04586 V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1); 04587 } 04588 04589 return getLegalSplat(DAG, V1, EltNo); 04590 } 04591 04592 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 04593 /// vector of zero or undef vector. This produces a shuffle where the low 04594 /// element of V2 is swizzled into the zero/undef vector, landing at element 04595 /// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). 
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
                                           bool IsZero,
                                           const X86Subtarget *Subtarget,
                                           SelectionDAG &DAG) {
  EVT VT = V2.getValueType();
  // The first shuffle operand is the background: zeros when IsZero is set,
  // undef otherwise.
  SDValue V1 = IsZero
    ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
  unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 16> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i)
    // If this is the insertion idx, put the low elt of V2 here.
    // (Mask index NumElems names element 0 of the second operand.)
    MaskVec.push_back(i == Idx ? NumElems : i);
  return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
}

/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
/// target specific opcode. Returns true if the Mask could be calculated.
/// Sets IsUnary to true if only uses one source.
///
/// Returns false for the opcodes whose decoding is not implemented yet
/// (MOVDDUP, MOVLHPD, MOVLPD, MOVLPS, MOVSHDUP, MOVSLDUP) and for VPERM2X128
/// when the decoder leaves Mask empty. Any opcode not listed in the switch
/// reaches llvm_unreachable.
static bool getTargetShuffleMask(SDNode *N, MVT VT,
                                 SmallVectorImpl<int> &Mask, bool &IsUnary) {
  unsigned NumElems = VT.getVectorNumElements();
  SDValue ImmN;

  IsUnary = false;
  // For the opcodes that carry an immediate, it is read from the last
  // operand of the node.
  switch(N->getOpcode()) {
  case X86ISD::SHUFP:
    ImmN = N->getOperand(N->getNumOperands()-1);
    DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
    break;
  case X86ISD::UNPCKH:
    DecodeUNPCKHMask(VT, Mask);
    break;
  case X86ISD::UNPCKL:
    DecodeUNPCKLMask(VT, Mask);
    break;
  case X86ISD::MOVHLPS:
    DecodeMOVHLPSMask(NumElems, Mask);
    break;
  case X86ISD::MOVLHPS:
    DecodeMOVLHPSMask(NumElems, Mask);
    break;
  case X86ISD::PALIGNR:
    ImmN = N->getOperand(N->getNumOperands()-1);
    DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
    break;
  case X86ISD::PSHUFD:
  case X86ISD::VPERMILP:
    ImmN = N->getOperand(N->getNumOperands()-1);
    DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
    IsUnary = true;
    break;
  case X86ISD::PSHUFHW:
    ImmN = N->getOperand(N->getNumOperands()-1);
    DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
    IsUnary = true;
    break;
  case X86ISD::PSHUFLW:
    ImmN = N->getOperand(N->getNumOperands()-1);
    DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
    IsUnary = true;
    break;
  case X86ISD::VPERMI:
    ImmN = N->getOperand(N->getNumOperands()-1);
    DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
    IsUnary = true;
    break;
  case X86ISD::MOVSS:
  case X86ISD::MOVSD: {
    // The index 0 always comes from the first element of the second source,
    // this is why MOVSS and MOVSD are used in the first place. The other
    // elements come from the other positions of the first source vector
    Mask.push_back(NumElems);
    for (unsigned i = 1; i != NumElems; ++i) {
      Mask.push_back(i);
    }
    break;
  }
  case X86ISD::VPERM2X128:
    ImmN = N->getOperand(N->getNumOperands()-1);
    DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
    // An empty mask after decoding means no mask could be calculated.
    if (Mask.empty()) return false;
    break;
  case X86ISD::MOVDDUP:
  case X86ISD::MOVLHPD:
  case X86ISD::MOVLPD:
  case X86ISD::MOVLPS:
  case X86ISD::MOVSHDUP:
  case X86ISD::MOVSLDUP:
    // Not yet implemented
    return false;
  default: llvm_unreachable("unknown target shuffle node");
  }

  return true;
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
///
/// The lookup follows shuffle operands recursively and gives up with an empty
/// SDValue once Depth reaches 6, or when the element cannot be traced to a
/// SCALAR_TO_VECTOR or BUILD_VECTOR node. An undef of the element type is
/// returned for undef mask entries.
static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
                                   unsigned Depth) {
  if (Depth == 6)
    return SDValue();  // Limit search depth.

  SDValue V = SDValue(N, 0);
  EVT VT = V.getValueType();
  unsigned Opcode = V.getOpcode();

  // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
  if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
    int Elt = SV->getMaskElt(Index);

    if (Elt < 0)
      return DAG.getUNDEF(VT.getVectorElementType());

    // Mask entries below NumElems select from operand 0, the rest from
    // operand 1; "Elt % NumElems" is the index inside the chosen operand.
    unsigned NumElems = VT.getVectorNumElements();
    SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
                                         : SV->getOperand(1);
    return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
  }

  // Recurse into target specific vector shuffles to find scalars.
  if (isTargetShuffle(Opcode)) {
    MVT ShufVT = V.getValueType().getSimpleVT();
    unsigned NumElems = ShufVT.getVectorNumElements();
    SmallVector<int, 16> ShuffleMask;
    bool IsUnary;

    // Give up when the mask of this target shuffle cannot be decoded.
    if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
      return SDValue();

    int Elt = ShuffleMask[Index];
    if (Elt < 0)
      return DAG.getUNDEF(ShufVT.getVectorElementType());

    SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
                                         : N->getOperand(1);
    return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
                               Depth+1);
  }

  // Actual nodes that may contain scalar elements
  if (Opcode == ISD::BITCAST) {
    V = V.getOperand(0);
    EVT SrcVT = V.getValueType();
    unsigned NumElems = VT.getVectorNumElements();

    // Only look through bitcasts between vectors with the same number of
    // elements, so that Index still names the same element.
    if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
      return SDValue();
  }

  // Only element 0 of a SCALAR_TO_VECTOR is defined.
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
    return (Index == 0) ? V.getOperand(0)
                        : DAG.getUNDEF(VT.getVectorElementType());

  if (V.getOpcode() == ISD::BUILD_VECTOR)
    return V.getOperand(Index);

  return SDValue();
}

/// getNumOfConsecutiveZeros - Return the number of elements of a vector
/// shuffle operation which come consecutively from a zero. The
/// search can start in two different directions, from left or right.
04759 /// We count undefs as zeros until PreferredNum is reached. 04760 static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, 04761 unsigned NumElems, bool ZerosFromLeft, 04762 SelectionDAG &DAG, 04763 unsigned PreferredNum = -1U) { 04764 unsigned NumZeros = 0; 04765 for (unsigned i = 0; i != NumElems; ++i) { 04766 unsigned Index = ZerosFromLeft ? i : NumElems - i - 1; 04767 SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0); 04768 if (!Elt.getNode()) 04769 break; 04770 04771 if (X86::isZeroNode(Elt)) 04772 ++NumZeros; 04773 else if (Elt.getOpcode() == ISD::UNDEF) // Undef as zero up to PreferredNum. 04774 NumZeros = std::min(NumZeros + 1, PreferredNum); 04775 else 04776 break; 04777 } 04778 04779 return NumZeros; 04780 } 04781 04782 /// isShuffleMaskConsecutive - Check if the shuffle mask indicies [MaskI, MaskE) 04783 /// correspond consecutively to elements from one of the vector operands, 04784 /// starting from its index OpIdx. Also tell OpNum which source vector operand. 04785 static 04786 bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, 04787 unsigned MaskI, unsigned MaskE, unsigned OpIdx, 04788 unsigned NumElems, unsigned &OpNum) { 04789 bool SeenV1 = false; 04790 bool SeenV2 = false; 04791 04792 for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) { 04793 int Idx = SVOp->getMaskElt(i); 04794 // Ignore undef indicies 04795 if (Idx < 0) 04796 continue; 04797 04798 if (Idx < (int)NumElems) 04799 SeenV1 = true; 04800 else 04801 SeenV2 = true; 04802 04803 // Only accept consecutive elements from the same vector 04804 if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2)) 04805 return false; 04806 } 04807 04808 OpNum = SeenV1 ? 0 : 1; 04809 return true; 04810 } 04811 04812 /// isVectorShiftRight - Returns true if the shuffle can be implemented as a 04813 /// logical left shift of a vector. 
04814 static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, 04815 bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { 04816 unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); 04817 unsigned NumZeros = getNumOfConsecutiveZeros( 04818 SVOp, NumElems, false /* check zeros from right */, DAG, 04819 SVOp->getMaskElt(0)); 04820 unsigned OpSrc; 04821 04822 if (!NumZeros) 04823 return false; 04824 04825 // Considering the elements in the mask that are not consecutive zeros, 04826 // check if they consecutively come from only one of the source vectors. 04827 // 04828 // V1 = {X, A, B, C} 0 04829 // \ \ \ / 04830 // vector_shuffle V1, V2 <1, 2, 3, X> 04831 // 04832 if (!isShuffleMaskConsecutive(SVOp, 04833 0, // Mask Start Index 04834 NumElems-NumZeros, // Mask End Index(exclusive) 04835 NumZeros, // Where to start looking in the src vector 04836 NumElems, // Number of elements in vector 04837 OpSrc)) // Which source operand ? 04838 return false; 04839 04840 isLeft = false; 04841 ShAmt = NumZeros; 04842 ShVal = SVOp->getOperand(OpSrc); 04843 return true; 04844 } 04845 04846 /// isVectorShiftLeft - Returns true if the shuffle can be implemented as a 04847 /// logical left shift of a vector. 04848 static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, 04849 bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { 04850 unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); 04851 unsigned NumZeros = getNumOfConsecutiveZeros( 04852 SVOp, NumElems, true /* check zeros from left */, DAG, 04853 NumElems - SVOp->getMaskElt(NumElems - 1) - 1); 04854 unsigned OpSrc; 04855 04856 if (!NumZeros) 04857 return false; 04858 04859 // Considering the elements in the mask that are not consecutive zeros, 04860 // check if they consecutively come from only one of the source vectors. 
04861 // 04862 // 0 { A, B, X, X } = V2 04863 // / \ / / 04864 // vector_shuffle V1, V2 <X, X, 4, 5> 04865 // 04866 if (!isShuffleMaskConsecutive(SVOp, 04867 NumZeros, // Mask Start Index 04868 NumElems, // Mask End Index(exclusive) 04869 0, // Where to start looking in the src vector 04870 NumElems, // Number of elements in vector 04871 OpSrc)) // Which source operand ? 04872 return false; 04873 04874 isLeft = true; 04875 ShAmt = NumZeros; 04876 ShVal = SVOp->getOperand(OpSrc); 04877 return true; 04878 } 04879 04880 /// isVectorShift - Returns true if the shuffle can be implemented as a 04881 /// logical left or right shift of a vector. 04882 static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, 04883 bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { 04884 // Although the logic below support any bitwidth size, there are no 04885 // shift instructions which handle more than 128-bit vectors. 04886 if (!SVOp->getValueType(0).is128BitVector()) 04887 return false; 04888 04889 if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) || 04890 isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt)) 04891 return true; 04892 04893 return false; 04894 } 04895 04896 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 
///
/// Bit i of NonZeros tells whether operand i of Op is treated as non-zero.
/// Operands are combined in pairs into i16 values that are inserted into a
/// v8i16, which is finally bitcast to v16i8. Returns an empty SDValue when
/// NumNonZero is greater than 8. TLI is not used by the body.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG,
                                       const X86Subtarget* Subtarget,
                                       const TargetLowering &TLI) {
  if (NumNonZero > 8)
    return SDValue();

  SDLoc dl(Op);
  SDValue V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 16; ++i) {
    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
    // On the first non-zero element, create the vector being filled: zeros
    // if NumZero is non-zero, undef otherwise.
    if (ThisIsNonZero && First) {
      if (NumZero)
        V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
      else
        V = DAG.getUNDEF(MVT::v8i16);
      First = false;
    }

    // Work is done on odd indices only, where the pair (i-1, i) is complete.
    if ((i & 1) != 0) {
      SDValue ThisElt(0, 0), LastElt(0, 0);
      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
      if (LastIsNonZero) {
        LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
                              MVT::i16, Op.getOperand(i-1));
      }
      if (ThisIsNonZero) {
        // The odd element goes into the upper 8 bits of the i16 and the even
        // element, when non-zero, is OR'ed into the lower 8 bits.
        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
        ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
                              ThisElt, DAG.getConstant(8, MVT::i8));
        if (LastIsNonZero)
          ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
      } else
        ThisElt = LastElt;

      // Nothing is inserted when both elements of the pair are zero.
      if (ThisElt.getNode())
        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
                        DAG.getIntPtrConstant(i/2));
    }
  }

  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
}

/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
04945 /// 04946 static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, 04947 unsigned NumNonZero, unsigned NumZero, 04948 SelectionDAG &DAG, 04949 const X86Subtarget* Subtarget, 04950 const TargetLowering &TLI) { 04951 if (NumNonZero > 4) 04952 return SDValue(); 04953 04954 SDLoc dl(Op); 04955 SDValue V(0, 0); 04956 bool First = true; 04957 for (unsigned i = 0; i < 8; ++i) { 04958 bool isNonZero = (NonZeros & (1 << i)) != 0; 04959 if (isNonZero) { 04960 if (First) { 04961 if (NumZero) 04962 V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); 04963 else 04964 V = DAG.getUNDEF(MVT::v8i16); 04965 First = false; 04966 } 04967 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, 04968 MVT::v8i16, V, Op.getOperand(i), 04969 DAG.getIntPtrConstant(i)); 04970 } 04971 } 04972 04973 return V; 04974 } 04975 04976 /// getVShift - Return a vector logical shift node. 04977 /// 04978 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, 04979 unsigned NumBits, SelectionDAG &DAG, 04980 const TargetLowering &TLI, SDLoc dl) { 04981 assert(VT.is128BitVector() && "Unknown type for VShift"); 04982 EVT ShVT = MVT::v2i64; 04983 unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; 04984 SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); 04985 return DAG.getNode(ISD::BITCAST, dl, VT, 04986 DAG.getNode(Opc, dl, ShVT, SrcOp, 04987 DAG.getConstant(NumBits, 04988 TLI.getScalarShiftAmountTy(SrcOp.getValueType())))); 04989 } 04990 04991 SDValue 04992 X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl, 04993 SelectionDAG &DAG) const { 04994 04995 // Check if the scalar load can be widened into a vector load. And if 04996 // the address is "base + cst" see if the cst can be "absorbed" into 04997 // the shuffle mask. 
04998 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) { 04999 SDValue Ptr = LD->getBasePtr(); 05000 if (!ISD::isNormalLoad(LD) || LD->isVolatile()) 05001 return SDValue(); 05002 EVT PVT = LD->getValueType(0); 05003 if (PVT != MVT::i32 && PVT != MVT::f32) 05004 return SDValue(); 05005 05006 int FI = -1; 05007 int64_t Offset = 0; 05008 if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) { 05009 FI = FINode->getIndex(); 05010 Offset = 0; 05011 } else if (DAG.isBaseWithConstantOffset(Ptr) && 05012 isa<FrameIndexSDNode>(Ptr.getOperand(0))) { 05013 FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); 05014 Offset = Ptr.getConstantOperandVal(1); 05015 Ptr = Ptr.getOperand(0); 05016 } else { 05017 return SDValue(); 05018 } 05019 05020 // FIXME: 256-bit vector instructions don't require a strict alignment, 05021 // improve this code to support it better. 05022 unsigned RequiredAlign = VT.getSizeInBits()/8; 05023 SDValue Chain = LD->getChain(); 05024 // Make sure the stack object alignment is at least 16 or 32. 05025 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 05026 if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) { 05027 if (MFI->isFixedObjectIndex(FI)) { 05028 // Can't change the alignment. FIXME: It's possible to compute 05029 // the exact stack offset and reference FI + adjust offset instead. 05030 // If someone *really* cares about this. That's the way to implement it. 05031 return SDValue(); 05032 } else { 05033 MFI->setObjectAlignment(FI, RequiredAlign); 05034 } 05035 } 05036 05037 // (Offset % 16 or 32) must be multiple of 4. Then address is then 05038 // Ptr + (Offset & ~15). 
05039 if (Offset < 0) 05040 return SDValue(); 05041 if ((Offset % RequiredAlign) & 3) 05042 return SDValue(); 05043 int64_t StartOffset = Offset & ~(RequiredAlign-1); 05044 if (StartOffset) 05045 Ptr = DAG.getNode(ISD::ADD, SDLoc(Ptr), Ptr.getValueType(), 05046 Ptr,DAG.getConstant(StartOffset, Ptr.getValueType())); 05047 05048 int EltNo = (Offset - StartOffset) >> 2; 05049 unsigned NumElems = VT.getVectorNumElements(); 05050 05051 EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems); 05052 SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr, 05053 LD->getPointerInfo().getWithOffset(StartOffset), 05054 false, false, false, 0); 05055 05056 SmallVector<int, 8> Mask; 05057 for (unsigned i = 0; i != NumElems; ++i) 05058 Mask.push_back(EltNo); 05059 05060 return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]); 05061 } 05062 05063 return SDValue(); 05064 } 05065 05066 /// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a 05067 /// vector of type 'VT', see if the elements can be replaced by a single large 05068 /// load which has the same value as a build_vector whose operands are 'elts'. 05069 /// 05070 /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a 05071 /// 05072 /// FIXME: we'd also like to handle the case where the last elements are zero 05073 /// rather than undef via VZEXT_LOAD, but we do not detect that case today. 05074 /// There's even a handy isZeroNode for that purpose. 05075 static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, 05076 SDLoc &DL, SelectionDAG &DAG) { 05077 EVT EltVT = VT.getVectorElementType(); 05078 unsigned NumElems = Elts.size(); 05079 05080 LoadSDNode *LDBase = NULL; 05081 unsigned LastLoadedElt = -1U; 05082 05083 // For each element in the initializer, see if we've found a load or an undef. 05084 // If we don't find an initial load element, or later load elements are 05085 // non-consecutive, bail out. 
05086 for (unsigned i = 0; i < NumElems; ++i) { 05087 SDValue Elt = Elts[i]; 05088 05089 if (!Elt.getNode() || 05090 (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) 05091 return SDValue(); 05092 if (!LDBase) { 05093 if (Elt.getNode()->getOpcode() == ISD::UNDEF) 05094 return SDValue(); 05095 LDBase = cast<LoadSDNode>(Elt.getNode()); 05096 LastLoadedElt = i; 05097 continue; 05098 } 05099 if (Elt.getOpcode() == ISD::UNDEF) 05100 continue; 05101 05102 LoadSDNode *LD = cast<LoadSDNode>(Elt); 05103 if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i)) 05104 return SDValue(); 05105 LastLoadedElt = i; 05106 } 05107 05108 // If we have found an entire vector of loads and undefs, then return a large 05109 // load of the entire vector width starting at the base pointer. If we found 05110 // consecutive loads for the low half, generate a vzext_load node. 05111 if (LastLoadedElt == NumElems - 1) { 05112 SDValue NewLd = SDValue(); 05113 if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16) 05114 NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), 05115 LDBase->getPointerInfo(), 05116 LDBase->isVolatile(), LDBase->isNonTemporal(), 05117 LDBase->isInvariant(), 0); 05118 NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), 05119 LDBase->getPointerInfo(), 05120 LDBase->isVolatile(), LDBase->isNonTemporal(), 05121 LDBase->isInvariant(), LDBase->getAlignment()); 05122 05123 if (LDBase->hasAnyUseOfValue(1)) { 05124 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 05125 SDValue(LDBase, 1), 05126 SDValue(NewLd.getNode(), 1)); 05127 DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); 05128 DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), 05129 SDValue(NewLd.getNode(), 1)); 05130 } 05131 05132 return NewLd; 05133 } 05134 if (NumElems == 4 && LastLoadedElt == 1 && 05135 DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) { 05136 SDVTList Tys = DAG.getVTList(MVT::v2i64, 
MVT::Other); 05137 SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; 05138 SDValue ResNode = 05139 DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 05140 array_lengthof(Ops), MVT::i64, 05141 LDBase->getPointerInfo(), 05142 LDBase->getAlignment(), 05143 false/*isVolatile*/, true/*ReadMem*/, 05144 false/*WriteMem*/); 05145 05146 // Make sure the newly-created LOAD is in the same position as LDBase in 05147 // terms of dependency. We create a TokenFactor for LDBase and ResNode, and 05148 // update uses of LDBase's output chain to use the TokenFactor. 05149 if (LDBase->hasAnyUseOfValue(1)) { 05150 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 05151 SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1)); 05152 DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); 05153 DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), 05154 SDValue(ResNode.getNode(), 1)); 05155 } 05156 05157 return DAG.getNode(ISD::BITCAST, DL, VT, ResNode); 05158 } 05159 return SDValue(); 05160 } 05161 05162 /// LowerVectorBroadcast - Attempt to use the vbroadcast instruction 05163 /// to generate a splat value for the following cases: 05164 /// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant. 05165 /// 2. A splat shuffle which uses a scalar_to_vector node which comes from 05166 /// a scalar load, or a constant. 05167 /// The VBROADCAST node is returned when a pattern is found, 05168 /// or SDValue() otherwise. 05169 SDValue 05170 X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const { 05171 if (!Subtarget->hasFp256()) 05172 return SDValue(); 05173 05174 MVT VT = Op.getValueType().getSimpleVT(); 05175 SDLoc dl(Op); 05176 05177 assert((VT.is128BitVector() || VT.is256BitVector()) && 05178 "Unsupported vector type for broadcast."); 05179 05180 SDValue Ld; 05181 bool ConstSplatVal; 05182 05183 switch (Op.getOpcode()) { 05184 default: 05185 // Unknown pattern found. 
05186 return SDValue(); 05187 05188 case ISD::BUILD_VECTOR: { 05189 // The BUILD_VECTOR node must be a splat. 05190 if (!isSplatVector(Op.getNode())) 05191 return SDValue(); 05192 05193 Ld = Op.getOperand(0); 05194 ConstSplatVal = (Ld.getOpcode() == ISD::Constant || 05195 Ld.getOpcode() == ISD::ConstantFP); 05196 05197 // The suspected load node has several users. Make sure that all 05198 // of its users are from the BUILD_VECTOR node. 05199 // Constants may have multiple users. 05200 if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) 05201 return SDValue(); 05202 break; 05203 } 05204 05205 case ISD::VECTOR_SHUFFLE: { 05206 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 05207 05208 // Shuffles must have a splat mask where the first element is 05209 // broadcasted. 05210 if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0) 05211 return SDValue(); 05212 05213 SDValue Sc = Op.getOperand(0); 05214 if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR && 05215 Sc.getOpcode() != ISD::BUILD_VECTOR) { 05216 05217 if (!Subtarget->hasInt256()) 05218 return SDValue(); 05219 05220 // Use the register form of the broadcast instruction available on AVX2. 05221 if (VT.is256BitVector()) 05222 Sc = Extract128BitVector(Sc, 0, DAG, dl); 05223 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc); 05224 } 05225 05226 Ld = Sc.getOperand(0); 05227 ConstSplatVal = (Ld.getOpcode() == ISD::Constant || 05228 Ld.getOpcode() == ISD::ConstantFP); 05229 05230 // The scalar_to_vector node and the suspected 05231 // load node must have exactly one user. 05232 // Constants may have multiple users. 05233 if (!ConstSplatVal && (!Sc.hasOneUse() || !Ld.hasOneUse())) 05234 return SDValue(); 05235 break; 05236 } 05237 } 05238 05239 bool Is256 = VT.is256BitVector(); 05240 05241 // Handle the broadcasting a single constant scalar from the constant pool 05242 // into a vector. 
On Sandybridge it is still better to load a constant vector 05243 // from the constant pool and not to broadcast it from a scalar. 05244 if (ConstSplatVal && Subtarget->hasInt256()) { 05245 EVT CVT = Ld.getValueType(); 05246 assert(!CVT.isVector() && "Must not broadcast a vector type"); 05247 unsigned ScalarSize = CVT.getSizeInBits(); 05248 05249 if (ScalarSize == 32 || (Is256 && ScalarSize == 64)) { 05250 const Constant *C = 0; 05251 if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) 05252 C = CI->getConstantIntValue(); 05253 else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld)) 05254 C = CF->getConstantFPValue(); 05255 05256 assert(C && "Invalid constant type"); 05257 05258 SDValue CP = DAG.getConstantPool(C, getPointerTy()); 05259 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); 05260 Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP, 05261 MachinePointerInfo::getConstantPool(), 05262 false, false, false, Alignment); 05263 05264 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); 05265 } 05266 } 05267 05268 bool IsLoad = ISD::isNormalLoad(Ld.getNode()); 05269 unsigned ScalarSize = Ld.getValueType().getSizeInBits(); 05270 05271 // Handle AVX2 in-register broadcasts. 05272 if (!IsLoad && Subtarget->hasInt256() && 05273 (ScalarSize == 32 || (Is256 && ScalarSize == 64))) 05274 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); 05275 05276 // The scalar source must be a normal load. 05277 if (!IsLoad) 05278 return SDValue(); 05279 05280 if (ScalarSize == 32 || (Is256 && ScalarSize == 64)) 05281 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); 05282 05283 // The integer check is needed for the 64-bit into 128-bit so it doesn't match 05284 // double since there is no vbroadcastsd xmm 05285 if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) { 05286 if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64) 05287 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); 05288 } 05289 05290 // Unsupported broadcast. 
05291 return SDValue(); 05292 } 05293 05294 SDValue 05295 X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const { 05296 EVT VT = Op.getValueType(); 05297 05298 // Skip if insert_vec_elt is not supported. 05299 if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT)) 05300 return SDValue(); 05301 05302 SDLoc DL(Op); 05303 unsigned NumElems = Op.getNumOperands(); 05304 05305 SDValue VecIn1; 05306 SDValue VecIn2; 05307 SmallVector<unsigned, 4> InsertIndices; 05308 SmallVector<int, 8> Mask(NumElems, -1); 05309 05310 for (unsigned i = 0; i != NumElems; ++i) { 05311 unsigned Opc = Op.getOperand(i).getOpcode(); 05312 05313 if (Opc == ISD::UNDEF) 05314 continue; 05315 05316 if (Opc != ISD::EXTRACT_VECTOR_ELT) { 05317 // Quit if more than 1 elements need inserting. 05318 if (InsertIndices.size() > 1) 05319 return SDValue(); 05320 05321 InsertIndices.push_back(i); 05322 continue; 05323 } 05324 05325 SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0); 05326 SDValue ExtIdx = Op.getOperand(i).getOperand(1); 05327 05328 // Quit if extracted from vector of different type. 05329 if (ExtractedFromVec.getValueType() != VT) 05330 return SDValue(); 05331 05332 // Quit if non-constant index. 05333 if (!isa<ConstantSDNode>(ExtIdx)) 05334 return SDValue(); 05335 05336 if (VecIn1.getNode() == 0) 05337 VecIn1 = ExtractedFromVec; 05338 else if (VecIn1 != ExtractedFromVec) { 05339 if (VecIn2.getNode() == 0) 05340 VecIn2 = ExtractedFromVec; 05341 else if (VecIn2 != ExtractedFromVec) 05342 // Quit if more than 2 vectors to shuffle 05343 return SDValue(); 05344 } 05345 05346 unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue(); 05347 05348 if (ExtractedFromVec == VecIn1) 05349 Mask[i] = Idx; 05350 else if (ExtractedFromVec == VecIn2) 05351 Mask[i] = Idx + NumElems; 05352 } 05353 05354 if (VecIn1.getNode() == 0) 05355 return SDValue(); 05356 05357 VecIn2 = VecIn2.getNode() ? 
VecIn2 : DAG.getUNDEF(VT); 05358 SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]); 05359 for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) { 05360 unsigned Idx = InsertIndices[i]; 05361 NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx), 05362 DAG.getIntPtrConstant(Idx)); 05363 } 05364 05365 return NV; 05366 } 05367 05368 SDValue 05369 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { 05370 SDLoc dl(Op); 05371 05372 MVT VT = Op.getValueType().getSimpleVT(); 05373 MVT ExtVT = VT.getVectorElementType(); 05374 unsigned NumElems = Op.getNumOperands(); 05375 05376 // Vectors containing all zeros can be matched by pxor and xorps later 05377 if (ISD::isBuildVectorAllZeros(Op.getNode())) { 05378 // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd 05379 // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. 05380 if (VT == MVT::v4i32 || VT == MVT::v8i32) 05381 return Op; 05382 05383 return getZeroVector(VT, Subtarget, DAG, dl); 05384 } 05385 05386 // Vectors containing all ones can be matched by pcmpeqd on 128-bit width 05387 // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use 05388 // vpcmpeqd on 256-bit vectors. 
05389 if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { 05390 if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256())) 05391 return Op; 05392 05393 return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl); 05394 } 05395 05396 SDValue Broadcast = LowerVectorBroadcast(Op, DAG); 05397 if (Broadcast.getNode()) 05398 return Broadcast; 05399 05400 unsigned EVTBits = ExtVT.getSizeInBits(); 05401 05402 unsigned NumZero = 0; 05403 unsigned NumNonZero = 0; 05404 unsigned NonZeros = 0; 05405 bool IsAllConstants = true; 05406 SmallSet<SDValue, 8> Values; 05407 for (unsigned i = 0; i < NumElems; ++i) { 05408 SDValue Elt = Op.getOperand(i); 05409 if (Elt.getOpcode() == ISD::UNDEF) 05410 continue; 05411 Values.insert(Elt); 05412 if (Elt.getOpcode() != ISD::Constant && 05413 Elt.getOpcode() != ISD::ConstantFP) 05414 IsAllConstants = false; 05415 if (X86::isZeroNode(Elt)) 05416 NumZero++; 05417 else { 05418 NonZeros |= (1 << i); 05419 NumNonZero++; 05420 } 05421 } 05422 05423 // All undef vector. Return an UNDEF. All zero vectors were handled above. 05424 if (NumNonZero == 0) 05425 return DAG.getUNDEF(VT); 05426 05427 // Special case for single non-zero, non-undef, element. 05428 if (NumNonZero == 1) { 05429 unsigned Idx = countTrailingZeros(NonZeros); 05430 SDValue Item = Op.getOperand(Idx); 05431 05432 // If this is an insertion of an i64 value on x86-32, and if the top bits of 05433 // the value are obviously zero, truncate the value to i32 and do the 05434 // insertion that way. Only do this if the value is non-constant or if the 05435 // value is a constant being inserted into element 0. It is cheaper to do 05436 // a constant pool load than it is to do a movd + shuffle. 05437 if (ExtVT == MVT::i64 && !Subtarget->is64Bit() && 05438 (!IsAllConstants || Idx == 0)) { 05439 if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { 05440 // Handle SSE only. 
05441 assert(VT == MVT::v2i64 && "Expected an SSE value type!"); 05442 EVT VecVT = MVT::v4i32; 05443 unsigned VecElts = 4; 05444 05445 // Truncate the value (which may itself be a constant) to i32, and 05446 // convert it to a vector with movd (S2V+shuffle to zero extend). 05447 Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item); 05448 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item); 05449 Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); 05450 05451 // Now we have our 32-bit value zero extended in the low element of 05452 // a vector. If Idx != 0, swizzle it into place. 05453 if (Idx != 0) { 05454 SmallVector<int, 4> Mask; 05455 Mask.push_back(Idx); 05456 for (unsigned i = 1; i != VecElts; ++i) 05457 Mask.push_back(i); 05458 Item = DAG.getVectorShuffle(VecVT, dl, Item, DAG.getUNDEF(VecVT), 05459 &Mask[0]); 05460 } 05461 return DAG.getNode(ISD::BITCAST, dl, VT, Item); 05462 } 05463 } 05464 05465 // If we have a constant or non-constant insertion into the low element of 05466 // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into 05467 // the rest of the elements. This will be matched as movd/movq/movss/movsd 05468 // depending on what the source datatype is. 05469 if (Idx == 0) { 05470 if (NumZero == 0) 05471 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); 05472 05473 if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || 05474 (ExtVT == MVT::i64 && Subtarget->is64Bit())) { 05475 if (VT.is256BitVector()) { 05476 SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); 05477 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, 05478 Item, DAG.getIntPtrConstant(0)); 05479 } 05480 assert(VT.is128BitVector() && "Expected an SSE value type!"); 05481 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); 05482 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 
05483 return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); 05484 } 05485 05486 if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { 05487 Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); 05488 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); 05489 if (VT.is256BitVector()) { 05490 SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); 05491 Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); 05492 } else { 05493 assert(VT.is128BitVector() && "Expected an SSE value type!"); 05494 Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); 05495 } 05496 return DAG.getNode(ISD::BITCAST, dl, VT, Item); 05497 } 05498 } 05499 05500 // Is it a vector logical left shift? 05501 if (NumElems == 2 && Idx == 1 && 05502 X86::isZeroNode(Op.getOperand(0)) && 05503 !X86::isZeroNode(Op.getOperand(1))) { 05504 unsigned NumBits = VT.getSizeInBits(); 05505 return getVShift(true, VT, 05506 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, 05507 VT, Op.getOperand(1)), 05508 NumBits/2, DAG, *this, dl); 05509 } 05510 05511 if (IsAllConstants) // Otherwise, it's better to do a constpool load. 05512 return SDValue(); 05513 05514 // Otherwise, if this is a vector with i32 or f32 elements, and the element 05515 // is a non-constant being inserted into an element other than the low one, 05516 // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka 05517 // movd/movss) to move this into the low element, then shuffle it into 05518 // place. 05519 if (EVTBits == 32) { 05520 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); 05521 05522 // Turn it into a shuffle of zero and zero-extended scalar to vector. 05523 Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG); 05524 SmallVector<int, 8> MaskVec; 05525 for (unsigned i = 0; i != NumElems; ++i) 05526 MaskVec.push_back(i == Idx ? 0 : 1); 05527 return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &