LLVM API Documentation
00001 //===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This implements routines for translating from LLVM IR into SelectionDAG IR. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #define DEBUG_TYPE "isel" 00015 #include "SelectionDAGBuilder.h" 00016 #include "SDNodeDbgValue.h" 00017 #include "llvm/ADT/BitVector.h" 00018 #include "llvm/ADT/SmallSet.h" 00019 #include "llvm/Analysis/AliasAnalysis.h" 00020 #include "llvm/Analysis/BranchProbabilityInfo.h" 00021 #include "llvm/Analysis/ConstantFolding.h" 00022 #include "llvm/Analysis/ValueTracking.h" 00023 #include "llvm/CodeGen/Analysis.h" 00024 #include "llvm/CodeGen/FastISel.h" 00025 #include "llvm/CodeGen/FunctionLoweringInfo.h" 00026 #include "llvm/CodeGen/GCMetadata.h" 00027 #include "llvm/CodeGen/GCStrategy.h" 00028 #include "llvm/CodeGen/MachineFrameInfo.h" 00029 #include "llvm/CodeGen/MachineFunction.h" 00030 #include "llvm/CodeGen/MachineInstrBuilder.h" 00031 #include "llvm/CodeGen/MachineJumpTableInfo.h" 00032 #include "llvm/CodeGen/MachineModuleInfo.h" 00033 #include "llvm/CodeGen/MachineRegisterInfo.h" 00034 #include "llvm/CodeGen/SelectionDAG.h" 00035 #include "llvm/DebugInfo.h" 00036 #include "llvm/IR/CallingConv.h" 00037 #include "llvm/IR/Constants.h" 00038 #include "llvm/IR/DataLayout.h" 00039 #include "llvm/IR/DerivedTypes.h" 00040 #include "llvm/IR/Function.h" 00041 #include "llvm/IR/GlobalVariable.h" 00042 #include "llvm/IR/InlineAsm.h" 00043 #include "llvm/IR/Instructions.h" 00044 #include "llvm/IR/IntrinsicInst.h" 00045 #include "llvm/IR/Intrinsics.h" 00046 #include "llvm/IR/LLVMContext.h" 00047 #include 
"llvm/IR/Module.h" 00048 #include "llvm/Support/CommandLine.h" 00049 #include "llvm/Support/Debug.h" 00050 #include "llvm/Support/ErrorHandling.h" 00051 #include "llvm/Support/IntegersSubsetMapping.h" 00052 #include "llvm/Support/MathExtras.h" 00053 #include "llvm/Support/raw_ostream.h" 00054 #include "llvm/Target/TargetFrameLowering.h" 00055 #include "llvm/Target/TargetInstrInfo.h" 00056 #include "llvm/Target/TargetIntrinsicInfo.h" 00057 #include "llvm/Target/TargetLibraryInfo.h" 00058 #include "llvm/Target/TargetLowering.h" 00059 #include "llvm/Target/TargetOptions.h" 00060 #include <algorithm> 00061 using namespace llvm; 00062 00063 /// LimitFloatPrecision - Generate low-precision inline sequences for 00064 /// some float libcalls (6, 8 or 12 bits). 00065 static unsigned LimitFloatPrecision; 00066 00067 static cl::opt<unsigned, true> 00068 LimitFPPrecision("limit-float-precision", 00069 cl::desc("Generate low-precision inline sequences " 00070 "for some float libcalls"), 00071 cl::location(LimitFloatPrecision), 00072 cl::init(0)); 00073 00074 // Limit the width of DAG chains. This is important in general to prevent 00075 // DAG-based analysis from blowing up. For example, alias analysis and 00076 // load clustering may not complete in reasonable time. It is difficult to 00077 // recognize and avoid this situation within each individual analysis, and 00078 // future analyses are likely to have the same behavior. Limiting DAG width is 00079 // the safe approach, and will be especially important with global DAGs. 00080 // 00081 // MaxParallelChains default is arbitrarily high to avoid affecting 00082 // optimization, but could be lowered to improve compile time. Any ld-ld-st-st 00083 // sequence over this should have been converted to llvm.memcpy by the 00084 // frontend. 
It easy to induce this behavior with .ll code such as: 00085 // %buffer = alloca [4096 x i8] 00086 // %data = load [4096 x i8]* %argPtr 00087 // store [4096 x i8] %data, [4096 x i8]* %buffer 00088 static const unsigned MaxParallelChains = 64; 00089 00090 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, 00091 const SDValue *Parts, unsigned NumParts, 00092 MVT PartVT, EVT ValueVT, const Value *V); 00093 00094 /// getCopyFromParts - Create a value that contains the specified legal parts 00095 /// combined into the value they represent. If the parts combine to a type 00096 /// larger then ValueVT then AssertOp can be used to specify whether the extra 00097 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT 00098 /// (ISD::AssertSext). 00099 static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, 00100 const SDValue *Parts, 00101 unsigned NumParts, MVT PartVT, EVT ValueVT, 00102 const Value *V, 00103 ISD::NodeType AssertOp = ISD::DELETED_NODE) { 00104 if (ValueVT.isVector()) 00105 return getCopyFromPartsVector(DAG, DL, Parts, NumParts, 00106 PartVT, ValueVT, V); 00107 00108 assert(NumParts > 0 && "No parts to assemble!"); 00109 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 00110 SDValue Val = Parts[0]; 00111 00112 if (NumParts > 1) { 00113 // Assemble the value from multiple parts. 00114 if (ValueVT.isInteger()) { 00115 unsigned PartBits = PartVT.getSizeInBits(); 00116 unsigned ValueBits = ValueVT.getSizeInBits(); 00117 00118 // Assemble the power of 2 part. 00119 unsigned RoundParts = NumParts & (NumParts - 1) ? 00120 1 << Log2_32(NumParts) : NumParts; 00121 unsigned RoundBits = PartBits * RoundParts; 00122 EVT RoundVT = RoundBits == ValueBits ? 
00123 ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); 00124 SDValue Lo, Hi; 00125 00126 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); 00127 00128 if (RoundParts > 2) { 00129 Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, 00130 PartVT, HalfVT, V); 00131 Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, 00132 RoundParts / 2, PartVT, HalfVT, V); 00133 } else { 00134 Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); 00135 Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); 00136 } 00137 00138 if (TLI.isBigEndian()) 00139 std::swap(Lo, Hi); 00140 00141 Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); 00142 00143 if (RoundParts < NumParts) { 00144 // Assemble the trailing non-power-of-2 part. 00145 unsigned OddParts = NumParts - RoundParts; 00146 EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); 00147 Hi = getCopyFromParts(DAG, DL, 00148 Parts + RoundParts, OddParts, PartVT, OddVT, V); 00149 00150 // Combine the round and odd parts. 
00151 Lo = Val; 00152 if (TLI.isBigEndian()) 00153 std::swap(Lo, Hi); 00154 EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 00155 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); 00156 Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, 00157 DAG.getConstant(Lo.getValueType().getSizeInBits(), 00158 TLI.getPointerTy())); 00159 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); 00160 Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); 00161 } 00162 } else if (PartVT.isFloatingPoint()) { 00163 // FP split into multiple FP parts (for ppcf128) 00164 assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && 00165 "Unexpected split"); 00166 SDValue Lo, Hi; 00167 Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); 00168 Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); 00169 if (TLI.isBigEndian()) 00170 std::swap(Lo, Hi); 00171 Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); 00172 } else { 00173 // FP split into integer parts (soft fp) 00174 assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && 00175 !PartVT.isVector() && "Unexpected split"); 00176 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); 00177 Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); 00178 } 00179 } 00180 00181 // There is now one part, held in Val. Correct it to match ValueVT. 00182 EVT PartEVT = Val.getValueType(); 00183 00184 if (PartEVT == ValueVT) 00185 return Val; 00186 00187 if (PartEVT.isInteger() && ValueVT.isInteger()) { 00188 if (ValueVT.bitsLT(PartEVT)) { 00189 // For a truncate, see if we have any information to 00190 // indicate whether the truncated bits will always be 00191 // zero or sign-extension. 
00192 if (AssertOp != ISD::DELETED_NODE) 00193 Val = DAG.getNode(AssertOp, DL, PartEVT, Val, 00194 DAG.getValueType(ValueVT)); 00195 return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); 00196 } 00197 return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); 00198 } 00199 00200 if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { 00201 // FP_ROUND's are always exact here. 00202 if (ValueVT.bitsLT(Val.getValueType())) 00203 return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, 00204 DAG.getTargetConstant(1, TLI.getPointerTy())); 00205 00206 return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); 00207 } 00208 00209 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) 00210 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 00211 00212 llvm_unreachable("Unknown mismatch!"); 00213 } 00214 00215 /// getCopyFromPartsVector - Create a value that contains the specified legal 00216 /// parts combined into the value they represent. If the parts combine to a 00217 /// type larger then ValueVT then AssertOp can be used to specify whether the 00218 /// extra bits are known to be zero (ISD::AssertZext) or sign extended from 00219 /// ValueVT (ISD::AssertSext). 00220 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, 00221 const SDValue *Parts, unsigned NumParts, 00222 MVT PartVT, EVT ValueVT, const Value *V) { 00223 assert(ValueVT.isVector() && "Not a vector value"); 00224 assert(NumParts > 0 && "No parts to assemble!"); 00225 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 00226 SDValue Val = Parts[0]; 00227 00228 // Handle a multi-element vector. 00229 if (NumParts > 1) { 00230 EVT IntermediateVT; 00231 MVT RegisterVT; 00232 unsigned NumIntermediates; 00233 unsigned NumRegs = 00234 TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, 00235 NumIntermediates, RegisterVT); 00236 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); 00237 NumParts = NumRegs; // Silence a compiler warning. 
00238 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); 00239 assert(RegisterVT == Parts[0].getSimpleValueType() && 00240 "Part type doesn't match part!"); 00241 00242 // Assemble the parts into intermediate operands. 00243 SmallVector<SDValue, 8> Ops(NumIntermediates); 00244 if (NumIntermediates == NumParts) { 00245 // If the register was not expanded, truncate or copy the value, 00246 // as appropriate. 00247 for (unsigned i = 0; i != NumParts; ++i) 00248 Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, 00249 PartVT, IntermediateVT, V); 00250 } else if (NumParts > 0) { 00251 // If the intermediate type was expanded, build the intermediate 00252 // operands from the parts. 00253 assert(NumParts % NumIntermediates == 0 && 00254 "Must expand into a divisible number of parts!"); 00255 unsigned Factor = NumParts / NumIntermediates; 00256 for (unsigned i = 0; i != NumIntermediates; ++i) 00257 Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, 00258 PartVT, IntermediateVT, V); 00259 } 00260 00261 // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the 00262 // intermediate operands. 00263 Val = DAG.getNode(IntermediateVT.isVector() ? 00264 ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, 00265 ValueVT, &Ops[0], NumIntermediates); 00266 } 00267 00268 // There is now one part, held in Val. Correct it to match ValueVT. 00269 EVT PartEVT = Val.getValueType(); 00270 00271 if (PartEVT == ValueVT) 00272 return Val; 00273 00274 if (PartEVT.isVector()) { 00275 // If the element type of the source/dest vectors are the same, but the 00276 // parts vector has more elements than the value vector, then we have a 00277 // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the 00278 // elements we want. 
00279 if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { 00280 assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && 00281 "Cannot narrow, it would be a lossy transformation"); 00282 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, 00283 DAG.getIntPtrConstant(0)); 00284 } 00285 00286 // Vector/Vector bitcast. 00287 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) 00288 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 00289 00290 assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && 00291 "Cannot handle this kind of promotion"); 00292 // Promoted vector extract 00293 bool Smaller = ValueVT.bitsLE(PartEVT); 00294 return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 00295 DL, ValueVT, Val); 00296 00297 } 00298 00299 // Trivial bitcast if the types are the same size and the destination 00300 // vector type is legal. 00301 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() && 00302 TLI.isTypeLegal(ValueVT)) 00303 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 00304 00305 // Handle cases such as i8 -> <1 x i1> 00306 if (ValueVT.getVectorNumElements() != 1) { 00307 LLVMContext &Ctx = *DAG.getContext(); 00308 Twine ErrMsg("non-trivial scalar-to-vector conversion"); 00309 if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { 00310 if (const CallInst *CI = dyn_cast<CallInst>(I)) 00311 if (isa<InlineAsm>(CI->getCalledValue())) 00312 ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; 00313 Ctx.emitError(I, ErrMsg); 00314 } else { 00315 Ctx.emitError(ErrMsg); 00316 } 00317 return DAG.getUNDEF(ValueVT); 00318 } 00319 00320 if (ValueVT.getVectorNumElements() == 1 && 00321 ValueVT.getVectorElementType() != PartEVT) { 00322 bool Smaller = ValueVT.bitsLE(PartEVT); 00323 Val = DAG.getNode((Smaller ? 
ISD::TRUNCATE : ISD::ANY_EXTEND), 00324 DL, ValueVT.getScalarType(), Val); 00325 } 00326 00327 return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); 00328 } 00329 00330 static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, 00331 SDValue Val, SDValue *Parts, unsigned NumParts, 00332 MVT PartVT, const Value *V); 00333 00334 /// getCopyToParts - Create a series of nodes that contain the specified value 00335 /// split into legal parts. If the parts contain more bits than Val, then, for 00336 /// integers, ExtendKind can be used to specify how to generate the extra bits. 00337 static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, 00338 SDValue Val, SDValue *Parts, unsigned NumParts, 00339 MVT PartVT, const Value *V, 00340 ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { 00341 EVT ValueVT = Val.getValueType(); 00342 00343 // Handle the vector case separately. 00344 if (ValueVT.isVector()) 00345 return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); 00346 00347 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 00348 unsigned PartBits = PartVT.getSizeInBits(); 00349 unsigned OrigNumParts = NumParts; 00350 assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); 00351 00352 if (NumParts == 0) 00353 return; 00354 00355 assert(!ValueVT.isVector() && "Vector case handled elsewhere"); 00356 EVT PartEVT = PartVT; 00357 if (PartEVT == ValueVT) { 00358 assert(NumParts == 1 && "No-op copy with multiple parts!"); 00359 Parts[0] = Val; 00360 return; 00361 } 00362 00363 if (NumParts * PartBits > ValueVT.getSizeInBits()) { 00364 // If the parts cover more bits than the value has, promote the value. 
00365 if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { 00366 assert(NumParts == 1 && "Do not know what to promote to!"); 00367 Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); 00368 } else { 00369 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && 00370 ValueVT.isInteger() && 00371 "Unknown mismatch!"); 00372 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 00373 Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); 00374 if (PartVT == MVT::x86mmx) 00375 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 00376 } 00377 } else if (PartBits == ValueVT.getSizeInBits()) { 00378 // Different types of the same size. 00379 assert(NumParts == 1 && PartEVT != ValueVT); 00380 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 00381 } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { 00382 // If the parts cover less bits than value has, truncate the value. 00383 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && 00384 ValueVT.isInteger() && 00385 "Unknown mismatch!"); 00386 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 00387 Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); 00388 if (PartVT == MVT::x86mmx) 00389 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 00390 } 00391 00392 // The value may have changed - recompute ValueVT. 
00393 ValueVT = Val.getValueType(); 00394 assert(NumParts * PartBits == ValueVT.getSizeInBits() && 00395 "Failed to tile the value with PartVT!"); 00396 00397 if (NumParts == 1) { 00398 if (PartEVT != ValueVT) { 00399 LLVMContext &Ctx = *DAG.getContext(); 00400 Twine ErrMsg("scalar-to-vector conversion failed"); 00401 if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { 00402 if (const CallInst *CI = dyn_cast<CallInst>(I)) 00403 if (isa<InlineAsm>(CI->getCalledValue())) 00404 ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; 00405 Ctx.emitError(I, ErrMsg); 00406 } else { 00407 Ctx.emitError(ErrMsg); 00408 } 00409 } 00410 00411 Parts[0] = Val; 00412 return; 00413 } 00414 00415 // Expand the value into multiple parts. 00416 if (NumParts & (NumParts - 1)) { 00417 // The number of parts is not a power of 2. Split off and copy the tail. 00418 assert(PartVT.isInteger() && ValueVT.isInteger() && 00419 "Do not know what to expand to!"); 00420 unsigned RoundParts = 1 << Log2_32(NumParts); 00421 unsigned RoundBits = RoundParts * PartBits; 00422 unsigned OddParts = NumParts - RoundParts; 00423 SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, 00424 DAG.getIntPtrConstant(RoundBits)); 00425 getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); 00426 00427 if (TLI.isBigEndian()) 00428 // The odd parts were reversed by getCopyToParts - unreverse them. 00429 std::reverse(Parts + RoundParts, Parts + NumParts); 00430 00431 NumParts = RoundParts; 00432 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 00433 Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); 00434 } 00435 00436 // The number of parts is a power of 2. Repeatedly bisect the value using 00437 // EXTRACT_ELEMENT. 
00438 Parts[0] = DAG.getNode(ISD::BITCAST, DL, 00439 EVT::getIntegerVT(*DAG.getContext(), 00440 ValueVT.getSizeInBits()), 00441 Val); 00442 00443 for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { 00444 for (unsigned i = 0; i < NumParts; i += StepSize) { 00445 unsigned ThisBits = StepSize * PartBits / 2; 00446 EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); 00447 SDValue &Part0 = Parts[i]; 00448 SDValue &Part1 = Parts[i+StepSize/2]; 00449 00450 Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, 00451 ThisVT, Part0, DAG.getIntPtrConstant(1)); 00452 Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, 00453 ThisVT, Part0, DAG.getIntPtrConstant(0)); 00454 00455 if (ThisBits == PartBits && ThisVT != PartVT) { 00456 Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); 00457 Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); 00458 } 00459 } 00460 } 00461 00462 if (TLI.isBigEndian()) 00463 std::reverse(Parts, Parts + OrigNumParts); 00464 } 00465 00466 00467 /// getCopyToPartsVector - Create a series of nodes that contain the specified 00468 /// value split into legal parts. 00469 static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, 00470 SDValue Val, SDValue *Parts, unsigned NumParts, 00471 MVT PartVT, const Value *V) { 00472 EVT ValueVT = Val.getValueType(); 00473 assert(ValueVT.isVector() && "Not a vector"); 00474 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 00475 00476 if (NumParts == 1) { 00477 EVT PartEVT = PartVT; 00478 if (PartEVT == ValueVT) { 00479 // Nothing to do. 00480 } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { 00481 // Bitconvert vector->vector case. 00482 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 00483 } else if (PartVT.isVector() && 00484 PartEVT.getVectorElementType() == ValueVT.getVectorElementType() && 00485 PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { 00486 EVT ElementVT = PartVT.getVectorElementType(); 00487 // Vector widening case, e.g. 
<2 x float> -> <4 x float>. Shuffle in 00488 // undef elements. 00489 SmallVector<SDValue, 16> Ops; 00490 for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) 00491 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 00492 ElementVT, Val, DAG.getIntPtrConstant(i))); 00493 00494 for (unsigned i = ValueVT.getVectorNumElements(), 00495 e = PartVT.getVectorNumElements(); i != e; ++i) 00496 Ops.push_back(DAG.getUNDEF(ElementVT)); 00497 00498 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); 00499 00500 // FIXME: Use CONCAT for 2x -> 4x. 00501 00502 //SDValue UndefElts = DAG.getUNDEF(VectorTy); 00503 //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); 00504 } else if (PartVT.isVector() && 00505 PartEVT.getVectorElementType().bitsGE( 00506 ValueVT.getVectorElementType()) && 00507 PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { 00508 00509 // Promoted vector extract 00510 bool Smaller = PartEVT.bitsLE(ValueVT); 00511 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 00512 DL, PartVT, Val); 00513 } else{ 00514 // Vector -> scalar conversion. 00515 assert(ValueVT.getVectorNumElements() == 1 && 00516 "Only trivial vector-to-scalar conversions should get here!"); 00517 Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 00518 PartVT, Val, DAG.getIntPtrConstant(0)); 00519 00520 bool Smaller = ValueVT.bitsLE(PartVT); 00521 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 00522 DL, PartVT, Val); 00523 } 00524 00525 Parts[0] = Val; 00526 return; 00527 } 00528 00529 // Handle a multi-element vector. 
00530 EVT IntermediateVT; 00531 MVT RegisterVT; 00532 unsigned NumIntermediates; 00533 unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, 00534 IntermediateVT, 00535 NumIntermediates, RegisterVT); 00536 unsigned NumElements = ValueVT.getVectorNumElements(); 00537 00538 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); 00539 NumParts = NumRegs; // Silence a compiler warning. 00540 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); 00541 00542 // Split the vector into intermediate operands. 00543 SmallVector<SDValue, 8> Ops(NumIntermediates); 00544 for (unsigned i = 0; i != NumIntermediates; ++i) { 00545 if (IntermediateVT.isVector()) 00546 Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, 00547 IntermediateVT, Val, 00548 DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); 00549 else 00550 Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 00551 IntermediateVT, Val, DAG.getIntPtrConstant(i)); 00552 } 00553 00554 // Split the intermediate operands into legal parts. 00555 if (NumParts == NumIntermediates) { 00556 // If the register was not expanded, promote or copy the value, 00557 // as appropriate. 00558 for (unsigned i = 0; i != NumParts; ++i) 00559 getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); 00560 } else if (NumParts > 0) { 00561 // If the intermediate type was expanded, split each the value into 00562 // legal parts. 00563 assert(NumParts % NumIntermediates == 0 && 00564 "Must expand into a divisible number of parts!"); 00565 unsigned Factor = NumParts / NumIntermediates; 00566 for (unsigned i = 0; i != NumIntermediates; ++i) 00567 getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); 00568 } 00569 } 00570 00571 namespace { 00572 /// RegsForValue - This struct represents the registers (physical or virtual) 00573 /// that a particular set of values is assigned, and the type information 00574 /// about the value. 
The most common situation is to represent one value at a 00575 /// time, but struct or array values are handled element-wise as multiple 00576 /// values. The splitting of aggregates is performed recursively, so that we 00577 /// never have aggregate-typed registers. The values at this point do not 00578 /// necessarily have legal types, so each value may require one or more 00579 /// registers of some legal type. 00580 /// 00581 struct RegsForValue { 00582 /// ValueVTs - The value types of the values, which may not be legal, and 00583 /// may need be promoted or synthesized from one or more registers. 00584 /// 00585 SmallVector<EVT, 4> ValueVTs; 00586 00587 /// RegVTs - The value types of the registers. This is the same size as 00588 /// ValueVTs and it records, for each value, what the type of the assigned 00589 /// register or registers are. (Individual values are never synthesized 00590 /// from more than one type of register.) 00591 /// 00592 /// With virtual registers, the contents of RegVTs is redundant with TLI's 00593 /// getRegisterType member function, however when with physical registers 00594 /// it is necessary to have a separate record of the types. 00595 /// 00596 SmallVector<MVT, 4> RegVTs; 00597 00598 /// Regs - This list holds the registers assigned to the values. 00599 /// Each legal or promoted value requires one register, and each 00600 /// expanded value requires multiple registers. 
00601 /// 00602 SmallVector<unsigned, 4> Regs; 00603 00604 RegsForValue() {} 00605 00606 RegsForValue(const SmallVector<unsigned, 4> ®s, 00607 MVT regvt, EVT valuevt) 00608 : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} 00609 00610 RegsForValue(LLVMContext &Context, const TargetLowering &tli, 00611 unsigned Reg, Type *Ty) { 00612 ComputeValueVTs(tli, Ty, ValueVTs); 00613 00614 for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { 00615 EVT ValueVT = ValueVTs[Value]; 00616 unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); 00617 MVT RegisterVT = tli.getRegisterType(Context, ValueVT); 00618 for (unsigned i = 0; i != NumRegs; ++i) 00619 Regs.push_back(Reg + i); 00620 RegVTs.push_back(RegisterVT); 00621 Reg += NumRegs; 00622 } 00623 } 00624 00625 /// areValueTypesLegal - Return true if types of all the values are legal. 00626 bool areValueTypesLegal(const TargetLowering &TLI) { 00627 for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { 00628 MVT RegisterVT = RegVTs[Value]; 00629 if (!TLI.isTypeLegal(RegisterVT)) 00630 return false; 00631 } 00632 return true; 00633 } 00634 00635 /// append - Add the specified values to this one. 00636 void append(const RegsForValue &RHS) { 00637 ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); 00638 RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); 00639 Regs.append(RHS.Regs.begin(), RHS.Regs.end()); 00640 } 00641 00642 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from 00643 /// this value and returns the result as a ValueVTs value. This uses 00644 /// Chain/Flag as the input and updates them for the output Chain/Flag. 00645 /// If the Flag pointer is NULL, no flag is used. 
00646 SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, 00647 DebugLoc dl, 00648 SDValue &Chain, SDValue *Flag, 00649 const Value *V = 0) const; 00650 00651 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the 00652 /// specified value into the registers specified by this object. This uses 00653 /// Chain/Flag as the input and updates them for the output Chain/Flag. 00654 /// If the Flag pointer is NULL, no flag is used. 00655 void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, 00656 SDValue &Chain, SDValue *Flag, const Value *V) const; 00657 00658 /// AddInlineAsmOperands - Add this value to the specified inlineasm node 00659 /// operand list. This adds the code marker, matching input operand index 00660 /// (if applicable), and includes the number of values added into it. 00661 void AddInlineAsmOperands(unsigned Kind, 00662 bool HasMatching, unsigned MatchingIdx, 00663 SelectionDAG &DAG, 00664 std::vector<SDValue> &Ops) const; 00665 }; 00666 } 00667 00668 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from 00669 /// this value and returns the result as a ValueVT value. This uses 00670 /// Chain/Flag as the input and updates them for the output Chain/Flag. 00671 /// If the Flag pointer is NULL, no flag is used. 00672 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, 00673 FunctionLoweringInfo &FuncInfo, 00674 DebugLoc dl, 00675 SDValue &Chain, SDValue *Flag, 00676 const Value *V) const { 00677 // A Value with type {} or [0 x %t] needs no registers. 00678 if (ValueVTs.empty()) 00679 return SDValue(); 00680 00681 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 00682 00683 // Assemble the legal parts into the final values. 00684 SmallVector<SDValue, 4> Values(ValueVTs.size()); 00685 SmallVector<SDValue, 8> Parts; 00686 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { 00687 // Copy the legal parts from the registers. 
00688 EVT ValueVT = ValueVTs[Value]; 00689 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); 00690 MVT RegisterVT = RegVTs[Value]; 00691 00692 Parts.resize(NumRegs); 00693 for (unsigned i = 0; i != NumRegs; ++i) { 00694 SDValue P; 00695 if (Flag == 0) { 00696 P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); 00697 } else { 00698 P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); 00699 *Flag = P.getValue(2); 00700 } 00701 00702 Chain = P.getValue(1); 00703 Parts[i] = P; 00704 00705 // If the source register was virtual and if we know something about it, 00706 // add an assert node. 00707 if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || 00708 !RegisterVT.isInteger() || RegisterVT.isVector()) 00709 continue; 00710 00711 const FunctionLoweringInfo::LiveOutInfo *LOI = 00712 FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); 00713 if (!LOI) 00714 continue; 00715 00716 unsigned RegSize = RegisterVT.getSizeInBits(); 00717 unsigned NumSignBits = LOI->NumSignBits; 00718 unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); 00719 00720 // FIXME: We capture more information than the dag can represent. For 00721 // now, just use the tightest assertzext/assertsext possible. 
00722 bool isSExt = true; 00723 EVT FromVT(MVT::Other); 00724 if (NumSignBits == RegSize) 00725 isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 00726 else if (NumZeroBits >= RegSize-1) 00727 isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 00728 else if (NumSignBits > RegSize-8) 00729 isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 00730 else if (NumZeroBits >= RegSize-8) 00731 isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 00732 else if (NumSignBits > RegSize-16) 00733 isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 00734 else if (NumZeroBits >= RegSize-16) 00735 isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 00736 else if (NumSignBits > RegSize-32) 00737 isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 00738 else if (NumZeroBits >= RegSize-32) 00739 isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 00740 else 00741 continue; 00742 00743 // Add an assertion node. 00744 assert(FromVT != MVT::Other); 00745 Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, 00746 RegisterVT, P, DAG.getValueType(FromVT)); 00747 } 00748 00749 Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), 00750 NumRegs, RegisterVT, ValueVT, V); 00751 Part += NumRegs; 00752 Parts.clear(); 00753 } 00754 00755 return DAG.getNode(ISD::MERGE_VALUES, dl, 00756 DAG.getVTList(&ValueVTs[0], ValueVTs.size()), 00757 &Values[0], ValueVTs.size()); 00758 } 00759 00760 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the 00761 /// specified value into the registers specified by this object. This uses 00762 /// Chain/Flag as the input and updates them for the output Chain/Flag. 00763 /// If the Flag pointer is NULL, no flag is used. 00764 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, 00765 SDValue &Chain, SDValue *Flag, 00766 const Value *V) const { 00767 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 00768 00769 // Get the list of the values's legal parts. 
00770 unsigned NumRegs = Regs.size(); 00771 SmallVector<SDValue, 8> Parts(NumRegs); 00772 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { 00773 EVT ValueVT = ValueVTs[Value]; 00774 unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); 00775 MVT RegisterVT = RegVTs[Value]; 00776 ISD::NodeType ExtendKind = 00777 TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND; 00778 00779 getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), 00780 &Parts[Part], NumParts, RegisterVT, V, ExtendKind); 00781 Part += NumParts; 00782 } 00783 00784 // Copy the parts into the registers. 00785 SmallVector<SDValue, 8> Chains(NumRegs); 00786 for (unsigned i = 0; i != NumRegs; ++i) { 00787 SDValue Part; 00788 if (Flag == 0) { 00789 Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); 00790 } else { 00791 Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); 00792 *Flag = Part.getValue(1); 00793 } 00794 00795 Chains[i] = Part.getValue(0); 00796 } 00797 00798 if (NumRegs == 1 || Flag) 00799 // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is 00800 // flagged to it. That is the CopyToReg nodes and the user are considered 00801 // a single scheduling unit. If we create a TokenFactor and return it as 00802 // chain, then the TokenFactor is both a predecessor (operand) of the 00803 // user as well as a successor (the TF operands are flagged to the user). 00804 // c1, f1 = CopyToReg 00805 // c2, f2 = CopyToReg 00806 // c3 = TokenFactor c1, c2 00807 // ... 00808 // = op c3, ..., f2 00809 Chain = Chains[NumRegs-1]; 00810 else 00811 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); 00812 } 00813 00814 /// AddInlineAsmOperands - Add this value to the specified inlineasm node 00815 /// operand list. This adds the code marker and includes the number of 00816 /// values added into it. 
00817 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, 00818 unsigned MatchingIdx, 00819 SelectionDAG &DAG, 00820 std::vector<SDValue> &Ops) const { 00821 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 00822 00823 unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); 00824 if (HasMatching) 00825 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); 00826 else if (!Regs.empty() && 00827 TargetRegisterInfo::isVirtualRegister(Regs.front())) { 00828 // Put the register class of the virtual registers in the flag word. That 00829 // way, later passes can recompute register class constraints for inline 00830 // assembly as well as normal instructions. 00831 // Don't do this for tied operands that can use the regclass information 00832 // from the def. 00833 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 00834 const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); 00835 Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); 00836 } 00837 00838 SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); 00839 Ops.push_back(Res); 00840 00841 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { 00842 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); 00843 MVT RegisterVT = RegVTs[Value]; 00844 for (unsigned i = 0; i != NumRegs; ++i) { 00845 assert(Reg < Regs.size() && "Mismatch in # registers expected"); 00846 Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); 00847 } 00848 } 00849 } 00850 00851 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, 00852 const TargetLibraryInfo *li) { 00853 AA = &aa; 00854 GFI = gfi; 00855 LibInfo = li; 00856 TD = DAG.getTarget().getDataLayout(); 00857 Context = DAG.getContext(); 00858 LPadToCallSiteMap.clear(); 00859 } 00860 00861 /// clear - Clear out the current SelectionDAG and the associated 00862 /// state and prepare this SelectionDAGBuilder object to be used 00863 /// for a new block. 
This doesn't clear out information about 00864 /// additional blocks that are needed to complete switch lowering 00865 /// or PHI node updating; that information is cleared out as it is 00866 /// consumed. 00867 void SelectionDAGBuilder::clear() { 00868 NodeMap.clear(); 00869 UnusedArgNodeMap.clear(); 00870 PendingLoads.clear(); 00871 PendingExports.clear(); 00872 CurDebugLoc = DebugLoc(); 00873 HasTailCall = false; 00874 } 00875 00876 /// clearDanglingDebugInfo - Clear the dangling debug information 00877 /// map. This function is separated from the clear so that debug 00878 /// information that is dangling in a basic block can be properly 00879 /// resolved in a different basic block. This allows the 00880 /// SelectionDAG to resolve dangling debug information attached 00881 /// to PHI nodes. 00882 void SelectionDAGBuilder::clearDanglingDebugInfo() { 00883 DanglingDebugInfoMap.clear(); 00884 } 00885 00886 /// getRoot - Return the current virtual root of the Selection DAG, 00887 /// flushing any PendingLoad items. This must be done before emitting 00888 /// a store or any other node that may need to be ordered after any 00889 /// prior load instructions. 00890 /// 00891 SDValue SelectionDAGBuilder::getRoot() { 00892 if (PendingLoads.empty()) 00893 return DAG.getRoot(); 00894 00895 if (PendingLoads.size() == 1) { 00896 SDValue Root = PendingLoads[0]; 00897 DAG.setRoot(Root); 00898 PendingLoads.clear(); 00899 return Root; 00900 } 00901 00902 // Otherwise, we have to make a token factor node. 00903 SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, 00904 &PendingLoads[0], PendingLoads.size()); 00905 PendingLoads.clear(); 00906 DAG.setRoot(Root); 00907 return Root; 00908 } 00909 00910 /// getControlRoot - Similar to getRoot, but instead of flushing all the 00911 /// PendingLoad items, flush all the PendingExports items. It is necessary 00912 /// to do this before emitting a terminator instruction. 
00913 /// 00914 SDValue SelectionDAGBuilder::getControlRoot() { 00915 SDValue Root = DAG.getRoot(); 00916 00917 if (PendingExports.empty()) 00918 return Root; 00919 00920 // Turn all of the CopyToReg chains into one factored node. 00921 if (Root.getOpcode() != ISD::EntryToken) { 00922 unsigned i = 0, e = PendingExports.size(); 00923 for (; i != e; ++i) { 00924 assert(PendingExports[i].getNode()->getNumOperands() > 1); 00925 if (PendingExports[i].getNode()->getOperand(0) == Root) 00926 break; // Don't add the root if we already indirectly depend on it. 00927 } 00928 00929 if (i == e) 00930 PendingExports.push_back(Root); 00931 } 00932 00933 Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, 00934 &PendingExports[0], 00935 PendingExports.size()); 00936 PendingExports.clear(); 00937 DAG.setRoot(Root); 00938 return Root; 00939 } 00940 00941 void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { 00942 if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. 00943 DAG.AssignOrdering(Node, SDNodeOrder); 00944 00945 for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) 00946 AssignOrderingToNode(Node->getOperand(I).getNode()); 00947 } 00948 00949 void SelectionDAGBuilder::visit(const Instruction &I) { 00950 // Set up outgoing PHI node register values before emitting the terminator. 
00951 if (isa<TerminatorInst>(&I)) 00952 HandlePHINodesInSuccessorBlocks(I.getParent()); 00953 00954 CurDebugLoc = I.getDebugLoc(); 00955 00956 visit(I.getOpcode(), I); 00957 00958 if (!isa<TerminatorInst>(&I) && !HasTailCall) 00959 CopyToExportRegsIfNeeded(&I); 00960 00961 CurDebugLoc = DebugLoc(); 00962 } 00963 00964 void SelectionDAGBuilder::visitPHI(const PHINode &) { 00965 llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); 00966 } 00967 00968 void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { 00969 // Note: this doesn't use InstVisitor, because it has to work with 00970 // ConstantExpr's in addition to instructions. 00971 switch (Opcode) { 00972 default: llvm_unreachable("Unknown instruction type encountered!"); 00973 // Build the switch statement using the Instruction.def file. 00974 #define HANDLE_INST(NUM, OPCODE, CLASS) \ 00975 case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; 00976 #include "llvm/IR/Instruction.def" 00977 } 00978 00979 // Assign the ordering to the freshly created DAG nodes. 00980 if (NodeMap.count(&I)) { 00981 ++SDNodeOrder; 00982 AssignOrderingToNode(getValue(&I).getNode()); 00983 } 00984 } 00985 00986 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, 00987 // generate the debug data structures now that we've seen its definition. 
00988 void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, 00989 SDValue Val) { 00990 DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; 00991 if (DDI.getDI()) { 00992 const DbgValueInst *DI = DDI.getDI(); 00993 DebugLoc dl = DDI.getdl(); 00994 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); 00995 MDNode *Variable = DI->getVariable(); 00996 uint64_t Offset = DI->getOffset(); 00997 SDDbgValue *SDV; 00998 if (Val.getNode()) { 00999 if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { 01000 SDV = DAG.getDbgValue(Variable, Val.getNode(), 01001 Val.getResNo(), Offset, dl, DbgSDNodeOrder); 01002 DAG.AddDbgValue(SDV, Val.getNode(), false); 01003 } 01004 } else 01005 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 01006 DanglingDebugInfoMap[V] = DanglingDebugInfo(); 01007 } 01008 } 01009 01010 /// getValue - Return an SDValue for the given Value. 01011 SDValue SelectionDAGBuilder::getValue(const Value *V) { 01012 // If we already have an SDValue for this value, use it. It's important 01013 // to do this first, so that we don't create a CopyFromReg if we already 01014 // have a regular SDValue. 01015 SDValue &N = NodeMap[V]; 01016 if (N.getNode()) return N; 01017 01018 // If there's a virtual register allocated and initialized for this 01019 // value, use it. 01020 DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); 01021 if (It != FuncInfo.ValueMap.end()) { 01022 unsigned InReg = It->second; 01023 RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); 01024 SDValue Chain = DAG.getEntryNode(); 01025 N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); 01026 resolveDanglingDebugInfo(V, N); 01027 return N; 01028 } 01029 01030 // Otherwise create a new SDValue and remember it. 
01031 SDValue Val = getValueImpl(V); 01032 NodeMap[V] = Val; 01033 resolveDanglingDebugInfo(V, Val); 01034 return Val; 01035 } 01036 01037 /// getNonRegisterValue - Return an SDValue for the given Value, but 01038 /// don't look in FuncInfo.ValueMap for a virtual register. 01039 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { 01040 // If we already have an SDValue for this value, use it. 01041 SDValue &N = NodeMap[V]; 01042 if (N.getNode()) return N; 01043 01044 // Otherwise create a new SDValue and remember it. 01045 SDValue Val = getValueImpl(V); 01046 NodeMap[V] = Val; 01047 resolveDanglingDebugInfo(V, Val); 01048 return Val; 01049 } 01050 01051 /// getValueImpl - Helper function for getValue and getNonRegisterValue. 01052 /// Create an SDValue for the given value. 01053 SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { 01054 if (const Constant *C = dyn_cast<Constant>(V)) { 01055 EVT VT = TLI.getValueType(V->getType(), true); 01056 01057 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) 01058 return DAG.getConstant(*CI, VT); 01059 01060 if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 01061 return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT); 01062 01063 if (isa<ConstantPointerNull>(C)) 01064 return DAG.getConstant(0, TLI.getPointerTy()); 01065 01066 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 01067 return DAG.getConstantFP(*CFP, VT); 01068 01069 if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) 01070 return DAG.getUNDEF(VT); 01071 01072 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 01073 visit(CE->getOpcode(), *CE); 01074 SDValue N1 = NodeMap[V]; 01075 assert(N1.getNode() && "visit didn't populate the NodeMap!"); 01076 return N1; 01077 } 01078 01079 if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { 01080 SmallVector<SDValue, 4> Constants; 01081 for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); 01082 OI != OE; ++OI) { 01083 SDNode *Val = getValue(*OI).getNode(); 01084 // 
If the operand is an empty aggregate, there are no values. 01085 if (!Val) continue; 01086 // Add each leaf value from the operand to the Constants list 01087 // to form a flattened list of all the values. 01088 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) 01089 Constants.push_back(SDValue(Val, i)); 01090 } 01091 01092 return DAG.getMergeValues(&Constants[0], Constants.size(), 01093 getCurDebugLoc()); 01094 } 01095 01096 if (const ConstantDataSequential *CDS = 01097 dyn_cast<ConstantDataSequential>(C)) { 01098 SmallVector<SDValue, 4> Ops; 01099 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { 01100 SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); 01101 // Add each leaf value from the operand to the Constants list 01102 // to form a flattened list of all the values. 01103 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) 01104 Ops.push_back(SDValue(Val, i)); 01105 } 01106 01107 if (isa<ArrayType>(CDS->getType())) 01108 return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); 01109 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), 01110 VT, &Ops[0], Ops.size()); 01111 } 01112 01113 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { 01114 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && 01115 "Unknown struct or array constant!"); 01116 01117 SmallVector<EVT, 4> ValueVTs; 01118 ComputeValueVTs(TLI, C->getType(), ValueVTs); 01119 unsigned NumElts = ValueVTs.size(); 01120 if (NumElts == 0) 01121 return SDValue(); // empty struct 01122 SmallVector<SDValue, 4> Constants(NumElts); 01123 for (unsigned i = 0; i != NumElts; ++i) { 01124 EVT EltVT = ValueVTs[i]; 01125 if (isa<UndefValue>(C)) 01126 Constants[i] = DAG.getUNDEF(EltVT); 01127 else if (EltVT.isFloatingPoint()) 01128 Constants[i] = DAG.getConstantFP(0, EltVT); 01129 else 01130 Constants[i] = DAG.getConstant(0, EltVT); 01131 } 01132 01133 return DAG.getMergeValues(&Constants[0], NumElts, 01134 getCurDebugLoc()); 
01135 } 01136 01137 if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) 01138 return DAG.getBlockAddress(BA, VT); 01139 01140 VectorType *VecTy = cast<VectorType>(V->getType()); 01141 unsigned NumElements = VecTy->getNumElements(); 01142 01143 // Now that we know the number and type of the elements, get that number of 01144 // elements into the Ops array based on what kind of constant it is. 01145 SmallVector<SDValue, 16> Ops; 01146 if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { 01147 for (unsigned i = 0; i != NumElements; ++i) 01148 Ops.push_back(getValue(CV->getOperand(i))); 01149 } else { 01150 assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); 01151 EVT EltVT = TLI.getValueType(VecTy->getElementType()); 01152 01153 SDValue Op; 01154 if (EltVT.isFloatingPoint()) 01155 Op = DAG.getConstantFP(0, EltVT); 01156 else 01157 Op = DAG.getConstant(0, EltVT); 01158 Ops.assign(NumElements, Op); 01159 } 01160 01161 // Create a BUILD_VECTOR node. 01162 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), 01163 VT, &Ops[0], Ops.size()); 01164 } 01165 01166 // If this is a static alloca, generate it as the frameindex instead of 01167 // computation. 01168 if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 01169 DenseMap<const AllocaInst*, int>::iterator SI = 01170 FuncInfo.StaticAllocaMap.find(AI); 01171 if (SI != FuncInfo.StaticAllocaMap.end()) 01172 return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); 01173 } 01174 01175 // If this is an instruction which fast-isel has deferred, select it now. 
01176 if (const Instruction *Inst = dyn_cast<Instruction>(V)) { 01177 unsigned InReg = FuncInfo.InitializeRegForValue(Inst); 01178 RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); 01179 SDValue Chain = DAG.getEntryNode(); 01180 return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); 01181 } 01182 01183 llvm_unreachable("Can't get register for value!"); 01184 } 01185 01186 void SelectionDAGBuilder::visitRet(const ReturnInst &I) { 01187 SDValue Chain = getControlRoot(); 01188 SmallVector<ISD::OutputArg, 8> Outs; 01189 SmallVector<SDValue, 8> OutVals; 01190 01191 if (!FuncInfo.CanLowerReturn) { 01192 unsigned DemoteReg = FuncInfo.DemoteRegister; 01193 const Function *F = I.getParent()->getParent(); 01194 01195 // Emit a store of the return value through the virtual register. 01196 // Leave Outs empty so that LowerReturn won't try to load return 01197 // registers the usual way. 01198 SmallVector<EVT, 1> PtrValueVTs; 01199 ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), 01200 PtrValueVTs); 01201 01202 SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); 01203 SDValue RetOp = getValue(I.getOperand(0)); 01204 01205 SmallVector<EVT, 4> ValueVTs; 01206 SmallVector<uint64_t, 4> Offsets; 01207 ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); 01208 unsigned NumValues = ValueVTs.size(); 01209 01210 SmallVector<SDValue, 4> Chains(NumValues); 01211 for (unsigned i = 0; i != NumValues; ++i) { 01212 SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), 01213 RetPtr.getValueType(), RetPtr, 01214 DAG.getIntPtrConstant(Offsets[i])); 01215 Chains[i] = 01216 DAG.getStore(Chain, getCurDebugLoc(), 01217 SDValue(RetOp.getNode(), RetOp.getResNo() + i), 01218 // FIXME: better loc info would be nice. 
01219 Add, MachinePointerInfo(), false, false, 0); 01220 } 01221 01222 Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), 01223 MVT::Other, &Chains[0], NumValues); 01224 } else if (I.getNumOperands() != 0) { 01225 SmallVector<EVT, 4> ValueVTs; 01226 ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); 01227 unsigned NumValues = ValueVTs.size(); 01228 if (NumValues) { 01229 SDValue RetOp = getValue(I.getOperand(0)); 01230 for (unsigned j = 0, f = NumValues; j != f; ++j) { 01231 EVT VT = ValueVTs[j]; 01232 01233 ISD::NodeType ExtendKind = ISD::ANY_EXTEND; 01234 01235 const Function *F = I.getParent()->getParent(); 01236 if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 01237 Attribute::SExt)) 01238 ExtendKind = ISD::SIGN_EXTEND; 01239 else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 01240 Attribute::ZExt)) 01241 ExtendKind = ISD::ZERO_EXTEND; 01242 01243 if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) 01244 VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); 01245 01246 unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); 01247 MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); 01248 SmallVector<SDValue, 4> Parts(NumParts); 01249 getCopyToParts(DAG, getCurDebugLoc(), 01250 SDValue(RetOp.getNode(), RetOp.getResNo() + j), 01251 &Parts[0], NumParts, PartVT, &I, ExtendKind); 01252 01253 // 'inreg' on function refers to return value 01254 ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); 01255 if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 01256 Attribute::InReg)) 01257 Flags.setInReg(); 01258 01259 // Propagate extension type if any 01260 if (ExtendKind == ISD::SIGN_EXTEND) 01261 Flags.setSExt(); 01262 else if (ExtendKind == ISD::ZERO_EXTEND) 01263 Flags.setZExt(); 01264 01265 for (unsigned i = 0; i < NumParts; ++i) { 01266 Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), 01267 /*isfixed=*/true, 0, 0)); 01268 OutVals.push_back(Parts[i]); 01269 } 01270 } 01271 } 01272 } 
01273 01274 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); 01275 CallingConv::ID CallConv = 01276 DAG.getMachineFunction().getFunction()->getCallingConv(); 01277 Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, 01278 Outs, OutVals, getCurDebugLoc(), DAG); 01279 01280 // Verify that the target's LowerReturn behaved as expected. 01281 assert(Chain.getNode() && Chain.getValueType() == MVT::Other && 01282 "LowerReturn didn't return a valid chain!"); 01283 01284 // Update the DAG with the new chain value resulting from return lowering. 01285 DAG.setRoot(Chain); 01286 } 01287 01288 /// CopyToExportRegsIfNeeded - If the given value has virtual registers 01289 /// created for it, emit nodes to copy the value into the virtual 01290 /// registers. 01291 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { 01292 // Skip empty types 01293 if (V->getType()->isEmptyTy()) 01294 return; 01295 01296 DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); 01297 if (VMI != FuncInfo.ValueMap.end()) { 01298 assert(!V->use_empty() && "Unused value assigned virtual registers!"); 01299 CopyValueToVirtualRegister(V, VMI->second); 01300 } 01301 } 01302 01303 /// ExportFromCurrentBlock - If this condition isn't known to be exported from 01304 /// the current basic block, add it to ValueMap now so that we'll get a 01305 /// CopyTo/FromReg. 01306 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { 01307 // No need to export constants. 01308 if (!isa<Instruction>(V) && !isa<Argument>(V)) return; 01309 01310 // Already exported? 01311 if (FuncInfo.isExportedInst(V)) return; 01312 01313 unsigned Reg = FuncInfo.InitializeRegForValue(V); 01314 CopyValueToVirtualRegister(V, Reg); 01315 } 01316 01317 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, 01318 const BasicBlock *FromBB) { 01319 // The operands of the setcc have to be in this block. We don't know 01320 // how to export them from some other block. 
01321 if (const Instruction *VI = dyn_cast<Instruction>(V)) { 01322 // Can export from current BB. 01323 if (VI->getParent() == FromBB) 01324 return true; 01325 01326 // Is already exported, noop. 01327 return FuncInfo.isExportedInst(V); 01328 } 01329 01330 // If this is an argument, we can export it if the BB is the entry block or 01331 // if it is already exported. 01332 if (isa<Argument>(V)) { 01333 if (FromBB == &FromBB->getParent()->getEntryBlock()) 01334 return true; 01335 01336 // Otherwise, can only export this if it is already exported. 01337 return FuncInfo.isExportedInst(V); 01338 } 01339 01340 // Otherwise, constants can always be exported. 01341 return true; 01342 } 01343 01344 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. 01345 uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, 01346 const MachineBasicBlock *Dst) const { 01347 BranchProbabilityInfo *BPI = FuncInfo.BPI; 01348 if (!BPI) 01349 return 0; 01350 const BasicBlock *SrcBB = Src->getBasicBlock(); 01351 const BasicBlock *DstBB = Dst->getBasicBlock(); 01352 return BPI->getEdgeWeight(SrcBB, DstBB); 01353 } 01354 01355 void SelectionDAGBuilder:: 01356 addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, 01357 uint32_t Weight /* = 0 */) { 01358 if (!Weight) 01359 Weight = getEdgeWeight(Src, Dst); 01360 Src->addSuccessor(Dst, Weight); 01361 } 01362 01363 01364 static bool InBlock(const Value *V, const BasicBlock *BB) { 01365 if (const Instruction *I = dyn_cast<Instruction>(V)) 01366 return I->getParent() == BB; 01367 return true; 01368 } 01369 01370 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. 01371 /// This function emits a branch and is used at the leaves of an OR or an 01372 /// AND operator tree. 
01373 /// 01374 void 01375 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, 01376 MachineBasicBlock *TBB, 01377 MachineBasicBlock *FBB, 01378 MachineBasicBlock *CurBB, 01379 MachineBasicBlock *SwitchBB) { 01380 const BasicBlock *BB = CurBB->getBasicBlock(); 01381 01382 // If the leaf of the tree is a comparison, merge the condition into 01383 // the caseblock. 01384 if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { 01385 // The operands of the cmp have to be in this block. We don't know 01386 // how to export them from some other block. If this is the first block 01387 // of the sequence, no exporting is needed. 01388 if (CurBB == SwitchBB || 01389 (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && 01390 isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { 01391 ISD::CondCode Condition; 01392 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { 01393 Condition = getICmpCondCode(IC->getPredicate()); 01394 } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { 01395 Condition = getFCmpCondCode(FC->getPredicate()); 01396 if (TM.Options.NoNaNsFPMath) 01397 Condition = getFCmpCodeWithoutNaN(Condition); 01398 } else { 01399 Condition = ISD::SETEQ; // silence warning. 01400 llvm_unreachable("Unknown compare instruction"); 01401 } 01402 01403 CaseBlock CB(Condition, BOp->getOperand(0), 01404 BOp->getOperand(1), NULL, TBB, FBB, CurBB); 01405 SwitchCases.push_back(CB); 01406 return; 01407 } 01408 } 01409 01410 // Create a CaseBlock record representing this branch. 
01411 CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), 01412 NULL, TBB, FBB, CurBB); 01413 SwitchCases.push_back(CB); 01414 } 01415 01416 /// FindMergedConditions - If Cond is an expression like 01417 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, 01418 MachineBasicBlock *TBB, 01419 MachineBasicBlock *FBB, 01420 MachineBasicBlock *CurBB, 01421 MachineBasicBlock *SwitchBB, 01422 unsigned Opc) { 01423 // If this node is not part of the or/and tree, emit it as a branch. 01424 const Instruction *BOp = dyn_cast<Instruction>(Cond); 01425 if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || 01426 (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || 01427 BOp->getParent() != CurBB->getBasicBlock() || 01428 !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || 01429 !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { 01430 EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); 01431 return; 01432 } 01433 01434 // Create TmpBB after CurBB. 01435 MachineFunction::iterator BBI = CurBB; 01436 MachineFunction &MF = DAG.getMachineFunction(); 01437 MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); 01438 CurBB->getParent()->insert(++BBI, TmpBB); 01439 01440 if (Opc == Instruction::Or) { 01441 // Codegen X | Y as: 01442 // jmp_if_X TBB 01443 // jmp TmpBB 01444 // TmpBB: 01445 // jmp_if_Y TBB 01446 // jmp FBB 01447 // 01448 01449 // Emit the LHS condition. 01450 FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); 01451 01452 // Emit the RHS condition into TmpBB. 01453 FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); 01454 } else { 01455 assert(Opc == Instruction::And && "Unknown merge op!"); 01456 // Codegen X & Y as: 01457 // jmp_if_X TmpBB 01458 // jmp FBB 01459 // TmpBB: 01460 // jmp_if_Y TBB 01461 // jmp FBB 01462 // 01463 // This requires creation of TmpBB after CurBB. 01464 01465 // Emit the LHS condition. 
01466 FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); 01467 01468 // Emit the RHS condition into TmpBB. 01469 FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); 01470 } 01471 } 01472 01473 /// If the set of cases should be emitted as a series of branches, return true. 01474 /// If we should emit this as a bunch of and/or'd together conditions, return 01475 /// false. 01476 bool 01477 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ 01478 if (Cases.size() != 2) return true; 01479 01480 // If this is two comparisons of the same values or'd or and'd together, they 01481 // will get folded into a single comparison, so don't emit two blocks. 01482 if ((Cases[0].CmpLHS == Cases[1].CmpLHS && 01483 Cases[0].CmpRHS == Cases[1].CmpRHS) || 01484 (Cases[0].CmpRHS == Cases[1].CmpLHS && 01485 Cases[0].CmpLHS == Cases[1].CmpRHS)) { 01486 return false; 01487 } 01488 01489 // Handle: (X != null) | (Y != null) --> (X|Y) != 0 01490 // Handle: (X == null) & (Y == null) --> (X|Y) == 0 01491 if (Cases[0].CmpRHS == Cases[1].CmpRHS && 01492 Cases[0].CC == Cases[1].CC && 01493 isa<Constant>(Cases[0].CmpRHS) && 01494 cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { 01495 if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) 01496 return false; 01497 if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) 01498 return false; 01499 } 01500 01501 return true; 01502 } 01503 01504 void SelectionDAGBuilder::visitBr(const BranchInst &I) { 01505 MachineBasicBlock *BrMBB = FuncInfo.MBB; 01506 01507 // Update machine-CFG edges. 01508 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; 01509 01510 // Figure out which block is immediately after the current one. 01511 MachineBasicBlock *NextBlock = 0; 01512 MachineFunction::iterator BBI = BrMBB; 01513 if (++BBI != FuncInfo.MF->end()) 01514 NextBlock = BBI; 01515 01516 if (I.isUnconditional()) { 01517 // Update machine-CFG edges. 
01518 BrMBB->addSuccessor(Succ0MBB); 01519 01520 // If this is not a fall-through branch, emit the branch. 01521 if (Succ0MBB != NextBlock) 01522 DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), 01523 MVT::Other, getControlRoot(), 01524 DAG.getBasicBlock(Succ0MBB))); 01525 01526 return; 01527 } 01528 01529 // If this condition is one of the special cases we handle, do special stuff 01530 // now. 01531 const Value *CondVal = I.getCondition(); 01532 MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; 01533 01534 // If this is a series of conditions that are or'd or and'd together, emit 01535 // this as a sequence of branches instead of setcc's with and/or operations. 01536 // As long as jumps are not expensive, this should improve performance. 01537 // For example, instead of something like: 01538 // cmp A, B 01539 // C = seteq 01540 // cmp D, E 01541 // F = setle 01542 // or C, F 01543 // jnz foo 01544 // Emit: 01545 // cmp A, B 01546 // je foo 01547 // cmp D, E 01548 // jle foo 01549 // 01550 if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { 01551 if (!TLI.isJumpExpensive() && 01552 BOp->hasOneUse() && 01553 (BOp->getOpcode() == Instruction::And || 01554 BOp->getOpcode() == Instruction::Or)) { 01555 FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, 01556 BOp->getOpcode()); 01557 // If the compares in later blocks need to use values not currently 01558 // exported from this block, export them now. This block should always 01559 // be the first entry. 01560 assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); 01561 01562 // Allow some cases to be rejected. 01563 if (ShouldEmitAsBranches(SwitchCases)) { 01564 for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { 01565 ExportFromCurrentBlock(SwitchCases[i].CmpLHS); 01566 ExportFromCurrentBlock(SwitchCases[i].CmpRHS); 01567 } 01568 01569 // Emit the branch for this block. 
01570 visitSwitchCase(SwitchCases[0], BrMBB); 01571 SwitchCases.erase(SwitchCases.begin()); 01572 return; 01573 } 01574 01575 // Okay, we decided not to do this, remove any inserted MBB's and clear 01576 // SwitchCases. 01577 for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) 01578 FuncInfo.MF->erase(SwitchCases[i].ThisBB); 01579 01580 SwitchCases.clear(); 01581 } 01582 } 01583 01584 // Create a CaseBlock record representing this branch. 01585 CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), 01586 NULL, Succ0MBB, Succ1MBB, BrMBB); 01587 01588 // Use visitSwitchCase to actually insert the fast branch sequence for this 01589 // cond branch. 01590 visitSwitchCase(CB, BrMBB); 01591 } 01592 01593 /// visitSwitchCase - Emits the necessary code to represent a single node in 01594 /// the binary search tree resulting from lowering a switch instruction. 01595 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, 01596 MachineBasicBlock *SwitchBB) { 01597 SDValue Cond; 01598 SDValue CondLHS = getValue(CB.CmpLHS); 01599 DebugLoc dl = getCurDebugLoc(); 01600 01601 // Build the setcc now. 01602 if (CB.CmpMHS == NULL) { 01603 // Fold "(X == true)" to X and "(X == false)" to !X to 01604 // handle common cases produced by branch lowering. 
01605 if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && 01606 CB.CC == ISD::SETEQ) 01607 Cond = CondLHS; 01608 else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && 01609 CB.CC == ISD::SETEQ) { 01610 SDValue True = DAG.getConstant(1, CondLHS.getValueType()); 01611 Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); 01612 } else 01613 Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); 01614 } else { 01615 assert(CB.CC == ISD::SETCC_INVALID && 01616 "Condition is undefined for to-the-range belonging check."); 01617 01618 const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); 01619 const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); 01620 01621 SDValue CmpOp = getValue(CB.CmpMHS); 01622 EVT VT = CmpOp.getValueType(); 01623 01624 if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) { 01625 Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), 01626 ISD::SETULE); 01627 } else { 01628 SDValue SUB = DAG.getNode(ISD::SUB, dl, 01629 VT, CmpOp, DAG.getConstant(Low, VT)); 01630 Cond = DAG.getSetCC(dl, MVT::i1, SUB, 01631 DAG.getConstant(High-Low, VT), ISD::SETULE); 01632 } 01633 } 01634 01635 // Update successor info 01636 addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); 01637 // TrueBB and FalseBB are always different unless the incoming IR is 01638 // degenerate. This only happens when running llc on weird IR. 01639 if (CB.TrueBB != CB.FalseBB) 01640 addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); 01641 01642 // Set NextBlock to be the MBB immediately after the current one, if any. 01643 // This is used to avoid emitting unnecessary branches to the next block. 01644 MachineBasicBlock *NextBlock = 0; 01645 MachineFunction::iterator BBI = SwitchBB; 01646 if (++BBI != FuncInfo.MF->end()) 01647 NextBlock = BBI; 01648 01649 // If the lhs block is the next block, invert the condition so that we can 01650 // fall through to the lhs instead of the rhs block. 
01651 if (CB.TrueBB == NextBlock) { 01652 std::swap(CB.TrueBB, CB.FalseBB); 01653 SDValue True = DAG.getConstant(1, Cond.getValueType()); 01654 Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); 01655 } 01656 01657 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, 01658 MVT::Other, getControlRoot(), Cond, 01659 DAG.getBasicBlock(CB.TrueBB)); 01660 01661 // Insert the false branch. Do this even if it's a fall through branch, 01662 // this makes it easier to do DAG optimizations which require inverting 01663 // the branch condition. 01664 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, 01665 DAG.getBasicBlock(CB.FalseBB)); 01666 01667 DAG.setRoot(BrCond); 01668 } 01669 01670 /// visitJumpTable - Emit JumpTable node in the current MBB 01671 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { 01672 // Emit the code for the jump table 01673 assert(JT.Reg != -1U && "Should lower JT Header first!"); 01674 EVT PTy = TLI.getPointerTy(); 01675 SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), 01676 JT.Reg, PTy); 01677 SDValue Table = DAG.getJumpTable(JT.JTI, PTy); 01678 SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(), 01679 MVT::Other, Index.getValue(1), 01680 Table, Index); 01681 DAG.setRoot(BrJumpTable); 01682 } 01683 01684 /// visitJumpTableHeader - This function emits necessary code to produce index 01685 /// in the JumpTable from switch case. 01686 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, 01687 JumpTableHeader &JTH, 01688 MachineBasicBlock *SwitchBB) { 01689 // Subtract the lowest switch case value from the value being switched on and 01690 // conditional branch to default mbb if the result is greater than the 01691 // difference between smallest and largest cases. 
01692 SDValue SwitchOp = getValue(JTH.SValue); 01693 EVT VT = SwitchOp.getValueType(); 01694 SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, 01695 DAG.getConstant(JTH.First, VT)); 01696 01697 // The SDNode we just created, which holds the value being switched on minus 01698 // the smallest case value, needs to be copied to a virtual register so it 01699 // can be used as an index into the jump table in a subsequent basic block. 01700 // This value may be smaller or larger than the target's pointer type, and 01701 // therefore require extension or truncating. 01702 SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); 01703 01704 unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); 01705 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), 01706 JumpTableReg, SwitchOp); 01707 JT.Reg = JumpTableReg; 01708 01709 // Emit the range check for the jump table, and branch to the default block 01710 // for the switch statement if the value being switched on exceeds the largest 01711 // case in the switch. 01712 SDValue CMP = DAG.getSetCC(getCurDebugLoc(), 01713 TLI.getSetCCResultType(*DAG.getContext(), 01714 Sub.getValueType()), 01715 Sub, 01716 DAG.getConstant(JTH.Last - JTH.First,VT), 01717 ISD::SETUGT); 01718 01719 // Set NextBlock to be the MBB immediately after the current one, if any. 01720 // This is used to avoid emitting unnecessary branches to the next block. 
01721 MachineBasicBlock *NextBlock = 0; 01722 MachineFunction::iterator BBI = SwitchBB; 01723 01724 if (++BBI != FuncInfo.MF->end()) 01725 NextBlock = BBI; 01726 01727 SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), 01728 MVT::Other, CopyTo, CMP, 01729 DAG.getBasicBlock(JT.Default)); 01730 01731 if (JT.MBB != NextBlock) 01732 BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, 01733 DAG.getBasicBlock(JT.MBB)); 01734 01735 DAG.setRoot(BrCond); 01736 } 01737 01738 /// visitBitTestHeader - This function emits necessary code to produce value 01739 /// suitable for "bit tests" 01740 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, 01741 MachineBasicBlock *SwitchBB) { 01742 // Subtract the minimum value 01743 SDValue SwitchOp = getValue(B.SValue); 01744 EVT VT = SwitchOp.getValueType(); 01745 SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, 01746 DAG.getConstant(B.First, VT)); 01747 01748 // Check range 01749 SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), 01750 TLI.getSetCCResultType(*DAG.getContext(), 01751 Sub.getValueType()), 01752 Sub, DAG.getConstant(B.Range, VT), 01753 ISD::SETUGT); 01754 01755 // Determine the type of the test operands. 01756 bool UsePtrType = false; 01757 if (!TLI.isTypeLegal(VT)) 01758 UsePtrType = true; 01759 else { 01760 for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) 01761 if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { 01762 // Switch table case range are encoded into series of masks. 01763 // Just use pointer type, it's guaranteed to fit. 
01764 UsePtrType = true; 01765 break; 01766 } 01767 } 01768 if (UsePtrType) { 01769 VT = TLI.getPointerTy(); 01770 Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT); 01771 } 01772 01773 B.RegVT = VT.getSimpleVT(); 01774 B.Reg = FuncInfo.CreateReg(B.RegVT); 01775 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), 01776 B.Reg, Sub); 01777 01778 // Set NextBlock to be the MBB immediately after the current one, if any. 01779 // This is used to avoid emitting unnecessary branches to the next block. 01780 MachineBasicBlock *NextBlock = 0; 01781 MachineFunction::iterator BBI = SwitchBB; 01782 if (++BBI != FuncInfo.MF->end()) 01783 NextBlock = BBI; 01784 01785 MachineBasicBlock* MBB = B.Cases[0].ThisBB; 01786 01787 addSuccessorWithWeight(SwitchBB, B.Default); 01788 addSuccessorWithWeight(SwitchBB, MBB); 01789 01790 SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), 01791 MVT::Other, CopyTo, RangeCmp, 01792 DAG.getBasicBlock(B.Default)); 01793 01794 if (MBB != NextBlock) 01795 BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, 01796 DAG.getBasicBlock(MBB)); 01797 01798 DAG.setRoot(BrRange); 01799 } 01800 01801 /// visitBitTestCase - this function produces one "bit test" 01802 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, 01803 MachineBasicBlock* NextMBB, 01804 uint32_t BranchWeightToNext, 01805 unsigned Reg, 01806 BitTestCase &B, 01807 MachineBasicBlock *SwitchBB) { 01808 MVT VT = BB.RegVT; 01809 SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), 01810 Reg, VT); 01811 SDValue Cmp; 01812 unsigned PopCount = CountPopulation_64(B.Mask); 01813 if (PopCount == 1) { 01814 // Testing for a single bit; just compare the shift count with what it 01815 // would need to be to shift a 1 bit in that position. 
01816 Cmp = DAG.getSetCC(getCurDebugLoc(), 01817 TLI.getSetCCResultType(*DAG.getContext(), VT), 01818 ShiftOp, 01819 DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), 01820 ISD::SETEQ); 01821 } else if (PopCount == BB.Range) { 01822 // There is only one zero bit in the range, test for it directly. 01823 Cmp = DAG.getSetCC(getCurDebugLoc(), 01824 TLI.getSetCCResultType(*DAG.getContext(), VT), 01825 ShiftOp, 01826 DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), 01827 ISD::SETNE); 01828 } else { 01829 // Make desired shift 01830 SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, 01831 DAG.getConstant(1, VT), ShiftOp); 01832 01833 // Emit bit tests and jumps 01834 SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), 01835 VT, SwitchVal, DAG.getConstant(B.Mask, VT)); 01836 Cmp = DAG.getSetCC(getCurDebugLoc(), 01837 TLI.getSetCCResultType(*DAG.getContext(), VT), 01838 AndOp, DAG.getConstant(0, VT), 01839 ISD::SETNE); 01840 } 01841 01842 // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. 01843 addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); 01844 // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. 01845 addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); 01846 01847 SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), 01848 MVT::Other, getControlRoot(), 01849 Cmp, DAG.getBasicBlock(B.TargetBB)); 01850 01851 // Set NextBlock to be the MBB immediately after the current one, if any. 01852 // This is used to avoid emitting unnecessary branches to the next block. 
01853 MachineBasicBlock *NextBlock = 0; 01854 MachineFunction::iterator BBI = SwitchBB; 01855 if (++BBI != FuncInfo.MF->end()) 01856 NextBlock = BBI; 01857 01858 if (NextMBB != NextBlock) 01859 BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, 01860 DAG.getBasicBlock(NextMBB)); 01861 01862 DAG.setRoot(BrAnd); 01863 } 01864 01865 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { 01866 MachineBasicBlock *InvokeMBB = FuncInfo.MBB; 01867 01868 // Retrieve successors. 01869 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; 01870 MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; 01871 01872 const Value *Callee(I.getCalledValue()); 01873 const Function *Fn = dyn_cast<Function>(Callee); 01874 if (isa<InlineAsm>(Callee)) 01875 visitInlineAsm(&I); 01876 else if (Fn && Fn->isIntrinsic()) { 01877 assert(Fn->getIntrinsicID() == Intrinsic::donothing); 01878 // Ignore invokes to @llvm.donothing: jump directly to the next BB. 01879 } else 01880 LowerCallTo(&I, getValue(Callee), false, LandingPad); 01881 01882 // If the value of the invoke is used outside of its defining block, make it 01883 // available as a virtual register. 01884 CopyToExportRegsIfNeeded(&I); 01885 01886 // Update successor info 01887 addSuccessorWithWeight(InvokeMBB, Return); 01888 addSuccessorWithWeight(InvokeMBB, LandingPad); 01889 01890 // Drop into normal successor. 
01891 DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), 01892 MVT::Other, getControlRoot(), 01893 DAG.getBasicBlock(Return))); 01894 } 01895 01896 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { 01897 llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); 01898 } 01899 01900 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { 01901 assert(FuncInfo.MBB->isLandingPad() && 01902 "Call to landingpad not in landing pad!"); 01903 01904 MachineBasicBlock *MBB = FuncInfo.MBB; 01905 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 01906 AddLandingPadInfo(LP, MMI, MBB); 01907 01908 // If there aren't registers to copy the values into (e.g., during SjLj 01909 // exceptions), then don't bother to create these DAG nodes. 01910 if (TLI.getExceptionPointerRegister() == 0 && 01911 TLI.getExceptionSelectorRegister() == 0) 01912 return; 01913 01914 SmallVector<EVT, 2> ValueVTs; 01915 ComputeValueVTs(TLI, LP.getType(), ValueVTs); 01916 01917 // Insert the EXCEPTIONADDR instruction. 01918 assert(FuncInfo.MBB->isLandingPad() && 01919 "Call to eh.exception not in landing pad!"); 01920 SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); 01921 SDValue Ops[2]; 01922 Ops[0] = DAG.getRoot(); 01923 SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1); 01924 SDValue Chain = Op1.getValue(1); 01925 01926 // Insert the EHSELECTION instruction. 
01927 VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); 01928 Ops[0] = Op1; 01929 Ops[1] = Chain; 01930 SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2); 01931 Chain = Op2.getValue(1); 01932 Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32); 01933 01934 Ops[0] = Op1; 01935 Ops[1] = Op2; 01936 SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), 01937 DAG.getVTList(&ValueVTs[0], ValueVTs.size()), 01938 &Ops[0], 2); 01939 01940 std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain); 01941 setValue(&LP, RetPair.first); 01942 DAG.setRoot(RetPair.second); 01943 } 01944 01945 /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for 01946 /// small case ranges). 01947 bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, 01948 CaseRecVector& WorkList, 01949 const Value* SV, 01950 MachineBasicBlock *Default, 01951 MachineBasicBlock *SwitchBB) { 01952 // Size is the number of Cases represented by this range. 01953 size_t Size = CR.Range.second - CR.Range.first; 01954 if (Size > 3) 01955 return false; 01956 01957 // Get the MachineFunction which holds the current MBB. This is used when 01958 // inserting any additional MBBs necessary to represent the switch. 01959 MachineFunction *CurMF = FuncInfo.MF; 01960 01961 // Figure out which block is immediately after the current one. 01962 MachineBasicBlock *NextBlock = 0; 01963 MachineFunction::iterator BBI = CR.CaseBB; 01964 01965 if (++BBI != FuncInfo.MF->end()) 01966 NextBlock = BBI; 01967 01968 BranchProbabilityInfo *BPI = FuncInfo.BPI; 01969 // If any two of the cases has the same destination, and if one value 01970 // is the same as the other, but has one bit unset that the other has set, 01971 // use bit manipulation to do two compares at once. For example: 01972 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" 01973 // TODO: This could be extended to merge any 2 cases in switches with 3 cases. 
// ---- handleSmallSwitchRange, continued: the signature and the
// ---- Size / CurMF / NextBlock / BPI setup precede this span.
  // TODO: Handle cases where CR.CaseBB != SwitchBB.
  if (Size == 2 && CR.CaseBB == SwitchBB) {
    Case &Small = *CR.Range.first;
    Case &Big = *(CR.Range.second-1);

    // Both entries must be single values (Low == High) with the same target.
    if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
      const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
      const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();

      // Check that there is only one bit different.
      if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
          (SmallValue | BigValue) == BigValue) {
        // Isolate the common bit.
        APInt CommonBit = BigValue & ~SmallValue;
        assert((SmallValue | CommonBit) == BigValue &&
               CommonBit.countPopulation() == 1 && "Not a common bit?");

        SDValue CondLHS = getValue(SV);
        EVT VT = CondLHS.getValueType();
        DebugLoc DL = getCurDebugLoc();

        // (SV | CommonBit) == BigValue holds for exactly the two case values.
        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
                                 DAG.getConstant(CommonBit, VT));
        SDValue Cond = DAG.getSetCC(DL, MVT::i1,
                                    Or, DAG.getConstant(BigValue, VT),
                                    ISD::SETEQ);

        // Update successor info.
        // Both Small and Big will jump to Small.BB, so we sum up the weights.
        addSuccessorWithWeight(SwitchBB, Small.BB,
                               Small.ExtraWeight + Big.ExtraWeight);
        addSuccessorWithWeight(SwitchBB, Default,
          // The default destination is the first successor in IR.
          BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);

        // Insert the true branch.
        SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
                                     getControlRoot(), Cond,
                                     DAG.getBasicBlock(Small.BB));

        // Insert the false branch.
        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
                             DAG.getBasicBlock(Default));

        DAG.setRoot(BrCond);
        return true;
      }
    }
  }

  // Order cases by weight so the most likely case will be checked first.
  // This only happens when branch-probability info is available; the same
  // pass accumulates the total weight of all cases into UnhandledWeights.
  uint32_t UnhandledWeights = 0;
  if (BPI) {
    for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
      uint32_t IWeight = I->ExtraWeight;
      UnhandledWeights += IWeight;
      // Swap *I with every earlier entry whose weight is smaller than the
      // weight *I had when this inner loop started.
      for (CaseItr J = CR.Range.first; J < I; ++J) {
        uint32_t JWeight = J->ExtraWeight;
        if (IWeight > JWeight)
          std::swap(*I, *J);
      }
    }
  }
  // Rearrange the case blocks so that the last one falls through if possible.
  Case &BackCase = *(CR.Range.second-1);
  if (Size > 1 &&
      NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
    // The last case block won't fall through into 'NextBlock' if we emit the
    // branches in this order. See if rearranging a case value would help.
    // We start at the bottom as it's the case with the least weight.
    // The scan runs from the second-to-last case down to the first; E is the
    // one-before-first position used as the stop marker.
    for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){
      if (I->BB == NextBlock) {
        std::swap(*I, BackCase);
        break;
      }
    }
  }

  // Create a CaseBlock record representing a conditional branch to
  // the Case's target mbb if the value being switched on SV is equal
  // to C.
  MachineBasicBlock *CurBlock = CR.CaseBB;
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
    MachineBasicBlock *FallThrough;
    if (I != E-1) {
      // Not the last case: a new block, inserted before BBI, receives the
      // next comparison.
      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
      CurMF->insert(BBI, FallThrough);

      // Put SV in a virtual register to make it available from the new blocks.
      ExportFromCurrentBlock(SV);
    } else {
      // If the last case doesn't match, go to the default block.
// ---- handleSmallSwitchRange, continued: this is the else-branch of the
// ---- "last case" test inside the per-case loop that starts above this span.
      FallThrough = Default;
    }

    const Value *RHS, *LHS, *MHS;
    ISD::CondCode CC;
    if (I->High == I->Low) {
      // This is just small small case range :) containing exactly 1 case
      CC = ISD::SETEQ;
      LHS = SV; RHS = I->High; MHS = NULL;
    } else {
      // A real range: Low and High become the outer operands and SV the
      // middle one, with no condition code attached.
      CC = ISD::SETCC_INVALID;
      LHS = I->Low; MHS = SV; RHS = I->High;
    }

    // The false weight should be sum of all un-handled cases.
    UnhandledWeights -= I->ExtraWeight;
    CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
                 /* me */ CurBlock,
                 /* trueweight */ I->ExtraWeight,
                 /* falseweight */ UnhandledWeights);

    // If emitting the first comparison, just call visitSwitchCase to emit the
    // code into the current block. Otherwise, push the CaseBlock onto the
    // vector to be later processed by SDISel, and insert the node's MBB
    // before the next MBB.
    if (CurBlock == SwitchBB)
      visitSwitchCase(CB, SwitchBB);
    else
      SwitchCases.push_back(CB);

    CurBlock = FallThrough;
  }

  return true;
}

/// areJTsAllowed - True when the target reports jump-table support and
/// either BR_JT or BRIND is legal or custom for MVT::Other.
static inline bool areJTsAllowed(const TargetLowering &TLI) {
  return TLI.supportJumpTables() &&
          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}

/// ComputeRange - Return Last - First + 1. Both operands are first
/// zero-extended to one bit more than the wider of the two, so the result is
/// that many bits wide.
static APInt ComputeRange(const APInt &First, const APInt &Last) {
  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
  APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
  return (LastExt - FirstExt + 1ULL);
}

/// handleJTSwitchCase - Emit jumptable for current switch case range
///
/// Returns false without emitting anything when jump tables are not allowed,
/// when the range holds fewer entries than TLI.getMinimumJumpTableEntries(),
/// or when fewer than 40% of the slots between the first and last value are
/// occupied. Otherwise a new block for the table jump is inserted after
/// CR.CaseBB, a JumpTableBlock is appended to JTCases, and true is returned.
bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
                                             CaseRecVector &WorkList,
                                             const Value *SV,
                                             MachineBasicBlock *Default,
                                             MachineBasicBlock *SwitchBB) {
  Case& FrontCase = *CR.Range.first;
  Case& BackCase = *(CR.Range.second-1);

  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
  const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();

  // TSize is the sum of the sizes of all cases in the range.
  APInt TSize(First.getBitWidth(), 0);
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
    TSize += I->size();

  if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
    return false;

  APInt Range = ComputeRange(First, Last);
  // The density is TSize / Range. Require at least 40%.
  // It should not be possible for IntTSize to saturate for sane code, but make
  // sure we handle Range saturation correctly.
  uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
  uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
  if (IntTSize * 10 < IntRange * 4)
    return false;

  DEBUG(dbgs() << "Lowering jump table\n"
               << "First entry: " << First << ". Last entry: " << Last << '\n'
               << "Range: " << Range << ". Size: " << TSize << ".\n\n");

  // Get the MachineFunction which holds the current MBB. This is used when
  // inserting any additional MBBs necessary to represent the switch.
  MachineFunction *CurMF = FuncInfo.MF;

  // Figure out which block is immediately after the current one.
  MachineFunction::iterator BBI = CR.CaseBB;
  ++BBI;

  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();

  // Create a new basic block to hold the code for loading the address
  // of the jump table, and jumping to it. Update successor information;
  // we will either branch to the default case for the switch, or the jump
  // table.
  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
  CurMF->insert(BBI, JumpTableBB);

  addSuccessorWithWeight(CR.CaseBB, Default);
  addSuccessorWithWeight(CR.CaseBB, JumpTableBB);

  // Build a vector of destination BBs, corresponding to each target
  // of the jump table. If the value of the jump table slot corresponds to
  // a case statement, push the case's BB onto the vector, otherwise, push
  // the default BB.
  std::vector<MachineBasicBlock*> DestBBs;
  APInt TEI = First;
  // TEI steps through every slot value; I advances only once TEI has
  // reached the High bound of the current case.
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
    const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
    const APInt &High = cast<ConstantInt>(I->High)->getValue();

    if (Low.ule(TEI) && TEI.ule(High)) {
      DestBBs.push_back(I->BB);
      if (TEI==High)
        ++I;
    } else {
      DestBBs.push_back(Default);
    }
  }

  // Calculate weight for each unique destination in CR.
  DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
  if (FuncInfo.BPI)
    for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
          DestWeights.find(I->BB);
      if (Itr != DestWeights.end())
        Itr->second += I->ExtraWeight;
      else
        DestWeights[I->BB] = I->ExtraWeight;
    }

  // Update successor info. Add one edge to each unique successor.
  // SuccsHandled is indexed by block number and marks blocks already added;
  // destinations without a recorded weight get weight 0.
  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
         E = DestBBs.end(); I != E; ++I) {
    if (!SuccsHandled[(*I)->getNumber()]) {
      SuccsHandled[(*I)->getNumber()] = true;
      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
          DestWeights.find(*I);
      addSuccessorWithWeight(JumpTableBB, *I,
                             Itr != DestWeights.end() ? Itr->second : 0);
    }
  }

  // Create a jump table index for this jump table.
  unsigned JTEncoding = TLI.getJumpTableEncoding();
  unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
                       ->createJumpTableIndex(DestBBs);

  // Set the jump table information so that we can codegen it as a second
  // MachineBasicBlock
  JumpTable JT(-1U, JTI, JumpTableBB, Default);
  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
  // The header is emitted right away only when it belongs in the block
  // currently being built.
  if (CR.CaseBB == SwitchBB)
    visitJumpTableHeader(JT, JTH, SwitchBB);

  JTCases.push_back(JumpTableBlock(JTH, JT));
  return true;
}

/// handleBTSplitSwitchCase - emit comparison and split binary search tree into
/// 2 subtrees.
bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
                                                  CaseRecVector& WorkList,
                                                  const Value* SV,
                                                  MachineBasicBlock *Default,
                                                  MachineBasicBlock *SwitchBB) {
  // Get the MachineFunction which holds the current MBB. This is used when
  // inserting any additional MBBs necessary to represent the switch.
  MachineFunction *CurMF = FuncInfo.MF;

  // Figure out which block is immediately after the current one.
  MachineFunction::iterator BBI = CR.CaseBB;
  ++BBI;

  Case& FrontCase = *CR.Range.first;
  Case& BackCase = *(CR.Range.second-1);
  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();

  // Size is the number of Cases represented by this range.
  unsigned Size = CR.Range.second - CR.Range.first;

  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
  const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
  double FMetric = 0;
  // Default pivot: the middle of the range.
  CaseItr Pivot = CR.Range.first + Size/2;

  // Select optimal pivot, maximizing sum density of LHS and RHS. This will
  // (heuristically) allow us to emit JumpTable's later.
// ---- handleBTSplitSwitchCase, continued: the signature and the
// ---- CurMF / BBI / First / Last / Pivot setup precede this span.
  APInt TSize(First.getBitWidth(), 0);
  for (CaseItr I = CR.Range.first, E = CR.Range.second;
       I!=E; ++I)
    TSize += I->size();

  // LSize / RSize track the total case size left and right of the candidate
  // split point; they start with only the first case on the left.
  APInt LSize = FrontCase.size();
  APInt RSize = TSize-LSize;
  DEBUG(dbgs() << "Selecting best pivot: \n"
               << "First: " << First << ", Last: " << Last <<'\n'
               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
  // Try every gap between two adjacent cases (I, J) as a split point.
  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
       J!=E; ++I, ++J) {
    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
    APInt Range = ComputeRange(LEnd, RBegin);
    assert((Range - 2ULL).isNonNegative() &&
           "Invalid case distance");
    // Use volatile double here to avoid excess precision issues on some hosts,
    // e.g. that use 80-bit X87 registers.
    volatile double LDensity =
       (double)LSize.roundToDouble() /
                           (LEnd - First + 1ULL).roundToDouble();
    volatile double RDensity =
      (double)RSize.roundToDouble() /
                           (Last - RBegin + 1ULL).roundToDouble();
    // The metric is the sum of both densities scaled by log2 of the gap
    // width.
    double Metric = Range.logBase2()*(LDensity+RDensity);
    // Should always split in some non-trivial place
    DEBUG(dbgs() <<"=>Step\n"
                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
                 << "LDensity: " << LDensity
                 << ", RDensity: " << RDensity << '\n'
                 << "Metric: " << Metric << '\n');
    if (FMetric < Metric) {
      Pivot = J;
      FMetric = Metric;
      DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
    }

    // Move case J from the right-hand side to the left-hand side.
    LSize += J->size();
    RSize -= J->size();
  }
  if (areJTsAllowed(TLI)) {
    // If our case is dense we *really* should handle it earlier!
    assert((FMetric > 0) && "Should handle dense range earlier!");
  } else {
    // Without jump tables the density-based pivot is discarded in favour of
    // the middle of the range.
    Pivot = CR.Range.first + Size/2;
  }

  CaseRange LHSR(CR.Range.first, Pivot);
  CaseRange RHSR(Pivot, CR.Range.second);
  const Constant *C = Pivot->Low;
  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;

  // We know that we branch to the LHS if the Value being switched on is
  // less than the Pivot value, C. We use this to optimize our binary
  // tree a bit, by recognizing that if SV is greater than or equal to the
  // LHS's Case Value, and that Case Value is exactly one less than the
  // Pivot's Value, then we can branch directly to the LHS's Target,
  // rather than creating a leaf node for it.
  if ((LHSR.second - LHSR.first) == 1 &&
      LHSR.first->High == CR.GE &&
      cast<ConstantInt>(C)->getValue() ==
      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
    TrueBB = LHSR.first->BB;
  } else {
    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, TrueBB);
    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));

    // Put SV in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(SV);
  }

  // Similar to the optimization above, if the Value being switched on is
  // known to be less than the Constant CR.LT, and the current Case Value
  // is CR.LT - 1, then we can branch directly to the target block for
  // the current Case Value, rather than emitting a RHS leaf node for it.
  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
    FalseBB = RHSR.first->BB;
  } else {
    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, FalseBB);
    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));

    // Put SV in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(SV);
  }

  // Create a CaseBlock record representing a conditional branch to
  // the LHS node if the value being switched on SV is less than C.
  // Otherwise, branch to RHS.
  CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);

  // Emit immediately when this is the block being built; otherwise queue the
  // record in SwitchCases.
  if (CR.CaseBB == SwitchBB)
    visitSwitchCase(CB, SwitchBB);
  else
    SwitchCases.push_back(CB);

  return true;
}

/// handleBitTestsSwitchCase - if current case range has few destination and
/// range span less, than machine word bitwidth, encode case range into series
/// of masks and emit bit tests with these masks.
bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
                                                   CaseRecVector& WorkList,
                                                   const Value* SV,
                                                   MachineBasicBlock* Default,
                                                   MachineBasicBlock *SwitchBB){
  EVT PTy = TLI.getPointerTy();
  unsigned IntPtrBits = PTy.getSizeInBits();

  Case& FrontCase = *CR.Range.first;
  Case& BackCase = *(CR.Range.second-1);

  // Get the MachineFunction which holds the current MBB. This is used when
  // inserting any additional MBBs necessary to represent the switch.
  MachineFunction *CurMF = FuncInfo.MF;

  // If target does not have legal shift left, do not emit bit tests at all.
// ---- handleBitTestsSwitchCase, continued: the signature and the
// ---- PTy / IntPtrBits / FrontCase / BackCase / CurMF setup precede this span.
  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
    return false;

  size_t numCmps = 0;
  for (CaseItr I = CR.Range.first, E = CR.Range.second;
       I!=E; ++I) {
    // Single case counts one, case range - two.
    numCmps += (I->Low == I->High ? 1 : 2);
  }

  // Count unique destinations
  SmallSet<MachineBasicBlock*, 4> Dests;
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
    Dests.insert(I->BB);
    if (Dests.size() > 3)
      // Don't bother the code below, if there are too much unique destinations
      return false;
  }
  DEBUG(dbgs() << "Total number of unique destinations: "
        << Dests.size() << '\n'
        << "Total number of comparisons: " << numCmps << '\n');

  // Compute span of values.
  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
  APInt cmpRange = maxValue - minValue;

  DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
               << "Low bound: " << minValue << '\n'
               << "High bound: " << maxValue << '\n');

  // Give up when the span does not fit in a pointer-sized mask, or when the
  // number of comparisons is below the threshold for the number of
  // destinations (3 for one, 5 for two, 6 for three).
  if (cmpRange.uge(IntPtrBits) ||
      (!(Dests.size() == 1 && numCmps >= 3) &&
       !(Dests.size() == 2 && numCmps >= 5) &&
       !(Dests.size() >= 3 && numCmps >= 6)))
    return false;

  DEBUG(dbgs() << "Emitting bit tests\n");
  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());

  // Optimize the case where all the case values fit in a
  // word without having to subtract minValue. In this case,
  // we can optimize away the subtraction.
  if (maxValue.ult(IntPtrBits)) {
    cmpRange = maxValue;
  } else {
    lowBound = minValue;
  }

  CaseBitsVector CasesBits;
  unsigned i, count = 0;

  // Build one CaseBits entry per distinct destination and set, in its mask,
  // one bit for every value (relative to lowBound) that jumps there.
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
    MachineBasicBlock* Dest = I->BB;
    // Linear search for an existing entry; i == count means "not found".
    for (i = 0; i < count; ++i)
      if (Dest == CasesBits[i].BB)
        break;

    if (i == count) {
      assert((count < 3) && "Too much destinations to test!");
      CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
      count++;
    }

    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();

    uint64_t lo = (lowValue - lowBound).getZExtValue();
    uint64_t hi = (highValue - lowBound).getZExtValue();
    CasesBits[i].ExtraWeight += I->ExtraWeight;

    for (uint64_t j = lo; j <= hi; j++) {
      CasesBits[i].Mask |= 1ULL << j;
      CasesBits[i].Bits++;
    }

  }
  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());

  BitTestInfo BTC;

  // Figure out which block is immediately after the current one.
  MachineFunction::iterator BBI = CR.CaseBB;
  ++BBI;

  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();

  DEBUG(dbgs() << "Cases:\n");
  // One new machine block, inserted before BBI, per mask to be tested.
  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
    DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
                 << ", Bits: " << CasesBits[i].Bits
                 << ", BB: " << CasesBits[i].BB << '\n');

    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, CaseBB);
    BTC.push_back(BitTestCase(CasesBits[i].Mask,
                              CaseBB,
                              CasesBits[i].BB, CasesBits[i].ExtraWeight));

    // Put SV in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(SV);
  }

  BitTestBlock BTB(lowBound, cmpRange, SV,
                   -1U, MVT::Other, (CR.CaseBB == SwitchBB),
                   CR.CaseBB, Default, BTC);

  // The header is emitted right away only when it belongs in the block
  // currently being built.
  if (CR.CaseBB == SwitchBB)
    visitBitTestHeader(BTB, SwitchBB);

  BitTestCases.push_back(BTB);

  return true;
}

/// Clusterify - Transform simple list of Cases into list of CaseRange's
size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
                                       const SwitchInst& SI) {

  /// Use a shorter form of declaration, and also
  /// show the we want to use CRSBuilder as Clusterifier.
  typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;

  Clusterifier TheClusterifier;

  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  // Start with "simple" cases
  for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
       i != e; ++i) {
    const BasicBlock *SuccBB = i.getCaseSuccessor();
    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];

    TheClusterifier.add(i.getCaseValueEx(), SMBB,
        BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
  }

  TheClusterifier.optimize();

  size_t numCmps = 0;
  for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
       e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
    Clusterifier::Cluster &C = *i;
    // Update edge weight for the cluster.
    unsigned W = C.first.Weight;

    // FIXME: Currently work with ConstantInt based numbers.
    // Changing it to APInt based is a pretty heavy for this commit.
    Cases.push_back(Case(C.first.getLow().toConstantInt(),
                         C.first.getHigh().toConstantInt(), C.second, W));

    if (C.first.getLow() != C.first.getHigh())
    // A range counts double, since it requires two compares.
02529 ++numCmps; 02530 } 02531 02532 return numCmps; 02533 } 02534 02535 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, 02536 MachineBasicBlock *Last) { 02537 // Update JTCases. 02538 for (unsigned i = 0, e = JTCases.size(); i != e; ++i) 02539 if (JTCases[i].first.HeaderBB == First) 02540 JTCases[i].first.HeaderBB = Last; 02541 02542 // Update BitTestCases. 02543 for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) 02544 if (BitTestCases[i].Parent == First) 02545 BitTestCases[i].Parent = Last; 02546 } 02547 02548 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { 02549 MachineBasicBlock *SwitchMBB = FuncInfo.MBB; 02550 02551 // Figure out which block is immediately after the current one. 02552 MachineBasicBlock *NextBlock = 0; 02553 MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; 02554 02555 // If there is only the default destination, branch to it if it is not the 02556 // next basic block. Otherwise, just fall through. 02557 if (!SI.getNumCases()) { 02558 // Update machine-CFG edges. 02559 02560 // If this is not a fall-through branch, emit the branch. 02561 SwitchMBB->addSuccessor(Default); 02562 if (Default != NextBlock) 02563 DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), 02564 MVT::Other, getControlRoot(), 02565 DAG.getBasicBlock(Default))); 02566 02567 return; 02568 } 02569 02570 // If there are any non-default case statements, create a vector of Cases 02571 // representing each one, and sort the vector so that we can efficiently 02572 // create a binary search tree from them. 02573 CaseVector Cases; 02574 size_t numCmps = Clusterify(Cases, SI); 02575 DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() 02576 << ". Total compares: " << numCmps << '\n'); 02577 (void)numCmps; 02578 02579 // Get the Value to be switched on and default basic blocks, which will be 02580 // inserted into CaseBlock records, representing basic blocks in the binary 02581 // search tree. 
02582 const Value *SV = SI.getCondition(); 02583 02584 // Push the initial CaseRec onto the worklist 02585 CaseRecVector WorkList; 02586 WorkList.push_back(CaseRec(SwitchMBB,0,0, 02587 CaseRange(Cases.begin(),Cases.end()))); 02588 02589 while (!WorkList.empty()) { 02590 // Grab a record representing a case range to process off the worklist 02591 CaseRec CR = WorkList.back(); 02592 WorkList.pop_back(); 02593 02594 if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) 02595 continue; 02596 02597 // If the range has few cases (two or less) emit a series of specific 02598 // tests. 02599 if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) 02600 continue; 02601 02602 // If the switch has more than N blocks, and is at least 40% dense, and the 02603 // target supports indirect branches, then emit a jump table rather than 02604 // lowering the switch to a binary tree of conditional branches. 02605 // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries(). 02606 if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) 02607 continue; 02608 02609 // Emit binary tree. We need to pick a pivot, and push left and right ranges 02610 // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. 02611 handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); 02612 } 02613 } 02614 02615 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { 02616 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; 02617 02618 // Update machine-CFG edges with unique successors. 
02619 SmallSet<BasicBlock*, 32> Done; 02620 for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { 02621 BasicBlock *BB = I.getSuccessor(i); 02622 bool Inserted = Done.insert(BB); 02623 if (!Inserted) 02624 continue; 02625 02626 MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; 02627 addSuccessorWithWeight(IndirectBrMBB, Succ); 02628 } 02629 02630 DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), 02631 MVT::Other, getControlRoot(), 02632 getValue(I.getAddress()))); 02633 } 02634 02635 void SelectionDAGBuilder::visitFSub(const User &I) { 02636 // -0.0 - X --> fneg 02637 Type *Ty = I.getType(); 02638 if (isa<Constant>(I.getOperand(0)) && 02639 I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { 02640 SDValue Op2 = getValue(I.getOperand(1)); 02641 setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), 02642 Op2.getValueType(), Op2)); 02643 return; 02644 } 02645 02646 visitBinary(I, ISD::FSUB); 02647 } 02648 02649 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { 02650 SDValue Op1 = getValue(I.getOperand(0)); 02651 SDValue Op2 = getValue(I.getOperand(1)); 02652 setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), 02653 Op1.getValueType(), Op1, Op2)); 02654 } 02655 02656 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { 02657 SDValue Op1 = getValue(I.getOperand(0)); 02658 SDValue Op2 = getValue(I.getOperand(1)); 02659 02660 EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); 02661 02662 // Coerce the shift amount to the right type if we can. 02663 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { 02664 unsigned ShiftSize = ShiftTy.getSizeInBits(); 02665 unsigned Op2Size = Op2.getValueType().getSizeInBits(); 02666 DebugLoc DL = getCurDebugLoc(); 02667 02668 // If the operand is smaller than the shift count type, promote it. 
02669 if (ShiftSize > Op2Size) 02670 Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); 02671 02672 // If the operand is larger than the shift count type but the shift 02673 // count type has enough bits to represent any shift value, truncate 02674 // it now. This is a common case and it exposes the truncate to 02675 // optimization early. 02676 else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) 02677 Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); 02678 // Otherwise we'll need to temporarily settle for some other convenient 02679 // type. Type legalization will make adjustments once the shiftee is split. 02680 else 02681 Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); 02682 } 02683 02684 setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), 02685 Op1.getValueType(), Op1, Op2)); 02686 } 02687 02688 void SelectionDAGBuilder::visitSDiv(const User &I) { 02689 SDValue Op1 = getValue(I.getOperand(0)); 02690 SDValue Op2 = getValue(I.getOperand(1)); 02691 02692 // Turn exact SDivs into multiplications. 02693 // FIXME: This should be in DAGCombiner, but it doesn't have access to the 02694 // exact bit. 
02695 if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && 02696 !isa<ConstantSDNode>(Op1) && 02697 isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) 02698 setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG)); 02699 else 02700 setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(), 02701 Op1, Op2)); 02702 } 02703 02704 void SelectionDAGBuilder::visitICmp(const User &I) { 02705 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; 02706 if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) 02707 predicate = IC->getPredicate(); 02708 else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) 02709 predicate = ICmpInst::Predicate(IC->getPredicate()); 02710 SDValue Op1 = getValue(I.getOperand(0)); 02711 SDValue Op2 = getValue(I.getOperand(1)); 02712 ISD::CondCode Opcode = getICmpCondCode(predicate); 02713 02714 EVT DestVT = TLI.getValueType(I.getType()); 02715 setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); 02716 } 02717 02718 void SelectionDAGBuilder::visitFCmp(const User &I) { 02719 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; 02720 if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) 02721 predicate = FC->getPredicate(); 02722 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) 02723 predicate = FCmpInst::Predicate(FC->getPredicate()); 02724 SDValue Op1 = getValue(I.getOperand(0)); 02725 SDValue Op2 = getValue(I.getOperand(1)); 02726 ISD::CondCode Condition = getFCmpCondCode(predicate); 02727 if (TM.Options.NoNaNsFPMath) 02728 Condition = getFCmpCodeWithoutNaN(Condition); 02729 EVT DestVT = TLI.getValueType(I.getType()); 02730 setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); 02731 } 02732 02733 void SelectionDAGBuilder::visitSelect(const User &I) { 02734 SmallVector<EVT, 4> ValueVTs; 02735 ComputeValueVTs(TLI, I.getType(), ValueVTs); 02736 unsigned NumValues = ValueVTs.size(); 02737 if (NumValues == 0) return; 02738 
02739 SmallVector<SDValue, 4> Values(NumValues); 02740 SDValue Cond = getValue(I.getOperand(0)); 02741 SDValue TrueVal = getValue(I.getOperand(1)); 02742 SDValue FalseVal = getValue(I.getOperand(2)); 02743 ISD::NodeType OpCode = Cond.getValueType().isVector() ? 02744 ISD::VSELECT : ISD::SELECT; 02745 02746 for (unsigned i = 0; i != NumValues; ++i) 02747 Values[i] = DAG.getNode(OpCode, getCurDebugLoc(), 02748 TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), 02749 Cond, 02750 SDValue(TrueVal.getNode(), 02751 TrueVal.getResNo() + i), 02752 SDValue(FalseVal.getNode(), 02753 FalseVal.getResNo() + i)); 02754 02755 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), 02756 DAG.getVTList(&ValueVTs[0], NumValues), 02757 &Values[0], NumValues)); 02758 } 02759 02760 void SelectionDAGBuilder::visitTrunc(const User &I) { 02761 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 02762 SDValue N = getValue(I.getOperand(0)); 02763 EVT DestVT = TLI.getValueType(I.getType()); 02764 setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); 02765 } 02766 02767 void SelectionDAGBuilder::visitZExt(const User &I) { 02768 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 02769 // ZExt also can't be a cast to bool for same reason. So, nothing much to do 02770 SDValue N = getValue(I.getOperand(0)); 02771 EVT DestVT = TLI.getValueType(I.getType()); 02772 setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); 02773 } 02774 02775 void SelectionDAGBuilder::visitSExt(const User &I) { 02776 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 02777 // SExt also can't be a cast to bool for same reason. 
So, nothing much to do 02778 SDValue N = getValue(I.getOperand(0)); 02779 EVT DestVT = TLI.getValueType(I.getType()); 02780 setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); 02781 } 02782 02783 void SelectionDAGBuilder::visitFPTrunc(const User &I) { 02784 // FPTrunc is never a no-op cast, no need to check 02785 SDValue N = getValue(I.getOperand(0)); 02786 EVT DestVT = TLI.getValueType(I.getType()); 02787 setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), 02788 DestVT, N, 02789 DAG.getTargetConstant(0, TLI.getPointerTy()))); 02790 } 02791 02792 void SelectionDAGBuilder::visitFPExt(const User &I){ 02793 // FPExt is never a no-op cast, no need to check 02794 SDValue N = getValue(I.getOperand(0)); 02795 EVT DestVT = TLI.getValueType(I.getType()); 02796 setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); 02797 } 02798 02799 void SelectionDAGBuilder::visitFPToUI(const User &I) { 02800 // FPToUI is never a no-op cast, no need to check 02801 SDValue N = getValue(I.getOperand(0)); 02802 EVT DestVT = TLI.getValueType(I.getType()); 02803 setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); 02804 } 02805 02806 void SelectionDAGBuilder::visitFPToSI(const User &I) { 02807 // FPToSI is never a no-op cast, no need to check 02808 SDValue N = getValue(I.getOperand(0)); 02809 EVT DestVT = TLI.getValueType(I.getType()); 02810 setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); 02811 } 02812 02813 void SelectionDAGBuilder::visitUIToFP(const User &I) { 02814 // UIToFP is never a no-op cast, no need to check 02815 SDValue N = getValue(I.getOperand(0)); 02816 EVT DestVT = TLI.getValueType(I.getType()); 02817 setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); 02818 } 02819 02820 void SelectionDAGBuilder::visitSIToFP(const User &I){ 02821 // SIToFP is never a no-op cast, no need to check 02822 SDValue N = getValue(I.getOperand(0)); 02823 EVT DestVT = 
TLI.getValueType(I.getType()); 02824 setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); 02825 } 02826 02827 void SelectionDAGBuilder::visitPtrToInt(const User &I) { 02828 // What to do depends on the size of the integer and the size of the pointer. 02829 // We can either truncate, zero extend, or no-op, accordingly. 02830 SDValue N = getValue(I.getOperand(0)); 02831 EVT DestVT = TLI.getValueType(I.getType()); 02832 setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); 02833 } 02834 02835 void SelectionDAGBuilder::visitIntToPtr(const User &I) { 02836 // What to do depends on the size of the integer and the size of the pointer. 02837 // We can either truncate, zero extend, or no-op, accordingly. 02838 SDValue N = getValue(I.getOperand(0)); 02839 EVT DestVT = TLI.getValueType(I.getType()); 02840 setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); 02841 } 02842 02843 void SelectionDAGBuilder::visitBitCast(const User &I) { 02844 SDValue N = getValue(I.getOperand(0)); 02845 EVT DestVT = TLI.getValueType(I.getType()); 02846 02847 // BitCast assures us that source and destination are the same size so this is 02848 // either a BITCAST or a no-op. 02849 if (DestVT != N.getValueType()) 02850 setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(), 02851 DestVT, N)); // convert types. 02852 else 02853 setValue(&I, N); // noop cast. 
02854 } 02855 02856 void SelectionDAGBuilder::visitInsertElement(const User &I) { 02857 SDValue InVec = getValue(I.getOperand(0)); 02858 SDValue InVal = getValue(I.getOperand(1)); 02859 SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), 02860 TLI.getPointerTy(), 02861 getValue(I.getOperand(2))); 02862 setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), 02863 TLI.getValueType(I.getType()), 02864 InVec, InVal, InIdx)); 02865 } 02866 02867 void SelectionDAGBuilder::visitExtractElement(const User &I) { 02868 SDValue InVec = getValue(I.getOperand(0)); 02869 SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), 02870 TLI.getPointerTy(), 02871 getValue(I.getOperand(1))); 02872 setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), 02873 TLI.getValueType(I.getType()), InVec, InIdx)); 02874 } 02875 02876 // Utility for visitShuffleVector - Return true if every element in Mask, 02877 // beginning from position Pos and ending in Pos+Size, falls within the 02878 // specified sequential range [L, L+Pos). or is undef. 
02879 static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, 02880 unsigned Pos, unsigned Size, int Low) { 02881 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) 02882 if (Mask[i] >= 0 && Mask[i] != Low) 02883 return false; 02884 return true; 02885 } 02886 02887 void SelectionDAGBuilder::visitShuffleVector(const User &I) { 02888 SDValue Src1 = getValue(I.getOperand(0)); 02889 SDValue Src2 = getValue(I.getOperand(1)); 02890 02891 SmallVector<int, 8> Mask; 02892 ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); 02893 unsigned MaskNumElts = Mask.size(); 02894 02895 EVT VT = TLI.getValueType(I.getType()); 02896 EVT SrcVT = Src1.getValueType(); 02897 unsigned SrcNumElts = SrcVT.getVectorNumElements(); 02898 02899 if (SrcNumElts == MaskNumElts) { 02900 setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, 02901 &Mask[0])); 02902 return; 02903 } 02904 02905 // Normalize the shuffle vector since mask and vector length don't match. 02906 if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { 02907 // Mask is longer than the source vectors and is a multiple of the source 02908 // vectors. We can use concatenate vector to make the mask and vectors 02909 // lengths match. 02910 if (SrcNumElts*2 == MaskNumElts) { 02911 // First check for Src1 in low and Src2 in high 02912 if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && 02913 isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { 02914 // The shuffle is concatenating two vectors together. 02915 setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), 02916 VT, Src1, Src2)); 02917 return; 02918 } 02919 // Then check for Src2 in low and Src1 in high 02920 if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && 02921 isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { 02922 // The shuffle is concatenating two vectors together. 
02923 setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), 02924 VT, Src2, Src1)); 02925 return; 02926 } 02927 } 02928 02929 // Pad both vectors with undefs to make them the same length as the mask. 02930 unsigned NumConcat = MaskNumElts / SrcNumElts; 02931 bool Src1U = Src1.getOpcode() == ISD::UNDEF; 02932 bool Src2U = Src2.getOpcode() == ISD::UNDEF; 02933 SDValue UndefVal = DAG.getUNDEF(SrcVT); 02934 02935 SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); 02936 SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal); 02937 MOps1[0] = Src1; 02938 MOps2[0] = Src2; 02939 02940 Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 02941 getCurDebugLoc(), VT, 02942 &MOps1[0], NumConcat); 02943 Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 02944 getCurDebugLoc(), VT, 02945 &MOps2[0], NumConcat); 02946 02947 // Readjust mask for new input vector length. 02948 SmallVector<int, 8> MappedOps; 02949 for (unsigned i = 0; i != MaskNumElts; ++i) { 02950 int Idx = Mask[i]; 02951 if (Idx >= (int)SrcNumElts) 02952 Idx -= SrcNumElts - MaskNumElts; 02953 MappedOps.push_back(Idx); 02954 } 02955 02956 setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, 02957 &MappedOps[0])); 02958 return; 02959 } 02960 02961 if (SrcNumElts > MaskNumElts) { 02962 // Analyze the access pattern of the vector to see if we can extract 02963 // two subvectors and do the shuffle. The analysis is done by calculating 02964 // the range of elements the mask access on both vectors. 
02965 int MinRange[2] = { static_cast<int>(SrcNumElts), 02966 static_cast<int>(SrcNumElts)}; 02967 int MaxRange[2] = {-1, -1}; 02968 02969 for (unsigned i = 0; i != MaskNumElts; ++i) { 02970 int Idx = Mask[i]; 02971 unsigned Input = 0; 02972 if (Idx < 0) 02973 continue; 02974 02975 if (Idx >= (int)SrcNumElts) { 02976 Input = 1; 02977 Idx -= SrcNumElts; 02978 } 02979 if (Idx > MaxRange[Input]) 02980 MaxRange[Input] = Idx; 02981 if (Idx < MinRange[Input]) 02982 MinRange[Input] = Idx; 02983 } 02984 02985 // Check if the access is smaller than the vector size and can we find 02986 // a reasonable extract index. 02987 int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not 02988 // Extract. 02989 int StartIdx[2]; // StartIdx to extract from 02990 for (unsigned Input = 0; Input < 2; ++Input) { 02991 if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { 02992 RangeUse[Input] = 0; // Unused 02993 StartIdx[Input] = 0; 02994 continue; 02995 } 02996 02997 // Find a good start index that is a multiple of the mask length. Then 02998 // see if the rest of the elements are in range. 02999 StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; 03000 if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && 03001 StartIdx[Input] + MaskNumElts <= SrcNumElts) 03002 RangeUse[Input] = 1; // Extract from a multiple of the mask length. 03003 } 03004 03005 if (RangeUse[0] == 0 && RangeUse[1] == 0) { 03006 setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. 03007 return; 03008 } 03009 if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { 03010 // Extract appropriate subvector and generate a vector shuffle 03011 for (unsigned Input = 0; Input < 2; ++Input) { 03012 SDValue &Src = Input == 0 ? Src1 : Src2; 03013 if (RangeUse[Input] == 0) 03014 Src = DAG.getUNDEF(VT); 03015 else 03016 Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT, 03017 Src, DAG.getIntPtrConstant(StartIdx[Input])); 03018 } 03019 03020 // Calculate new mask. 
03021 SmallVector<int, 8> MappedOps; 03022 for (unsigned i = 0; i != MaskNumElts; ++i) { 03023 int Idx = Mask[i]; 03024 if (Idx >= 0) { 03025 if (Idx < (int)SrcNumElts) 03026 Idx -= StartIdx[0]; 03027 else 03028 Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; 03029 } 03030 MappedOps.push_back(Idx); 03031 } 03032 03033 setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, 03034 &MappedOps[0])); 03035 return; 03036 } 03037 } 03038 03039 // We can't use either concat vectors or extract subvectors so fall back to 03040 // replacing the shuffle with extract and build vector. 03041 // to insert and build vector. 03042 EVT EltVT = VT.getVectorElementType(); 03043 EVT PtrVT = TLI.getPointerTy(); 03044 SmallVector<SDValue,8> Ops; 03045 for (unsigned i = 0; i != MaskNumElts; ++i) { 03046 int Idx = Mask[i]; 03047 SDValue Res; 03048 03049 if (Idx < 0) { 03050 Res = DAG.getUNDEF(EltVT); 03051 } else { 03052 SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; 03053 if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; 03054 03055 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), 03056 EltVT, Src, DAG.getConstant(Idx, PtrVT)); 03057 } 03058 03059 Ops.push_back(Res); 03060 } 03061 03062 setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), 03063 VT, &Ops[0], Ops.size())); 03064 } 03065 03066 void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { 03067 const Value *Op0 = I.getOperand(0); 03068 const Value *Op1 = I.getOperand(1); 03069 Type *AggTy = I.getType(); 03070 Type *ValTy = Op1->getType(); 03071 bool IntoUndef = isa<UndefValue>(Op0); 03072 bool FromUndef = isa<UndefValue>(Op1); 03073 03074 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); 03075 03076 SmallVector<EVT, 4> AggValueVTs; 03077 ComputeValueVTs(TLI, AggTy, AggValueVTs); 03078 SmallVector<EVT, 4> ValValueVTs; 03079 ComputeValueVTs(TLI, ValTy, ValValueVTs); 03080 03081 unsigned NumAggValues = AggValueVTs.size(); 03082 unsigned NumValValues = ValValueVTs.size(); 
03083 SmallVector<SDValue, 4> Values(NumAggValues); 03084 03085 SDValue Agg = getValue(Op0); 03086 unsigned i = 0; 03087 // Copy the beginning value(s) from the original aggregate. 03088 for (; i != LinearIndex; ++i) 03089 Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : 03090 SDValue(Agg.getNode(), Agg.getResNo() + i); 03091 // Copy values from the inserted value(s). 03092 if (NumValValues) { 03093 SDValue Val = getValue(Op1); 03094 for (; i != LinearIndex + NumValValues; ++i) 03095 Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : 03096 SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); 03097 } 03098 // Copy remaining value(s) from the original aggregate. 03099 for (; i != NumAggValues; ++i) 03100 Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : 03101 SDValue(Agg.getNode(), Agg.getResNo() + i); 03102 03103 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), 03104 DAG.getVTList(&AggValueVTs[0], NumAggValues), 03105 &Values[0], NumAggValues)); 03106 } 03107 03108 void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { 03109 const Value *Op0 = I.getOperand(0); 03110 Type *AggTy = Op0->getType(); 03111 Type *ValTy = I.getType(); 03112 bool OutOfUndef = isa<UndefValue>(Op0); 03113 03114 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); 03115 03116 SmallVector<EVT, 4> ValValueVTs; 03117 ComputeValueVTs(TLI, ValTy, ValValueVTs); 03118 03119 unsigned NumValValues = ValValueVTs.size(); 03120 03121 // Ignore a extractvalue that produces an empty object 03122 if (!NumValValues) { 03123 setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); 03124 return; 03125 } 03126 03127 SmallVector<SDValue, 4> Values(NumValValues); 03128 03129 SDValue Agg = getValue(Op0); 03130 // Copy out the selected value(s). 03131 for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) 03132 Values[i - LinearIndex] = 03133 OutOfUndef ? 
03134 DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : 03135 SDValue(Agg.getNode(), Agg.getResNo() + i); 03136 03137 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), 03138 DAG.getVTList(&ValValueVTs[0], NumValValues), 03139 &Values[0], NumValValues)); 03140 } 03141 03142 void SelectionDAGBuilder::visitGetElementPtr(const User &I) { 03143 SDValue N = getValue(I.getOperand(0)); 03144 // Note that the pointer operand may be a vector of pointers. Take the scalar 03145 // element which holds a pointer. 03146 Type *Ty = I.getOperand(0)->getType()->getScalarType(); 03147 03148 for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); 03149 OI != E; ++OI) { 03150 const Value *Idx = *OI; 03151 if (StructType *StTy = dyn_cast<StructType>(Ty)) { 03152 unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); 03153 if (Field) { 03154 // N = N + Offset 03155 uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); 03156 N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, 03157 DAG.getConstant(Offset, N.getValueType())); 03158 } 03159 03160 Ty = StTy->getElementType(Field); 03161 } else { 03162 Ty = cast<SequentialType>(Ty)->getElementType(); 03163 03164 // If this is a constant subscript, handle it quickly. 
03165 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { 03166 if (CI->isZero()) continue; 03167 uint64_t Offs = 03168 TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); 03169 SDValue OffsVal; 03170 EVT PTy = TLI.getPointerTy(); 03171 unsigned PtrBits = PTy.getSizeInBits(); 03172 if (PtrBits < 64) 03173 OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), 03174 TLI.getPointerTy(), 03175 DAG.getConstant(Offs, MVT::i64)); 03176 else 03177 OffsVal = DAG.getIntPtrConstant(Offs); 03178 03179 N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, 03180 OffsVal); 03181 continue; 03182 } 03183 03184 // N = N + Idx * ElementSize; 03185 APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(), 03186 TD->getTypeAllocSize(Ty)); 03187 SDValue IdxN = getValue(Idx); 03188 03189 // If the index is smaller or larger than intptr_t, truncate or extend 03190 // it. 03191 IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType()); 03192 03193 // If this is a multiply by a power of two, turn it into a shl 03194 // immediately. This is a very common case. 03195 if (ElementSize != 1) { 03196 if (ElementSize.isPowerOf2()) { 03197 unsigned Amt = ElementSize.logBase2(); 03198 IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), 03199 N.getValueType(), IdxN, 03200 DAG.getConstant(Amt, IdxN.getValueType())); 03201 } else { 03202 SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); 03203 IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), 03204 N.getValueType(), IdxN, Scale); 03205 } 03206 } 03207 03208 N = DAG.getNode(ISD::ADD, getCurDebugLoc(), 03209 N.getValueType(), N, IdxN); 03210 } 03211 } 03212 03213 setValue(&I, N); 03214 } 03215 03216 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { 03217 // If this is a fixed sized alloca in the entry block of the function, 03218 // allocate it statically on the stack. 03219 if (FuncInfo.StaticAllocaMap.count(&I)) 03220 return; // getValue will auto-populate this. 
03221 03222 Type *Ty = I.getAllocatedType(); 03223 uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); 03224 unsigned Align = 03225 std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), 03226 I.getAlignment()); 03227 03228 SDValue AllocSize = getValue(I.getArraySize()); 03229 03230 EVT IntPtr = TLI.getPointerTy(); 03231 if (AllocSize.getValueType() != IntPtr) 03232 AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); 03233 03234 AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, 03235 AllocSize, 03236 DAG.getConstant(TySize, IntPtr)); 03237 03238 // Handle alignment. If the requested alignment is less than or equal to 03239 // the stack alignment, ignore it. If the size is greater than or equal to 03240 // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. 03241 unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); 03242 if (Align <= StackAlign) 03243 Align = 0; 03244 03245 // Round the size of the allocation up to the stack alignment size 03246 // by add SA-1 to the size. 03247 AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), 03248 AllocSize.getValueType(), AllocSize, 03249 DAG.getIntPtrConstant(StackAlign-1)); 03250 03251 // Mask out the low bits for alignment purposes. 03252 AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), 03253 AllocSize.getValueType(), AllocSize, 03254 DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); 03255 03256 SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; 03257 SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); 03258 SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(), 03259 VTs, Ops, 3); 03260 setValue(&I, DSA); 03261 DAG.setRoot(DSA.getValue(1)); 03262 03263 // Inform the Frame Information that we have just allocated a variable-sized 03264 // object. 03265 FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? 
Align : 1); 03266 } 03267 03268 void SelectionDAGBuilder::visitLoad(const LoadInst &I) { 03269 if (I.isAtomic()) 03270 return visitAtomicLoad(I); 03271 03272 const Value *SV = I.getOperand(0); 03273 SDValue Ptr = getValue(SV); 03274 03275 Type *Ty = I.getType(); 03276 03277 bool isVolatile = I.isVolatile(); 03278 bool isNonTemporal = I.getMetadata("nontemporal") != 0; 03279 bool isInvariant = I.getMetadata("invariant.load") != 0; 03280 unsigned Alignment = I.getAlignment(); 03281 const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); 03282 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); 03283 03284 SmallVector<EVT, 4> ValueVTs; 03285 SmallVector<uint64_t, 4> Offsets; 03286 ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); 03287 unsigned NumValues = ValueVTs.size(); 03288 if (NumValues == 0) 03289 return; 03290 03291 SDValue Root; 03292 bool ConstantMemory = false; 03293 if (I.isVolatile() || NumValues > MaxParallelChains) 03294 // Serialize volatile loads with other side effects. 03295 Root = getRoot(); 03296 else if (AA->pointsToConstantMemory( 03297 AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) { 03298 // Do not serialize (non-volatile) loads of constant memory with anything. 03299 Root = DAG.getEntryNode(); 03300 ConstantMemory = true; 03301 } else { 03302 // Do not serialize non-volatile loads against each other. 03303 Root = DAG.getRoot(); 03304 } 03305 03306 SmallVector<SDValue, 4> Values(NumValues); 03307 SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), 03308 NumValues)); 03309 EVT PtrVT = Ptr.getValueType(); 03310 unsigned ChainI = 0; 03311 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { 03312 // Serializing loads here may result in excessive register pressure, and 03313 // TokenFactor places arbitrary choke points on the scheduler. SD scheduling 03314 // could recover a bit by hoisting nodes upward in the chain by recognizing 03315 // they are side-effect free or do not alias. 
The optimizer should really 03316 // avoid this case by converting large object/array copies to llvm.memcpy 03317 // (MaxParallelChains should always remain as failsafe). 03318 if (ChainI == MaxParallelChains) { 03319 assert(PendingLoads.empty() && "PendingLoads must be serialized first"); 03320 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), 03321 MVT::Other, &Chains[0], ChainI); 03322 Root = Chain; 03323 ChainI = 0; 03324 } 03325 SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(), 03326 PtrVT, Ptr, 03327 DAG.getConstant(Offsets[i], PtrVT)); 03328 SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, 03329 A, MachinePointerInfo(SV, Offsets[i]), isVolatile, 03330 isNonTemporal, isInvariant, Alignment, TBAAInfo, 03331 Ranges); 03332 03333 Values[i] = L; 03334 Chains[ChainI] = L.getValue(1); 03335 } 03336 03337 if (!ConstantMemory) { 03338 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), 03339 MVT::Other, &Chains[0], ChainI); 03340 if (isVolatile) 03341 DAG.setRoot(Chain); 03342 else 03343 PendingLoads.push_back(Chain); 03344 } 03345 03346 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), 03347 DAG.getVTList(&ValueVTs[0], NumValues), 03348 &Values[0], NumValues)); 03349 } 03350 03351 void SelectionDAGBuilder::visitStore(const StoreInst &I) { 03352 if (I.isAtomic()) 03353 return visitAtomicStore(I); 03354 03355 const Value *SrcV = I.getOperand(0); 03356 const Value *PtrV = I.getOperand(1); 03357 03358 SmallVector<EVT, 4> ValueVTs; 03359 SmallVector<uint64_t, 4> Offsets; 03360 ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); 03361 unsigned NumValues = ValueVTs.size(); 03362 if (NumValues == 0) 03363 return; 03364 03365 // Get the lowered operands. Note that we do this after 03366 // checking if NumResults is zero, because with zero results 03367 // the operands won't have values in the map. 
03368 SDValue Src = getValue(SrcV); 03369 SDValue Ptr = getValue(PtrV); 03370 03371 SDValue Root = getRoot(); 03372 SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), 03373 NumValues)); 03374 EVT PtrVT = Ptr.getValueType(); 03375 bool isVolatile = I.isVolatile(); 03376 bool isNonTemporal = I.getMetadata("nontemporal") != 0; 03377 unsigned Alignment = I.getAlignment(); 03378 const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); 03379 03380 unsigned ChainI = 0; 03381 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { 03382 // See visitLoad comments. 03383 if (ChainI == MaxParallelChains) { 03384 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), 03385 MVT::Other, &Chains[0], ChainI); 03386 Root = Chain; 03387 ChainI = 0; 03388 } 03389 SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, 03390 DAG.getConstant(Offsets[i], PtrVT)); 03391 SDValue St = DAG.getStore(Root, getCurDebugLoc(), 03392 SDValue(Src.getNode(), Src.getResNo() + i), 03393 Add, MachinePointerInfo(PtrV, Offsets[i]), 03394 isVolatile, isNonTemporal, Alignment, TBAAInfo); 03395 Chains[ChainI] = St; 03396 } 03397 03398 SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), 03399 MVT::Other, &Chains[0], ChainI); 03400 ++SDNodeOrder; 03401 AssignOrderingToNode(StoreNode.getNode()); 03402 DAG.setRoot(StoreNode); 03403 } 03404 03405 static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, 03406 SynchronizationScope Scope, 03407 bool Before, DebugLoc dl, 03408 SelectionDAG &DAG, 03409 const TargetLowering &TLI) { 03410 // Fence, if necessary 03411 if (Before) { 03412 if (Order == AcquireRelease || Order == SequentiallyConsistent) 03413 Order = Release; 03414 else if (Order == Acquire || Order == Monotonic) 03415 return Chain; 03416 } else { 03417 if (Order == AcquireRelease) 03418 Order = Acquire; 03419 else if (Order == Release || Order == Monotonic) 03420 return Chain; 03421 } 03422 SDValue Ops[3]; 03423 Ops[0] = Chain; 03424 
Ops[1] = DAG.getConstant(Order, TLI.getPointerTy()); 03425 Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy()); 03426 return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3); 03427 } 03428 03429 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { 03430 DebugLoc dl = getCurDebugLoc(); 03431 AtomicOrdering Order = I.getOrdering(); 03432 SynchronizationScope Scope = I.getSynchScope(); 03433 03434 SDValue InChain = getRoot(); 03435 03436 if (TLI.getInsertFencesForAtomic()) 03437 InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, 03438 DAG, TLI); 03439 03440 SDValue L = 03441 DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, 03442 getValue(I.getCompareOperand()).getValueType().getSimpleVT(), 03443 InChain, 03444 getValue(I.getPointerOperand()), 03445 getValue(I.getCompareOperand()), 03446 getValue(I.getNewValOperand()), 03447 MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, 03448 TLI.getInsertFencesForAtomic() ? Monotonic : Order, 03449 Scope); 03450 03451 SDValue OutChain = L.getValue(1); 03452 03453 if (TLI.getInsertFencesForAtomic()) 03454 OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, 03455 DAG, TLI); 03456 03457 setValue(&I, L); 03458 DAG.setRoot(OutChain); 03459 } 03460 03461 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { 03462 DebugLoc dl = getCurDebugLoc(); 03463 ISD::NodeType NT; 03464 switch (I.getOperation()) { 03465 default: llvm_unreachable("Unknown atomicrmw operation"); 03466 case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; 03467 case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; 03468 case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; 03469 case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break; 03470 case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break; 03471 case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break; 03472 case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break; 03473 case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; 
break; 03474 case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; 03475 case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break; 03476 case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; 03477 } 03478 AtomicOrdering Order = I.getOrdering(); 03479 SynchronizationScope Scope = I.getSynchScope(); 03480 03481 SDValue InChain = getRoot(); 03482 03483 if (TLI.getInsertFencesForAtomic()) 03484 InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, 03485 DAG, TLI); 03486 03487 SDValue L = 03488 DAG.getAtomic(NT, dl, 03489 getValue(I.getValOperand()).getValueType().getSimpleVT(), 03490 InChain, 03491 getValue(I.getPointerOperand()), 03492 getValue(I.getValOperand()), 03493 I.getPointerOperand(), 0 /* Alignment */, 03494 TLI.getInsertFencesForAtomic() ? Monotonic : Order, 03495 Scope); 03496 03497 SDValue OutChain = L.getValue(1); 03498 03499 if (TLI.getInsertFencesForAtomic()) 03500 OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, 03501 DAG, TLI); 03502 03503 setValue(&I, L); 03504 DAG.setRoot(OutChain); 03505 } 03506 03507 void SelectionDAGBuilder::visitFence(const FenceInst &I) { 03508 DebugLoc dl = getCurDebugLoc(); 03509 SDValue Ops[3]; 03510 Ops[0] = getRoot(); 03511 Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); 03512 Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); 03513 DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); 03514 } 03515 03516 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { 03517 DebugLoc dl = getCurDebugLoc(); 03518 AtomicOrdering Order = I.getOrdering(); 03519 SynchronizationScope Scope = I.getSynchScope(); 03520 03521 SDValue InChain = getRoot(); 03522 03523 EVT VT = TLI.getValueType(I.getType()); 03524 03525 if (I.getAlignment() < VT.getSizeInBits() / 8) 03526 report_fatal_error("Cannot generate unaligned atomic load"); 03527 03528 SDValue L = 03529 DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, 03530 getValue(I.getPointerOperand()), 
03531 I.getPointerOperand(), I.getAlignment(), 03532 TLI.getInsertFencesForAtomic() ? Monotonic : Order, 03533 Scope); 03534 03535 SDValue OutChain = L.getValue(1); 03536 03537 if (TLI.getInsertFencesForAtomic()) 03538 OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, 03539 DAG, TLI); 03540 03541 setValue(&I, L); 03542 DAG.setRoot(OutChain); 03543 } 03544 03545 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { 03546 DebugLoc dl = getCurDebugLoc(); 03547 03548 AtomicOrdering Order = I.getOrdering(); 03549 SynchronizationScope Scope = I.getSynchScope(); 03550 03551 SDValue InChain = getRoot(); 03552 03553 EVT VT = TLI.getValueType(I.getValueOperand()->getType()); 03554 03555 if (I.getAlignment() < VT.getSizeInBits() / 8) 03556 report_fatal_error("Cannot generate unaligned atomic store"); 03557 03558 if (TLI.getInsertFencesForAtomic()) 03559 InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, 03560 DAG, TLI); 03561 03562 SDValue OutChain = 03563 DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, 03564 InChain, 03565 getValue(I.getPointerOperand()), 03566 getValue(I.getValueOperand()), 03567 I.getPointerOperand(), I.getAlignment(), 03568 TLI.getInsertFencesForAtomic() ? Monotonic : Order, 03569 Scope); 03570 03571 if (TLI.getInsertFencesForAtomic()) 03572 OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, 03573 DAG, TLI); 03574 03575 DAG.setRoot(OutChain); 03576 } 03577 03578 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC 03579 /// node. 03580 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, 03581 unsigned Intrinsic) { 03582 bool HasChain = !I.doesNotAccessMemory(); 03583 bool OnlyLoad = HasChain && I.onlyReadsMemory(); 03584 03585 // Build the operand list. 03586 SmallVector<SDValue, 8> Ops; 03587 if (HasChain) { // If this intrinsic has side-effects, chainify it. 03588 if (OnlyLoad) { 03589 // We don't need to serialize loads against other loads. 
03590 Ops.push_back(DAG.getRoot()); 03591 } else { 03592 Ops.push_back(getRoot()); 03593 } 03594 } 03595 03596 // Info is set by getTgtMemInstrinsic 03597 TargetLowering::IntrinsicInfo Info; 03598 bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); 03599 03600 // Add the intrinsic ID as an integer operand if it's not a target intrinsic. 03601 if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || 03602 Info.opc == ISD::INTRINSIC_W_CHAIN) 03603 Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); 03604 03605 // Add all operands of the call to the operand list. 03606 for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { 03607 SDValue Op = getValue(I.getArgOperand(i)); 03608 Ops.push_back(Op); 03609 } 03610 03611 SmallVector<EVT, 4> ValueVTs; 03612 ComputeValueVTs(TLI, I.getType(), ValueVTs); 03613 03614 if (HasChain) 03615 ValueVTs.push_back(MVT::Other); 03616 03617 SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); 03618 03619 // Create the node. 
03620 SDValue Result; 03621 if (IsTgtIntrinsic) { 03622 // This is target intrinsic that touches memory 03623 Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), 03624 VTs, &Ops[0], Ops.size(), 03625 Info.memVT, 03626 MachinePointerInfo(Info.ptrVal, Info.offset), 03627 Info.align, Info.vol, 03628 Info.readMem, Info.writeMem); 03629 } else if (!HasChain) { 03630 Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), 03631 VTs, &Ops[0], Ops.size()); 03632 } else if (!I.getType()->isVoidTy()) { 03633 Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), 03634 VTs, &Ops[0], Ops.size()); 03635 } else { 03636 Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(), 03637 VTs, &Ops[0], Ops.size()); 03638 } 03639 03640 if (HasChain) { 03641 SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); 03642 if (OnlyLoad) 03643 PendingLoads.push_back(Chain); 03644 else 03645 DAG.setRoot(Chain); 03646 } 03647 03648 if (!I.getType()->isVoidTy()) { 03649 if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { 03650 EVT VT = TLI.getValueType(PTy); 03651 Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result); 03652 } 03653 03654 setValue(&I, Result); 03655 } else { 03656 // Assign order to result here. If the intrinsic does not produce a result, 03657 // it won't be mapped to a SDNode and visit() will not assign it an order 03658 // number. 03659 ++SDNodeOrder; 03660 AssignOrderingToNode(Result.getNode()); 03661 } 03662 } 03663 03664 /// GetSignificand - Get the significand and build it into a floating-point 03665 /// number with exponent of 1: 03666 /// 03667 /// Op = (Op & 0x007fffff) | 0x3f800000; 03668 /// 03669 /// where Op is the hexadecimal representation of floating point value. 
03670 static SDValue 03671 GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { 03672 SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, 03673 DAG.getConstant(0x007fffff, MVT::i32)); 03674 SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, 03675 DAG.getConstant(0x3f800000, MVT::i32)); 03676 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); 03677 } 03678 03679 /// GetExponent - Get the exponent: 03680 /// 03681 /// (float)(int)(((Op & 0x7f800000) >> 23) - 127); 03682 /// 03683 /// where Op is the hexadecimal representation of floating point value. 03684 static SDValue 03685 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, 03686 DebugLoc dl) { 03687 SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, 03688 DAG.getConstant(0x7f800000, MVT::i32)); 03689 SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, 03690 DAG.getConstant(23, TLI.getPointerTy())); 03691 SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, 03692 DAG.getConstant(127, MVT::i32)); 03693 return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); 03694 } 03695 03696 /// getF32Constant - Get 32-bit floating point constant. 03697 static SDValue 03698 getF32Constant(SelectionDAG &DAG, unsigned Flt) { 03699 return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), 03700 MVT::f32); 03701 } 03702 03703 /// expandExp - Lower an exp intrinsic. Handles the special sequences for 03704 /// limited-precision mode. 
03705 static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG, 03706 const TargetLowering &TLI) { 03707 if (Op.getValueType() == MVT::f32 && 03708 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 03709 03710 // Put the exponent in the right bit position for later addition to the 03711 // final result: 03712 // 03713 // #define LOG2OFe 1.4426950f 03714 // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); 03715 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, 03716 getF32Constant(DAG, 0x3fb8aa3b)); 03717 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); 03718 03719 // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; 03720 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); 03721 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); 03722 03723 // IntegerPartOfX <<= 23; 03724 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, 03725 DAG.getConstant(23, TLI.getPointerTy())); 03726 03727 SDValue TwoToFracPartOfX; 03728 if (LimitFloatPrecision <= 6) { 03729 // For floating-point precision of 6: 03730 // 03731 // TwoToFractionalPartOfX = 03732 // 0.997535578f + 03733 // (0.735607626f + 0.252464424f * x) * x; 03734 // 03735 // error 0.0144103317, which is 6 bits 03736 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 03737 getF32Constant(DAG, 0x3e814304)); 03738 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 03739 getF32Constant(DAG, 0x3f3c50c8)); 03740 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 03741 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 03742 getF32Constant(DAG, 0x3f7f5e7e)); 03743 } else if (LimitFloatPrecision <= 12) { 03744 // For floating-point precision of 12: 03745 // 03746 // TwoToFractionalPartOfX = 03747 // 0.999892986f + 03748 // (0.696457318f + 03749 // (0.224338339f + 0.792043434e-1f * x) * x) * x; 03750 // 03751 // 0.000107046256 error, which is 13 to 14 bits 03752 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 
03753 getF32Constant(DAG, 0x3da235e3)); 03754 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 03755 getF32Constant(DAG, 0x3e65b8f3)); 03756 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 03757 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 03758 getF32Constant(DAG, 0x3f324b07)); 03759 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 03760 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 03761 getF32Constant(DAG, 0x3f7ff8fd)); 03762 } else { // LimitFloatPrecision <= 18 03763 // For floating-point precision of 18: 03764 // 03765 // TwoToFractionalPartOfX = 03766 // 0.999999982f + 03767 // (0.693148872f + 03768 // (0.240227044f + 03769 // (0.554906021e-1f + 03770 // (0.961591928e-2f + 03771 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; 03772 // 03773 // error 2.47208000*10^(-7), which is better than 18 bits 03774 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 03775 getF32Constant(DAG, 0x3924b03e)); 03776 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 03777 getF32Constant(DAG, 0x3ab24b87)); 03778 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 03779 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 03780 getF32Constant(DAG, 0x3c1d8c17)); 03781 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 03782 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 03783 getF32Constant(DAG, 0x3d634a1d)); 03784 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 03785 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 03786 getF32Constant(DAG, 0x3e75fe14)); 03787 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 03788 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, 03789 getF32Constant(DAG, 0x3f317234)); 03790 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); 03791 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, 03792 getF32Constant(DAG, 0x3f800000)); 03793 } 03794 03795 // Add the exponent into the result in integer domain. 
03796 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); 03797 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 03798 DAG.getNode(ISD::ADD, dl, MVT::i32, 03799 t13, IntegerPartOfX)); 03800 } 03801 03802 // No special expansion. 03803 return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); 03804 } 03805 03806 /// expandLog - Lower a log intrinsic. Handles the special sequences for 03807 /// limited-precision mode. 03808 static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG, 03809 const TargetLowering &TLI) { 03810 if (Op.getValueType() == MVT::f32 && 03811 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 03812 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 03813 03814 // Scale the exponent by log(2) [0.69314718f]. 03815 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); 03816 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, 03817 getF32Constant(DAG, 0x3f317218)); 03818 03819 // Get the significand and build it into a floating-point number with 03820 // exponent of 1. 
03821 SDValue X = GetSignificand(DAG, Op1, dl); 03822 03823 SDValue LogOfMantissa; 03824 if (LimitFloatPrecision <= 6) { 03825 // For floating-point precision of 6: 03826 // 03827 // LogofMantissa = 03828 // -1.1609546f + 03829 // (1.4034025f - 0.23903021f * x) * x; 03830 // 03831 // error 0.0034276066, which is better than 8 bits 03832 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 03833 getF32Constant(DAG, 0xbe74c456)); 03834 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 03835 getF32Constant(DAG, 0x3fb3a2b1)); 03836 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 03837 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 03838 getF32Constant(DAG, 0x3f949a29)); 03839 } else if (LimitFloatPrecision <= 12) { 03840 // For floating-point precision of 12: 03841 // 03842 // LogOfMantissa = 03843 // -1.7417939f + 03844 // (2.8212026f + 03845 // (-1.4699568f + 03846 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; 03847 // 03848 // error 0.000061011436, which is 14 bits 03849 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 03850 getF32Constant(DAG, 0xbd67b6d6)); 03851 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 03852 getF32Constant(DAG, 0x3ee4f4b8)); 03853 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 03854 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 03855 getF32Constant(DAG, 0x3fbc278b)); 03856 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 03857 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 03858 getF32Constant(DAG, 0x40348e95)); 03859 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 03860 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 03861 getF32Constant(DAG, 0x3fdef31a)); 03862 } else { // LimitFloatPrecision <= 18 03863 // For floating-point precision of 18: 03864 // 03865 // LogOfMantissa = 03866 // -2.1072184f + 03867 // (4.2372794f + 03868 // (-3.7029485f + 03869 // (2.2781945f + 03870 // (-0.87823314f + 03871 // (0.19073739f - 0.17809712e-1f * x) * x) * x) 
* x) * x)*x; 03872 // 03873 // error 0.0000023660568, which is better than 18 bits 03874 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 03875 getF32Constant(DAG, 0xbc91e5ac)); 03876 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 03877 getF32Constant(DAG, 0x3e4350aa)); 03878 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 03879 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 03880 getF32Constant(DAG, 0x3f60d3e3)); 03881 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 03882 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 03883 getF32Constant(DAG, 0x4011cdf0)); 03884 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 03885 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 03886 getF32Constant(DAG, 0x406cfd1c)); 03887 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 03888 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 03889 getF32Constant(DAG, 0x408797cb)); 03890 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 03891 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, 03892 getF32Constant(DAG, 0x4006dcab)); 03893 } 03894 03895 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); 03896 } 03897 03898 // No special expansion. 03899 return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); 03900 } 03901 03902 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for 03903 /// limited-precision mode. 03904 static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, 03905 const TargetLowering &TLI) { 03906 if (Op.getValueType() == MVT::f32 && 03907 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 03908 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 03909 03910 // Get the exponent. 03911 SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); 03912 03913 // Get the significand and build it into a floating-point number with 03914 // exponent of 1. 
03915 SDValue X = GetSignificand(DAG, Op1, dl); 03916 03917 // Different possible minimax approximations of significand in 03918 // floating-point for various degrees of accuracy over [1,2]. 03919 SDValue Log2ofMantissa; 03920 if (LimitFloatPrecision <= 6) { 03921 // For floating-point precision of 6: 03922 // 03923 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; 03924 // 03925 // error 0.0049451742, which is more than 7 bits 03926 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 03927 getF32Constant(DAG, 0xbeb08fe0)); 03928 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 03929 getF32Constant(DAG, 0x40019463)); 03930 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 03931 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 03932 getF32Constant(DAG, 0x3fd6633d)); 03933 } else if (LimitFloatPrecision <= 12) { 03934 // For floating-point precision of 12: 03935 // 03936 // Log2ofMantissa = 03937 // -2.51285454f + 03938 // (4.07009056f + 03939 // (-2.12067489f + 03940 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; 03941 // 03942 // error 0.0000876136000, which is better than 13 bits 03943 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 03944 getF32Constant(DAG, 0xbda7262e)); 03945 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 03946 getF32Constant(DAG, 0x3f25280b)); 03947 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 03948 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 03949 getF32Constant(DAG, 0x4007b923)); 03950 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 03951 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 03952 getF32Constant(DAG, 0x40823e2f)); 03953 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 03954 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 03955 getF32Constant(DAG, 0x4020d29c)); 03956 } else { // LimitFloatPrecision <= 18 03957 // For floating-point precision of 18: 03958 // 03959 // Log2ofMantissa = 03960 // -3.0400495f + 03961 // 
(6.1129976f + 03962 // (-5.3420409f + 03963 // (3.2865683f + 03964 // (-1.2669343f + 03965 // (0.27515199f - 03966 // 0.25691327e-1f * x) * x) * x) * x) * x) * x; 03967 // 03968 // error 0.0000018516, which is better than 18 bits 03969 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 03970 getF32Constant(DAG, 0xbcd2769e)); 03971 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 03972 getF32Constant(DAG, 0x3e8ce0b9)); 03973 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 03974 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 03975 getF32Constant(DAG, 0x3fa22ae7)); 03976 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 03977 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 03978 getF32Constant(DAG, 0x40525723)); 03979 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 03980 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 03981 getF32Constant(DAG, 0x40aaf200)); 03982 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 03983 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 03984 getF32Constant(DAG, 0x40c39dad)); 03985 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 03986 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, 03987 getF32Constant(DAG, 0x4042902c)); 03988 } 03989 03990 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); 03991 } 03992 03993 // No special expansion. 03994 return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); 03995 } 03996 03997 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for 03998 /// limited-precision mode. 03999 static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG, 04000 const TargetLowering &TLI) { 04001 if (Op.getValueType() == MVT::f32 && 04002 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 04003 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 04004 04005 // Scale the exponent by log10(2) [0.30102999f]. 
04006 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); 04007 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, 04008 getF32Constant(DAG, 0x3e9a209a)); 04009 04010 // Get the significand and build it into a floating-point number with 04011 // exponent of 1. 04012 SDValue X = GetSignificand(DAG, Op1, dl); 04013 04014 SDValue Log10ofMantissa; 04015 if (LimitFloatPrecision <= 6) { 04016 // For floating-point precision of 6: 04017 // 04018 // Log10ofMantissa = 04019 // -0.50419619f + 04020 // (0.60948995f - 0.10380950f * x) * x; 04021 // 04022 // error 0.0014886165, which is 6 bits 04023 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04024 getF32Constant(DAG, 0xbdd49a13)); 04025 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 04026 getF32Constant(DAG, 0x3f1c0789)); 04027 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 04028 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 04029 getF32Constant(DAG, 0x3f011300)); 04030 } else if (LimitFloatPrecision <= 12) { 04031 // For floating-point precision of 12: 04032 // 04033 // Log10ofMantissa = 04034 // -0.64831180f + 04035 // (0.91751397f + 04036 // (-0.31664806f + 0.47637168e-1f * x) * x) * x; 04037 // 04038 // error 0.00019228036, which is better than 12 bits 04039 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04040 getF32Constant(DAG, 0x3d431f31)); 04041 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, 04042 getF32Constant(DAG, 0x3ea21fb2)); 04043 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 04044 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 04045 getF32Constant(DAG, 0x3f6ae232)); 04046 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 04047 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, 04048 getF32Constant(DAG, 0x3f25f7c3)); 04049 } else { // LimitFloatPrecision <= 18 04050 // For floating-point precision of 18: 04051 // 04052 // Log10ofMantissa = 04053 // -0.84299375f + 04054 // (1.5327582f + 04055 // (-1.0688956f + 04056 // 
(0.49102474f + 04057 // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; 04058 // 04059 // error 0.0000037995730, which is better than 18 bits 04060 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04061 getF32Constant(DAG, 0x3c5d51ce)); 04062 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, 04063 getF32Constant(DAG, 0x3e00685a)); 04064 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 04065 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 04066 getF32Constant(DAG, 0x3efb6798)); 04067 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 04068 SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, 04069 getF32Constant(DAG, 0x3f88d192)); 04070 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 04071 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 04072 getF32Constant(DAG, 0x3fc4316c)); 04073 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 04074 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, 04075 getF32Constant(DAG, 0x3f57ce70)); 04076 } 04077 04078 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); 04079 } 04080 04081 // No special expansion. 04082 return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); 04083 } 04084 04085 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for 04086 /// limited-precision mode. 
04087 static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, 04088 const TargetLowering &TLI) { 04089 if (Op.getValueType() == MVT::f32 && 04090 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 04091 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); 04092 04093 // FractionalPartOfX = x - (float)IntegerPartOfX; 04094 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); 04095 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); 04096 04097 // IntegerPartOfX <<= 23; 04098 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, 04099 DAG.getConstant(23, TLI.getPointerTy())); 04100 04101 SDValue TwoToFractionalPartOfX; 04102 if (LimitFloatPrecision <= 6) { 04103 // For floating-point precision of 6: 04104 // 04105 // TwoToFractionalPartOfX = 04106 // 0.997535578f + 04107 // (0.735607626f + 0.252464424f * x) * x; 04108 // 04109 // error 0.0144103317, which is 6 bits 04110 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04111 getF32Constant(DAG, 0x3e814304)); 04112 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 04113 getF32Constant(DAG, 0x3f3c50c8)); 04114 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 04115 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 04116 getF32Constant(DAG, 0x3f7f5e7e)); 04117 } else if (LimitFloatPrecision <= 12) { 04118 // For floating-point precision of 12: 04119 // 04120 // TwoToFractionalPartOfX = 04121 // 0.999892986f + 04122 // (0.696457318f + 04123 // (0.224338339f + 0.792043434e-1f * x) * x) * x; 04124 // 04125 // error 0.000107046256, which is 13 to 14 bits 04126 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04127 getF32Constant(DAG, 0x3da235e3)); 04128 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 04129 getF32Constant(DAG, 0x3e65b8f3)); 04130 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 04131 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 04132 getF32Constant(DAG, 0x3f324b07)); 
04133 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 04134 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 04135 getF32Constant(DAG, 0x3f7ff8fd)); 04136 } else { // LimitFloatPrecision <= 18 04137 // For floating-point precision of 18: 04138 // 04139 // TwoToFractionalPartOfX = 04140 // 0.999999982f + 04141 // (0.693148872f + 04142 // (0.240227044f + 04143 // (0.554906021e-1f + 04144 // (0.961591928e-2f + 04145 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; 04146 // error 2.47208000*10^(-7), which is better than 18 bits 04147 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04148 getF32Constant(DAG, 0x3924b03e)); 04149 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 04150 getF32Constant(DAG, 0x3ab24b87)); 04151 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 04152 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 04153 getF32Constant(DAG, 0x3c1d8c17)); 04154 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 04155 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 04156 getF32Constant(DAG, 0x3d634a1d)); 04157 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 04158 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 04159 getF32Constant(DAG, 0x3e75fe14)); 04160 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 04161 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, 04162 getF32Constant(DAG, 0x3f317234)); 04163 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); 04164 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, 04165 getF32Constant(DAG, 0x3f800000)); 04166 } 04167 04168 // Add the exponent into the result in integer domain. 04169 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, 04170 TwoToFractionalPartOfX); 04171 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 04172 DAG.getNode(ISD::ADD, dl, MVT::i32, 04173 t13, IntegerPartOfX)); 04174 } 04175 04176 // No special expansion. 
04177 return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); 04178 } 04179 04180 /// visitPow - Lower a pow intrinsic. Handles the special sequences for 04181 /// limited-precision mode with x == 10.0f. 04182 static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS, 04183 SelectionDAG &DAG, const TargetLowering &TLI) { 04184 bool IsExp10 = false; 04185 if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 && 04186 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 04187 if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) { 04188 APFloat Ten(10.0f); 04189 IsExp10 = LHSC->isExactlyValue(Ten); 04190 } 04191 } 04192 04193 if (IsExp10) { 04194 // Put the exponent in the right bit position for later addition to the 04195 // final result: 04196 // 04197 // #define LOG2OF10 3.3219281f 04198 // IntegerPartOfX = (int32_t)(x * LOG2OF10); 04199 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, 04200 getF32Constant(DAG, 0x40549a78)); 04201 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); 04202 04203 // FractionalPartOfX = x - (float)IntegerPartOfX; 04204 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); 04205 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); 04206 04207 // IntegerPartOfX <<= 23; 04208 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, 04209 DAG.getConstant(23, TLI.getPointerTy())); 04210 04211 SDValue TwoToFractionalPartOfX; 04212 if (LimitFloatPrecision <= 6) { 04213 // For floating-point precision of 6: 04214 // 04215 // twoToFractionalPartOfX = 04216 // 0.997535578f + 04217 // (0.735607626f + 0.252464424f * x) * x; 04218 // 04219 // error 0.0144103317, which is 6 bits 04220 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04221 getF32Constant(DAG, 0x3e814304)); 04222 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 04223 getF32Constant(DAG, 0x3f3c50c8)); 04224 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 04225 
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 04226 getF32Constant(DAG, 0x3f7f5e7e)); 04227 } else if (LimitFloatPrecision <= 12) { 04228 // For floating-point precision of 12: 04229 // 04230 // TwoToFractionalPartOfX = 04231 // 0.999892986f + 04232 // (0.696457318f + 04233 // (0.224338339f + 0.792043434e-1f * x) * x) * x; 04234 // 04235 // error 0.000107046256, which is 13 to 14 bits 04236 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04237 getF32Constant(DAG, 0x3da235e3)); 04238 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 04239 getF32Constant(DAG, 0x3e65b8f3)); 04240 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 04241 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 04242 getF32Constant(DAG, 0x3f324b07)); 04243 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 04244 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 04245 getF32Constant(DAG, 0x3f7ff8fd)); 04246 } else { // LimitFloatPrecision <= 18 04247 // For floating-point precision of 18: 04248 // 04249 // TwoToFractionalPartOfX = 04250 // 0.999999982f + 04251 // (0.693148872f + 04252 // (0.240227044f + 04253 // (0.554906021e-1f + 04254 // (0.961591928e-2f + 04255 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; 04256 // error 2.47208000*10^(-7), which is better than 18 bits 04257 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04258 getF32Constant(DAG, 0x3924b03e)); 04259 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 04260 getF32Constant(DAG, 0x3ab24b87)); 04261 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 04262 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 04263 getF32Constant(DAG, 0x3c1d8c17)); 04264 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 04265 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 04266 getF32Constant(DAG, 0x3d634a1d)); 04267 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 04268 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 04269 
getF32Constant(DAG, 0x3e75fe14)); 04270 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 04271 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, 04272 getF32Constant(DAG, 0x3f317234)); 04273 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); 04274 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, 04275 getF32Constant(DAG, 0x3f800000)); 04276 } 04277 04278 SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX); 04279 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 04280 DAG.getNode(ISD::ADD, dl, MVT::i32, 04281 t13, IntegerPartOfX)); 04282 } 04283 04284 // No special expansion. 04285 return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); 04286 } 04287 04288 04289 /// ExpandPowI - Expand a llvm.powi intrinsic. 04290 static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, 04291 SelectionDAG &DAG) { 04292 // If RHS is a constant, we can expand this out to a multiplication tree, 04293 // otherwise we end up lowering to a call to __powidf2 (for example). When 04294 // optimizing for size, we only want to do this if the expansion would produce 04295 // a small number of multiplies, otherwise we do the full expansion. 04296 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 04297 // Get the exponent as a positive value. 04298 unsigned Val = RHSC->getSExtValue(); 04299 if ((int)Val < 0) Val = -Val; 04300 04301 // powi(x, 0) -> 1.0 04302 if (Val == 0) 04303 return DAG.getConstantFP(1.0, LHS.getValueType()); 04304 04305 const Function *F = DAG.getMachineFunction().getFunction(); 04306 if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, 04307 Attribute::OptimizeForSize) || 04308 // If optimizing for size, don't insert too many multiplies. This 04309 // inserts up to 5 multiplies. 04310 CountPopulation_32(Val)+Log2_32(Val) < 7) { 04311 // We use the simple binary decomposition method to generate the multiply 04312 // sequence. 
There are more optimal ways to do this (for example, 04313 // powi(x,15) generates one more multiply than it should), but this has 04314 // the benefit of being both really simple and much better than a libcall. 04315 SDValue Res; // Logically starts equal to 1.0 04316 SDValue CurSquare = LHS; 04317 while (Val) { 04318 if (Val & 1) { 04319 if (Res.getNode()) 04320 Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); 04321 else 04322 Res = CurSquare; // 1.0*CurSquare. 04323 } 04324 04325 CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), 04326 CurSquare, CurSquare); 04327 Val >>= 1; 04328 } 04329 04330 // If the original was negative, invert the result, producing 1/(x*x*x). 04331 if (RHSC->getSExtValue() < 0) 04332 Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), 04333 DAG.getConstantFP(1.0, LHS.getValueType()), Res); 04334 return Res; 04335 } 04336 } 04337 04338 // Otherwise, expand to a libcall. 04339 return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); 04340 } 04341 04342 // getTruncatedArgReg - Find underlying register used for an truncated 04343 // argument. 04344 static unsigned getTruncatedArgReg(const SDValue &N) { 04345 if (N.getOpcode() != ISD::TRUNCATE) 04346 return 0; 04347 04348 const SDValue &Ext = N.getOperand(0); 04349 if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){ 04350 const SDValue &CFR = Ext.getOperand(0); 04351 if (CFR.getOpcode() == ISD::CopyFromReg) 04352 return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); 04353 if (CFR.getOpcode() == ISD::TRUNCATE) 04354 return getTruncatedArgReg(CFR); 04355 } 04356 return 0; 04357 } 04358 04359 /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function 04360 /// argument, create the corresponding DBG_VALUE machine instruction for it now. 04361 /// At the end of instruction selection, they will be inserted to the entry BB. 
04362 bool 04363 SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, 04364 int64_t Offset, 04365 const SDValue &N) { 04366 const Argument *Arg = dyn_cast<Argument>(V); 04367 if (!Arg) 04368 return false; 04369 04370 MachineFunction &MF = DAG.getMachineFunction(); 04371 const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); 04372 const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); 04373 04374 // Ignore inlined function arguments here. 04375 DIVariable DV(Variable); 04376 if (DV.isInlinedFnArgument(MF.getFunction())) 04377 return false; 04378 04379 unsigned Reg = 0; 04380 // Some arguments' frame index is recorded during argument lowering. 04381 Offset = FuncInfo.getArgumentFrameIndex(Arg); 04382 if (Offset) 04383 Reg = TRI->getFrameRegister(MF); 04384 04385 if (!Reg && N.getNode()) { 04386 if (N.getOpcode() == ISD::CopyFromReg) 04387 Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); 04388 else 04389 Reg = getTruncatedArgReg(N); 04390 if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { 04391 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 04392 unsigned PR = RegInfo.getLiveInPhysReg(Reg); 04393 if (PR) 04394 Reg = PR; 04395 } 04396 } 04397 04398 if (!Reg) { 04399 // Check if ValueMap has reg number. 04400 DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); 04401 if (VMI != FuncInfo.ValueMap.end()) 04402 Reg = VMI->second; 04403 } 04404 04405 if (!Reg && N.getNode()) { 04406 // Check if frame index is available. 
04407 if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) 04408 if (FrameIndexSDNode *FINode = 04409 dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) { 04410 Reg = TRI->getFrameRegister(MF); 04411 Offset = FINode->getIndex(); 04412 } 04413 } 04414 04415 if (!Reg) 04416 return false; 04417 04418 MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), 04419 TII->get(TargetOpcode::DBG_VALUE)) 04420 .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); 04421 FuncInfo.ArgDbgValues.push_back(&*MIB); 04422 return true; 04423 } 04424 04425 // VisualStudio defines setjmp as _setjmp 04426 #if defined(_MSC_VER) && defined(setjmp) && \ 04427 !defined(setjmp_undefined_for_msvc) 04428 # pragma push_macro("setjmp") 04429 # undef setjmp 04430 # define setjmp_undefined_for_msvc 04431 #endif 04432 04433 /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If 04434 /// we want to emit this as a call to a named external function, return the name 04435 /// otherwise lower it and return null. 04436 const char * 04437 SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 04438 DebugLoc dl = getCurDebugLoc(); 04439 SDValue Res; 04440 04441 switch (Intrinsic) { 04442 default: 04443 // By default, turn this into a target intrinsic node. 
04444 visitTargetIntrinsic(I, Intrinsic); 04445 return 0; 04446 case Intrinsic::vastart: visitVAStart(I); return 0; 04447 case Intrinsic::vaend: visitVAEnd(I); return 0; 04448 case Intrinsic::vacopy: visitVACopy(I); return 0; 04449 case Intrinsic::returnaddress: 04450 setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), 04451 getValue(I.getArgOperand(0)))); 04452 return 0; 04453 case Intrinsic::frameaddress: 04454 setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), 04455 getValue(I.getArgOperand(0)))); 04456 return 0; 04457 case Intrinsic::setjmp: 04458 return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; 04459 case Intrinsic::longjmp: 04460 return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; 04461 case Intrinsic::memcpy: { 04462 // Assert for address < 256 since we support only user defined address 04463 // spaces. 04464 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 04465 < 256 && 04466 cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() 04467 < 256 && 04468 "Unknown address space"); 04469 SDValue Op1 = getValue(I.getArgOperand(0)); 04470 SDValue Op2 = getValue(I.getArgOperand(1)); 04471 SDValue Op3 = getValue(I.getArgOperand(2)); 04472 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 04473 if (!Align) 04474 Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. 04475 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 04476 DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, 04477 MachinePointerInfo(I.getArgOperand(0)), 04478 MachinePointerInfo(I.getArgOperand(1)))); 04479 return 0; 04480 } 04481 case Intrinsic::memset: { 04482 // Assert for address < 256 since we support only user defined address 04483 // spaces. 
04484 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 04485 < 256 && 04486 "Unknown address space"); 04487 SDValue Op1 = getValue(I.getArgOperand(0)); 04488 SDValue Op2 = getValue(I.getArgOperand(1)); 04489 SDValue Op3 = getValue(I.getArgOperand(2)); 04490 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 04491 if (!Align) 04492 Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. 04493 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 04494 DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, 04495 MachinePointerInfo(I.getArgOperand(0)))); 04496 return 0; 04497 } 04498 case Intrinsic::memmove: { 04499 // Assert for address < 256 since we support only user defined address 04500 // spaces. 04501 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 04502 < 256 && 04503 cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() 04504 < 256 && 04505 "Unknown address space"); 04506 SDValue Op1 = getValue(I.getArgOperand(0)); 04507 SDValue Op2 = getValue(I.getArgOperand(1)); 04508 SDValue Op3 = getValue(I.getArgOperand(2)); 04509 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 04510 if (!Align) 04511 Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. 04512 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 04513 DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, 04514 MachinePointerInfo(I.getArgOperand(0)), 04515 MachinePointerInfo(I.getArgOperand(1)))); 04516 return 0; 04517 } 04518 case Intrinsic::dbg_declare: { 04519 const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); 04520 MDNode *Variable = DI.getVariable(); 04521 const Value *Address = DI.getAddress(); 04522 if (!Address || !DIVariable(Variable).Verify()) { 04523 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 04524 return 0; 04525 } 04526 04527 // Build an entry in DbgOrdering. 
Debug info input nodes get an SDNodeOrder 04528 // but do not always have a corresponding SDNode built. The SDNodeOrder 04529 // absolute, but not relative, values are different depending on whether 04530 // debug info exists. 04531 ++SDNodeOrder; 04532 04533 // Check if address has undef value. 04534 if (isa<UndefValue>(Address) || 04535 (Address->use_empty() && !isa<Argument>(Address))) { 04536 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 04537 return 0; 04538 } 04539 04540 SDValue &N = NodeMap[Address]; 04541 if (!N.getNode() && isa<Argument>(Address)) 04542 // Check unused arguments map. 04543 N = UnusedArgNodeMap[Address]; 04544 SDDbgValue *SDV; 04545 if (N.getNode()) { 04546 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) 04547 Address = BCI->getOperand(0); 04548 // Parameters are handled specially. 04549 bool isParameter = 04550 (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || 04551 isa<Argument>(Address)); 04552 04553 const AllocaInst *AI = dyn_cast<AllocaInst>(Address); 04554 04555 if (isParameter && !AI) { 04556 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); 04557 if (FINode) 04558 // Byval parameter. We have a frame index at this point. 04559 SDV = DAG.getDbgValue(Variable, FINode->getIndex(), 04560 0, dl, SDNodeOrder); 04561 else { 04562 // Address is an argument, so try to emit its dbg value using 04563 // virtual register info from the FuncInfo.ValueMap. 04564 EmitFuncArgumentDbgValue(Address, Variable, 0, N); 04565 return 0; 04566 } 04567 } else if (AI) 04568 SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), 04569 0, dl, SDNodeOrder); 04570 else { 04571 // Can't do anything with other non-AI cases yet. 
04572 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 04573 DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); 04574 DEBUG(Address->dump()); 04575 return 0; 04576 } 04577 DAG.AddDbgValue(SDV, N.getNode(), isParameter); 04578 } else { 04579 // If Address is an argument then try to emit its dbg value using 04580 // virtual register info from the FuncInfo.ValueMap. 04581 if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) { 04582 // If variable is pinned by a alloca in dominating bb then 04583 // use StaticAllocaMap. 04584 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { 04585 if (AI->getParent() != DI.getParent()) { 04586 DenseMap<const AllocaInst*, int>::iterator SI = 04587 FuncInfo.StaticAllocaMap.find(AI); 04588 if (SI != FuncInfo.StaticAllocaMap.end()) { 04589 SDV = DAG.getDbgValue(Variable, SI->second, 04590 0, dl, SDNodeOrder); 04591 DAG.AddDbgValue(SDV, 0, false); 04592 return 0; 04593 } 04594 } 04595 } 04596 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 04597 } 04598 } 04599 return 0; 04600 } 04601 case Intrinsic::dbg_value: { 04602 const DbgValueInst &DI = cast<DbgValueInst>(I); 04603 if (!DIVariable(DI.getVariable()).Verify()) 04604 return 0; 04605 04606 MDNode *Variable = DI.getVariable(); 04607 uint64_t Offset = DI.getOffset(); 04608 const Value *V = DI.getValue(); 04609 if (!V) 04610 return 0; 04611 04612 // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder 04613 // but do not always have a corresponding SDNode built. The SDNodeOrder 04614 // absolute, but not relative, values are different depending on whether 04615 // debug info exists. 
04616 ++SDNodeOrder; 04617 SDDbgValue *SDV; 04618 if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { 04619 SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); 04620 DAG.AddDbgValue(SDV, 0, false); 04621 } else { 04622 // Do not use getValue() in here; we don't want to generate code at 04623 // this point if it hasn't been done yet. 04624 SDValue N = NodeMap[V]; 04625 if (!N.getNode() && isa<Argument>(V)) 04626 // Check unused arguments map. 04627 N = UnusedArgNodeMap[V]; 04628 if (N.getNode()) { 04629 if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) { 04630 SDV = DAG.getDbgValue(Variable, N.getNode(), 04631 N.getResNo(), Offset, dl, SDNodeOrder); 04632 DAG.AddDbgValue(SDV, N.getNode(), false); 04633 } 04634 } else if (!V->use_empty() ) { 04635 // Do not call getValue(V) yet, as we don't want to generate code. 04636 // Remember it for later. 04637 DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); 04638 DanglingDebugInfoMap[V] = DDI; 04639 } else { 04640 // We may expand this to cover more cases. One case where we have no 04641 // data available is an unreferenced parameter. 04642 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 04643 } 04644 } 04645 04646 // Build a debug info table entry. 04647 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) 04648 V = BCI->getOperand(0); 04649 const AllocaInst *AI = dyn_cast<AllocaInst>(V); 04650 // Don't handle byval struct arguments or VLAs, for example. 04651 if (!AI) { 04652 DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); 04653 DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); 04654 return 0; 04655 } 04656 DenseMap<const AllocaInst*, int>::iterator SI = 04657 FuncInfo.StaticAllocaMap.find(AI); 04658 if (SI == FuncInfo.StaticAllocaMap.end()) 04659 return 0; // VLAs. 
04660 int FI = SI->second; 04661 04662 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 04663 if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) 04664 MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); 04665 return 0; 04666 } 04667 04668 case Intrinsic::eh_typeid_for: { 04669 // Find the type id for the given typeinfo. 04670 GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); 04671 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); 04672 Res = DAG.getConstant(TypeID, MVT::i32); 04673 setValue(&I, Res); 04674 return 0; 04675 } 04676 04677 case Intrinsic::eh_return_i32: 04678 case Intrinsic::eh_return_i64: 04679 DAG.getMachineFunction().getMMI().setCallsEHReturn(true); 04680 DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, 04681 MVT::Other, 04682 getControlRoot(), 04683 getValue(I.getArgOperand(0)), 04684 getValue(I.getArgOperand(1)))); 04685 return 0; 04686 case Intrinsic::eh_unwind_init: 04687 DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); 04688 return 0; 04689 case Intrinsic::eh_dwarf_cfa: { 04690 SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl, 04691 TLI.getPointerTy()); 04692 SDValue Offset = DAG.getNode(ISD::ADD, dl, 04693 TLI.getPointerTy(), 04694 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, 04695 TLI.getPointerTy()), 04696 CfaArg); 04697 SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl, 04698 TLI.getPointerTy(), 04699 DAG.getConstant(0, TLI.getPointerTy())); 04700 setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), 04701 FA, Offset)); 04702 return 0; 04703 } 04704 case Intrinsic::eh_sjlj_callsite: { 04705 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 04706 ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); 04707 assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); 04708 assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); 04709 04710 MMI.setCurrentCallSite(CI->getZExtValue()); 04711 return 0; 04712 } 04713 case 
Intrinsic::eh_sjlj_functioncontext: { 04714 // Get and store the index of the function context. 04715 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 04716 AllocaInst *FnCtx = 04717 cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); 04718 int FI = FuncInfo.StaticAllocaMap[FnCtx]; 04719 MFI->setFunctionContextIndex(FI); 04720 return 0; 04721 } 04722 case Intrinsic::eh_sjlj_setjmp: { 04723 SDValue Ops[2]; 04724 Ops[0] = getRoot(); 04725 Ops[1] = getValue(I.getArgOperand(0)); 04726 SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, 04727 DAG.getVTList(MVT::i32, MVT::Other), 04728 Ops, 2); 04729 setValue(&I, Op.getValue(0)); 04730 DAG.setRoot(Op.getValue(1)); 04731 return 0; 04732 } 04733 case Intrinsic::eh_sjlj_longjmp: { 04734 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, 04735 getRoot(), getValue(I.getArgOperand(0)))); 04736 return 0; 04737 } 04738 04739 case Intrinsic::x86_mmx_pslli_w: 04740 case Intrinsic::x86_mmx_pslli_d: 04741 case Intrinsic::x86_mmx_pslli_q: 04742 case Intrinsic::x86_mmx_psrli_w: 04743 case Intrinsic::x86_mmx_psrli_d: 04744 case Intrinsic::x86_mmx_psrli_q: 04745 case Intrinsic::x86_mmx_psrai_w: 04746 case Intrinsic::x86_mmx_psrai_d: { 04747 SDValue ShAmt = getValue(I.getArgOperand(1)); 04748 if (isa<ConstantSDNode>(ShAmt)) { 04749 visitTargetIntrinsic(I, Intrinsic); 04750 return 0; 04751 } 04752 unsigned NewIntrinsic = 0; 04753 EVT ShAmtVT = MVT::v2i32; 04754 switch (Intrinsic) { 04755 case Intrinsic::x86_mmx_pslli_w: 04756 NewIntrinsic = Intrinsic::x86_mmx_psll_w; 04757 break; 04758 case Intrinsic::x86_mmx_pslli_d: 04759 NewIntrinsic = Intrinsic::x86_mmx_psll_d; 04760 break; 04761 case Intrinsic::x86_mmx_pslli_q: 04762 NewIntrinsic = Intrinsic::x86_mmx_psll_q; 04763 break; 04764 case Intrinsic::x86_mmx_psrli_w: 04765 NewIntrinsic = Intrinsic::x86_mmx_psrl_w; 04766 break; 04767 case Intrinsic::x86_mmx_psrli_d: 04768 NewIntrinsic = Intrinsic::x86_mmx_psrl_d; 04769 break; 04770 case 
Intrinsic::x86_mmx_psrli_q: 04771 NewIntrinsic = Intrinsic::x86_mmx_psrl_q; 04772 break; 04773 case Intrinsic::x86_mmx_psrai_w: 04774 NewIntrinsic = Intrinsic::x86_mmx_psra_w; 04775 break; 04776 case Intrinsic::x86_mmx_psrai_d: 04777 NewIntrinsic = Intrinsic::x86_mmx_psra_d; 04778 break; 04779 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 04780 } 04781 04782 // The vector shift intrinsics with scalars uses 32b shift amounts but 04783 // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits 04784 // to be zero. 04785 // We must do this early because v2i32 is not a legal type. 04786 SDValue ShOps[2]; 04787 ShOps[0] = ShAmt; 04788 ShOps[1] = DAG.getConstant(0, MVT::i32); 04789 ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); 04790 EVT DestVT = TLI.getValueType(I.getType()); 04791 ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt); 04792 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 04793 DAG.getConstant(NewIntrinsic, MVT::i32), 04794 getValue(I.getArgOperand(0)), ShAmt); 04795 setValue(&I, Res); 04796 return 0; 04797 } 04798 case Intrinsic::x86_avx_vinsertf128_pd_256: 04799 case Intrinsic::x86_avx_vinsertf128_ps_256: 04800 case Intrinsic::x86_avx_vinsertf128_si_256: 04801 case Intrinsic::x86_avx2_vinserti128: { 04802 EVT DestVT = TLI.getValueType(I.getType()); 04803 EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); 04804 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * 04805 ElVT.getVectorNumElements(); 04806 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, 04807 getValue(I.getArgOperand(0)), 04808 getValue(I.getArgOperand(1)), 04809 DAG.getIntPtrConstant(Idx)); 04810 setValue(&I, Res); 04811 return 0; 04812 } 04813 case Intrinsic::x86_avx_vextractf128_pd_256: 04814 case Intrinsic::x86_avx_vextractf128_ps_256: 04815 case Intrinsic::x86_avx_vextractf128_si_256: 04816 case Intrinsic::x86_avx2_vextracti128: { 04817 EVT DestVT = TLI.getValueType(I.getType()); 
04818 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * 04819 DestVT.getVectorNumElements(); 04820 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, 04821 getValue(I.getArgOperand(0)), 04822 DAG.getIntPtrConstant(Idx)); 04823 setValue(&I, Res); 04824 return 0; 04825 } 04826 case Intrinsic::convertff: 04827 case Intrinsic::convertfsi: 04828 case Intrinsic::convertfui: 04829 case Intrinsic::convertsif: 04830 case Intrinsic::convertuif: 04831 case Intrinsic::convertss: 04832 case Intrinsic::convertsu: 04833 case Intrinsic::convertus: 04834 case Intrinsic::convertuu: { 04835 ISD::CvtCode Code = ISD::CVT_INVALID; 04836 switch (Intrinsic) { 04837 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 04838 case Intrinsic::convertff: Code = ISD::CVT_FF; break; 04839 case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; 04840 case Intrinsic::convertfui: Code = ISD::CVT_FU; break; 04841 case Intrinsic::convertsif: Code = ISD::CVT_SF; break; 04842 case Intrinsic::convertuif: Code = ISD::CVT_UF; break; 04843 case Intrinsic::convertss: Code = ISD::CVT_SS; break; 04844 case Intrinsic::convertsu: Code = ISD::CVT_SU; break; 04845 case Intrinsic::convertus: Code = ISD::CVT_US; break; 04846 case Intrinsic::convertuu: Code = ISD::CVT_UU; break; 04847 } 04848 EVT DestVT = TLI.getValueType(I.getType()); 04849 const Value *Op1 = I.getArgOperand(0); 04850 Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1), 04851 DAG.getValueType(DestVT), 04852 DAG.getValueType(getValue(Op1).getValueType()), 04853 getValue(I.getArgOperand(1)), 04854 getValue(I.getArgOperand(2)), 04855 Code); 04856 setValue(&I, Res); 04857 return 0; 04858 } 04859 case Intrinsic::powi: 04860 setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), 04861 getValue(I.getArgOperand(1)), DAG)); 04862 return 0; 04863 case Intrinsic::log: 04864 setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI)); 04865 return 0; 04866 case Intrinsic::log2: 04867 setValue(&I, 
expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); 04868 return 0; 04869 case Intrinsic::log10: 04870 setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI)); 04871 return 0; 04872 case Intrinsic::exp: 04873 setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI)); 04874 return 0; 04875 case Intrinsic::exp2: 04876 setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); 04877 return 0; 04878 case Intrinsic::pow: 04879 setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)), 04880 getValue(I.getArgOperand(1)), DAG, TLI)); 04881 return 0; 04882 case Intrinsic::sqrt: 04883 case Intrinsic::fabs: 04884 case Intrinsic::sin: 04885 case Intrinsic::cos: 04886 case Intrinsic::floor: 04887 case Intrinsic::ceil: 04888 case Intrinsic::trunc: 04889 case Intrinsic::rint: 04890 case Intrinsic::nearbyint: { 04891 unsigned Opcode; 04892 switch (Intrinsic) { 04893 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 04894 case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; 04895 case Intrinsic::fabs: Opcode = ISD::FABS; break; 04896 case Intrinsic::sin: Opcode = ISD::FSIN; break; 04897 case Intrinsic::cos: Opcode = ISD::FCOS; break; 04898 case Intrinsic::floor: Opcode = ISD::FFLOOR; break; 04899 case Intrinsic::ceil: Opcode = ISD::FCEIL; break; 04900 case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; 04901 case Intrinsic::rint: Opcode = ISD::FRINT; break; 04902 case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; 04903 } 04904 04905 setValue(&I, DAG.getNode(Opcode, dl, 04906 getValue(I.getArgOperand(0)).getValueType(), 04907 getValue(I.getArgOperand(0)))); 04908 return 0; 04909 } 04910 case Intrinsic::fma: 04911 setValue(&I, DAG.getNode(ISD::FMA, dl, 04912 getValue(I.getArgOperand(0)).getValueType(), 04913 getValue(I.getArgOperand(0)), 04914 getValue(I.getArgOperand(1)), 04915 getValue(I.getArgOperand(2)))); 04916 return 0; 04917 case Intrinsic::fmuladd: { 04918 EVT VT = TLI.getValueType(I.getType()); 04919 if 
(TM.Options.AllowFPOpFusion != FPOpFusion::Strict && 04920 TLI.isFMAFasterThanMulAndAdd(VT)){ 04921 setValue(&I, DAG.getNode(ISD::FMA, dl, 04922 getValue(I.getArgOperand(0)).getValueType(), 04923 getValue(I.getArgOperand(0)), 04924 getValue(I.getArgOperand(1)), 04925 getValue(I.getArgOperand(2)))); 04926 } else { 04927 SDValue Mul = DAG.getNode(ISD::FMUL, dl, 04928 getValue(I.getArgOperand(0)).getValueType(), 04929 getValue(I.getArgOperand(0)), 04930 getValue(I.getArgOperand(1))); 04931 SDValue Add = DAG.getNode(ISD::FADD, dl, 04932 getValue(I.getArgOperand(0)).getValueType(), 04933 Mul, 04934 getValue(I.getArgOperand(2))); 04935 setValue(&I, Add); 04936 } 04937 return 0; 04938 } 04939 case Intrinsic::convert_to_fp16: 04940 setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, 04941 MVT::i16, getValue(I.getArgOperand(0)))); 04942 return 0; 04943 case Intrinsic::convert_from_fp16: 04944 setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl, 04945 MVT::f32, getValue(I.getArgOperand(0)))); 04946 return 0; 04947 case Intrinsic::pcmarker: { 04948 SDValue Tmp = getValue(I.getArgOperand(0)); 04949 DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); 04950 return 0; 04951 } 04952 case Intrinsic::readcyclecounter: { 04953 SDValue Op = getRoot(); 04954 Res = DAG.getNode(ISD::READCYCLECOUNTER, dl, 04955 DAG.getVTList(MVT::i64, MVT::Other), 04956 &Op, 1); 04957 setValue(&I, Res); 04958 DAG.setRoot(Res.getValue(1)); 04959 return 0; 04960 } 04961 case Intrinsic::bswap: 04962 setValue(&I, DAG.getNode(ISD::BSWAP, dl, 04963 getValue(I.getArgOperand(0)).getValueType(), 04964 getValue(I.getArgOperand(0)))); 04965 return 0; 04966 case Intrinsic::cttz: { 04967 SDValue Arg = getValue(I.getArgOperand(0)); 04968 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); 04969 EVT Ty = Arg.getValueType(); 04970 setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, 04971 dl, Ty, Arg)); 04972 return 0; 04973 } 04974 case Intrinsic::ctlz: { 04975 SDValue Arg = getValue(I.getArgOperand(0)); 04976 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); 04977 EVT Ty = Arg.getValueType(); 04978 setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, 04979 dl, Ty, Arg)); 04980 return 0; 04981 } 04982 case Intrinsic::ctpop: { 04983 SDValue Arg = getValue(I.getArgOperand(0)); 04984 EVT Ty = Arg.getValueType(); 04985 setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); 04986 return 0; 04987 } 04988 case Intrinsic::stacksave: { 04989 SDValue Op = getRoot(); 04990 Res = DAG.getNode(ISD::STACKSAVE, dl, 04991 DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); 04992 setValue(&I, Res); 04993 DAG.setRoot(Res.getValue(1)); 04994 return 0; 04995 } 04996 case Intrinsic::stackrestore: { 04997 Res = getValue(I.getArgOperand(0)); 04998 DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); 04999 return 0; 05000 } 05001 case Intrinsic::stackprotector: { 05002 // Emit code into the DAG to store the stack guard onto the stack. 05003 MachineFunction &MF = DAG.getMachineFunction(); 05004 MachineFrameInfo *MFI = MF.getFrameInfo(); 05005 EVT PtrTy = TLI.getPointerTy(); 05006 05007 SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. 05008 AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); 05009 05010 int FI = FuncInfo.StaticAllocaMap[Slot]; 05011 MFI->setStackProtectorIndex(FI); 05012 05013 SDValue FIN = DAG.getFrameIndex(FI, PtrTy); 05014 05015 // Store the stack protector onto the stack. 05016 Res = DAG.getStore(getRoot(), dl, Src, FIN, 05017 MachinePointerInfo::getFixedStack(FI), 05018 true, false, 0); 05019 setValue(&I, Res); 05020 DAG.setRoot(Res); 05021 return 0; 05022 } 05023 case Intrinsic::objectsize: { 05024 // If we don't know by now, we're never going to know. 
05025 ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); 05026 05027 assert(CI && "Non-constant type in __builtin_object_size?"); 05028 05029 SDValue Arg = getValue(I.getCalledValue()); 05030 EVT Ty = Arg.getValueType(); 05031 05032 if (CI->isZero()) 05033 Res = DAG.getConstant(-1ULL, Ty); 05034 else 05035 Res = DAG.getConstant(0, Ty); 05036 05037 setValue(&I, Res); 05038 return 0; 05039 } 05040 case Intrinsic::annotation: 05041 case Intrinsic::ptr_annotation: 05042 // Drop the intrinsic, but forward the value 05043 setValue(&I, getValue(I.getOperand(0))); 05044 return 0; 05045 case Intrinsic::var_annotation: 05046 // Discard annotate attributes 05047 return 0; 05048 05049 case Intrinsic::init_trampoline: { 05050 const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); 05051 05052 SDValue Ops[6]; 05053 Ops[0] = getRoot(); 05054 Ops[1] = getValue(I.getArgOperand(0)); 05055 Ops[2] = getValue(I.getArgOperand(1)); 05056 Ops[3] = getValue(I.getArgOperand(2)); 05057 Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); 05058 Ops[5] = DAG.getSrcValue(F); 05059 05060 Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6); 05061 05062 DAG.setRoot(Res); 05063 return 0; 05064 } 05065 case Intrinsic::adjust_trampoline: { 05066 setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl, 05067 TLI.getPointerTy(), 05068 getValue(I.getArgOperand(0)))); 05069 return 0; 05070 } 05071 case Intrinsic::gcroot: 05072 if (GFI) { 05073 const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); 05074 const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); 05075 05076 FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); 05077 GFI->addStackRoot(FI->getIndex(), TypeMap); 05078 } 05079 return 0; 05080 case Intrinsic::gcread: 05081 case Intrinsic::gcwrite: 05082 llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); 05083 case Intrinsic::flt_rounds: 05084 setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); 05085 return 
0; 05086 05087 case Intrinsic::expect: { 05088 // Just replace __builtin_expect(exp, c) with EXP. 05089 setValue(&I, getValue(I.getArgOperand(0))); 05090 return 0; 05091 } 05092 05093 case Intrinsic::debugtrap: 05094 case Intrinsic::trap: { 05095 StringRef TrapFuncName = TM.Options.getTrapFunctionName(); 05096 if (TrapFuncName.empty()) { 05097 ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 05098 ISD::TRAP : ISD::DEBUGTRAP; 05099 DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot())); 05100 return 0; 05101 } 05102 TargetLowering::ArgListTy Args; 05103 TargetLowering:: 05104 CallLoweringInfo CLI(getRoot(), I.getType(), 05105 false, false, false, false, 0, CallingConv::C, 05106 /*isTailCall=*/false, 05107 /*doesNotRet=*/false, /*isReturnValueUsed=*/true, 05108 DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), 05109 Args, DAG, dl); 05110 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); 05111 DAG.setRoot(Result.second); 05112 return 0; 05113 } 05114 05115 case Intrinsic::uadd_with_overflow: 05116 case Intrinsic::sadd_with_overflow: 05117 case Intrinsic::usub_with_overflow: 05118 case Intrinsic::ssub_with_overflow: 05119 case Intrinsic::umul_with_overflow: 05120 case Intrinsic::smul_with_overflow: { 05121 ISD::NodeType Op; 05122 switch (Intrinsic) { 05123 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
05124 case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; 05125 case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; 05126 case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; 05127 case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; 05128 case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; 05129 case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; 05130 } 05131 SDValue Op1 = getValue(I.getArgOperand(0)); 05132 SDValue Op2 = getValue(I.getArgOperand(1)); 05133 05134 SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); 05135 setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2)); 05136 return 0; 05137 } 05138 case Intrinsic::prefetch: { 05139 SDValue Ops[5]; 05140 unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); 05141 Ops[0] = getRoot(); 05142 Ops[1] = getValue(I.getArgOperand(0)); 05143 Ops[2] = getValue(I.getArgOperand(1)); 05144 Ops[3] = getValue(I.getArgOperand(2)); 05145 Ops[4] = getValue(I.getArgOperand(3)); 05146 DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, 05147 DAG.getVTList(MVT::Other), 05148 &Ops[0], 5, 05149 EVT::getIntegerVT(*Context, 8), 05150 MachinePointerInfo(I.getArgOperand(0)), 05151 0, /* align */ 05152 false, /* volatile */ 05153 rw==0, /* read */ 05154 rw==1)); /* write */ 05155 return 0; 05156 } 05157 case Intrinsic::lifetime_start: 05158 case Intrinsic::lifetime_end: { 05159 bool IsStart = (Intrinsic == Intrinsic::lifetime_start); 05160 // Stack coloring is not enabled in O0, discard region information. 05161 if (TM.getOptLevel() == CodeGenOpt::None) 05162 return 0; 05163 05164 SmallVector<Value *, 4> Allocas; 05165 GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); 05166 05167 for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(), 05168 E = Allocas.end(); Object != E; ++Object) { 05169 AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); 05170 05171 // Could not find an Alloca. 
05172 if (!LifetimeObject) 05173 continue; 05174 05175 int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; 05176 05177 SDValue Ops[2]; 05178 Ops[0] = getRoot(); 05179 Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); 05180 unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); 05181 05182 Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); 05183 DAG.setRoot(Res); 05184 } 05185 return 0; 05186 } 05187 case Intrinsic::invariant_start: 05188 // Discard region information. 05189 setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); 05190 return 0; 05191 case Intrinsic::invariant_end: 05192 // Discard region information. 05193 return 0; 05194 case Intrinsic::donothing: 05195 // ignore 05196 return 0; 05197 } 05198 } 05199 05200 void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, 05201 bool isTailCall, 05202 MachineBasicBlock *LandingPad) { 05203 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 05204 FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 05205 Type *RetTy = FTy->getReturnType(); 05206 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 05207 MCSymbol *BeginLabel = 0; 05208 05209 TargetLowering::ArgListTy Args; 05210 TargetLowering::ArgListEntry Entry; 05211 Args.reserve(CS.arg_size()); 05212 05213 // Check whether the function can return without sret-demotion. 
/// LowerCallTo - Lower the call site CS (a call, or an invoke when LandingPad
/// is non-null) to DAG nodes via TLI.LowerCallTo, and record the call's result
/// value (if any) for CS.getInstruction().
///
/// \param CS          The IR call site being lowered.
/// \param Callee      The already-lowered callee value.
/// \param isTailCall  Whether the caller would like a tail call.  It is
///                    cleared here when isInTailCallPosition() rejects it.
/// \param LandingPad  If non-null, the call is bracketed with begin/end EH
///                    labels that are registered with MachineModuleInfo.
///
/// When TLI.CanLowerReturn() reports that the return value cannot be lowered
/// directly, the return is "demoted": a stack object is created, its address
/// is passed as a leading sret argument, the call is lowered as returning
/// void, and the result is reloaded from that stack object afterwards.
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                      bool isTailCall,
                                      MachineBasicBlock *LandingPad) {
  // Recover the callee's function type from the type of the called pointer.
  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  Type *RetTy = FTy->getReturnType();
  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
  MCSymbol *BeginLabel = 0;

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Args.reserve(CS.arg_size());

  // Check whether the function can return without sret-demotion.
  SmallVector<ISD::OutputArg, 4> Outs;
  GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI);

  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
                                           DAG.getMachineFunction(),
                                           FTy->isVarArg(), Outs,
                                           FTy->getContext());

  SDValue DemoteStackSlot;
  // NOTE(review): -100 looks like an arbitrary placeholder.  The index is only
  // read in the !CanLowerReturn path below, where it has been overwritten.
  int DemoteStackIdx = -100;

  if (!CanLowerReturn) {
    // Demote the return value: allocate a stack object sized and aligned for
    // the return type and pass its address as the first (sret) argument.
    uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(
                                                    FTy->getReturnType());
    unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(
                                                    FTy->getReturnType());
    MachineFunction &MF = DAG.getMachineFunction();
    DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
    Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());

    DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
    Entry.Node = DemoteStackSlot;
    Entry.Ty = StackSlotPtrType;
    Entry.isSExt = false;
    Entry.isZExt = false;
    Entry.isInReg = false;
    Entry.isSRet = true;
    Entry.isNest = false;
    Entry.isByVal = false;
    Entry.isReturned = false;
    Entry.Alignment = Align;
    Args.push_back(Entry);
    // The call itself is now lowered as returning void.
    RetTy = Type::getVoidTy(FTy->getContext());
  }

  // Translate each IR argument into an ArgListEntry.  Entry is reused across
  // iterations; every field is reassigned before it is pushed.
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    const Value *V = *i;

    // Skip empty types
    if (V->getType()->isEmptyTy())
      continue;

    SDValue ArgNode = getValue(V);
    Entry.Node = ArgNode; Entry.Ty = V->getType();

    // Attribute queries use the argument's position plus one.
    unsigned attrInd = i - CS.arg_begin() + 1;
    Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
    Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
    Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
    Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
    Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned);
    Entry.Alignment = CS.getParamAlignment(attrInd);
    Args.push_back(Entry);
  }

  if (LandingPad) {
    // Insert a label before the invoke call to mark the try range.  This can be
    // used to detect deletion of the invoke via the MachineModuleInfo.
    BeginLabel = MMI.getContext().CreateTempSymbol();

    // For SjLj, keep track of which landing pads go with which invokes
    // so as to maintain the ordering of pads in the LSDA.
    unsigned CallSiteIndex = MMI.getCurrentCallSite();
    if (CallSiteIndex) {
      MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
      LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);

      // Now that the call site is handled, stop tracking it.
      MMI.setCurrentCallSite(0);
    }

    // Both PendingLoads and PendingExports must be flushed here;
    // this call might not return.
    (void)getRoot();
    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
  }

  // Check if target-independent constraints permit a tail call here.
  // Target-dependent constraints are checked within TLI.LowerCallTo.
  if (isTailCall && !isInTailCallPosition(CS, TLI))
    isTailCall = false;

  TargetLowering::
  CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG,
                       getCurDebugLoc(), CS);
  // Result.first is the call's value (may be null); Result.second is the
  // output chain (null means a tail call was emitted, see below).
  std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI);
  assert((isTailCall || Result.second.getNode()) &&
         "Non-null chain expected with non-tail call!");
  assert((Result.second.getNode() || !Result.first.getNode()) &&
         "Null value expected with tail call!");
  if (Result.first.getNode()) {
    setValue(CS.getInstruction(), Result.first);
  } else if (!CanLowerReturn && Result.second.getNode()) {
    // The instruction result is the result of loading from the
    // hidden sret parameter.
    SmallVector<EVT, 1> PVTs;
    Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());

    ComputeValueVTs(TLI, PtrRetTy, PVTs);
    assert(PVTs.size() == 1 && "Pointers should fit in one register");
    EVT PtrVT = PVTs[0];

    // Split the original (pre-demotion) return type into its value types and
    // the offset of each one.
    SmallVector<EVT, 4> RetTys;
    SmallVector<uint64_t, 4> Offsets;
    RetTy = FTy->getReturnType();
    ComputeValueVTs(TLI, RetTy, RetTys, &Offsets);

    unsigned NumValues = RetTys.size();
    SmallVector<SDValue, 4> Values(NumValues);
    SmallVector<SDValue, 4> Chains(NumValues);

    // Load each piece from DemoteStackSlot + Offsets[i], chained on the call's
    // output chain.  The trailing "false, false, false, 1" matches the
    // volatile / nontemporal / isinvariant / align=1 arguments annotated on the
    // equivalent getLoad call in getMemCmpLoad.
    for (unsigned i = 0; i < NumValues; ++i) {
      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
                                DemoteStackSlot,
                                DAG.getConstant(Offsets[i], PtrVT));
      SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add,
                  MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
                              false, false, false, 1);
      Values[i] = L;
      Chains[i] = L.getValue(1);
    }

    // Join the load chains and queue the result as a pending load.
    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
                                MVT::Other, &Chains[0], NumValues);
    PendingLoads.push_back(Chain);

    setValue(CS.getInstruction(),
             DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
                         DAG.getVTList(&RetTys[0], RetTys.size()),
                         &Values[0], Values.size()));
  }

  // Assign order to nodes here. If the call does not produce a result, it won't
  // be mapped to a SDNode and visit() will not assign it an order number.
  if (!Result.second.getNode()) {
    // As a special case, a null chain means that a tail call has been emitted and
    // the DAG root is already updated.
    HasTailCall = true;
    ++SDNodeOrder;
    AssignOrderingToNode(DAG.getRoot().getNode());
  } else {
    DAG.setRoot(Result.second);
    ++SDNodeOrder;
    AssignOrderingToNode(Result.second.getNode());
  }

  if (LandingPad) {
    // Insert a label at the end of the invoke call to mark the try range.  This
    // can be used to detect deletion of the invoke via the MachineModuleInfo.
    MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));

    // Inform MachineModuleInfo of range.
    MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
  }
}
getCurDebugLoc(), 05345 DAG.getVTList(&RetTys[0], RetTys.size()), 05346 &Values[0], Values.size())); 05347 } 05348 05349 // Assign order to nodes here. If the call does not produce a result, it won't 05350 // be mapped to a SDNode and visit() will not assign it an order number. 05351 if (!Result.second.getNode()) { 05352 // As a special case, a null chain means that a tail call has been emitted and 05353 // the DAG root is already updated. 05354 HasTailCall = true; 05355 ++SDNodeOrder; 05356 AssignOrderingToNode(DAG.getRoot().getNode()); 05357 } else { 05358 DAG.setRoot(Result.second); 05359 ++SDNodeOrder; 05360 AssignOrderingToNode(Result.second.getNode()); 05361 } 05362 05363 if (LandingPad) { 05364 // Insert a label at the end of the invoke call to mark the try range. This 05365 // can be used to detect deletion of the invoke via the MachineModuleInfo. 05366 MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); 05367 DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel)); 05368 05369 // Inform MachineModuleInfo of range. 05370 MMI.addInvoke(LandingPad, BeginLabel, EndLabel); 05371 } 05372 } 05373 05374 /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the 05375 /// value is equal or not-equal to zero. 05376 static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { 05377 for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); 05378 UI != E; ++UI) { 05379 if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) 05380 if (IC->isEquality()) 05381 if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) 05382 if (C->isNullValue()) 05383 continue; 05384 // Unknown instruction. 05385 return false; 05386 } 05387 return true; 05388 } 05389 05390 static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, 05391 Type *LoadTy, 05392 SelectionDAGBuilder &Builder) { 05393 05394 // Check to see if this load can be trivially constant folded, e.g. if the 05395 // input is from a string literal. 
05396 if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { 05397 // Cast pointer to the type we really want to load. 05398 LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), 05399 PointerType::getUnqual(LoadTy)); 05400 05401 if (const Constant *LoadCst = 05402 ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), 05403 Builder.TD)) 05404 return Builder.getValue(LoadCst); 05405 } 05406 05407 // Otherwise, we have to emit the load. If the pointer is to unfoldable but 05408 // still constant memory, the input chain can be the entry node. 05409 SDValue Root; 05410 bool ConstantMemory = false; 05411 05412 // Do not serialize (non-volatile) loads of constant memory with anything. 05413 if (Builder.AA->pointsToConstantMemory(PtrVal)) { 05414 Root = Builder.DAG.getEntryNode(); 05415 ConstantMemory = true; 05416 } else { 05417 // Do not serialize non-volatile loads against each other. 05418 Root = Builder.DAG.getRoot(); 05419 } 05420 05421 SDValue Ptr = Builder.getValue(PtrVal); 05422 SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, 05423 Ptr, MachinePointerInfo(PtrVal), 05424 false /*volatile*/, 05425 false /*nontemporal*/, 05426 false /*isinvariant*/, 1 /* align=1 */); 05427 05428 if (!ConstantMemory) 05429 Builder.PendingLoads.push_back(LoadVal.getValue(1)); 05430 return LoadVal; 05431 } 05432 05433 05434 /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. 05435 /// If so, return true and lower it, otherwise return false and it will be 05436 /// lowered like a normal call. 05437 bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { 05438 // Verify that the prototype makes sense. 
int memcmp(void*,void*,size_t) 05439 if (I.getNumArgOperands() != 3) 05440 return false; 05441 05442 const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); 05443 if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || 05444 !I.getArgOperand(2)->getType()->isIntegerTy() || 05445 !I.getType()->isIntegerTy()) 05446 return false; 05447 05448 const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2)); 05449 05450 // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 05451 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 05452 if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { 05453 bool ActuallyDoIt = true; 05454 MVT LoadVT; 05455 Type *LoadTy; 05456 switch (Size->getZExtValue()) { 05457 default: 05458 LoadVT = MVT::Other; 05459 LoadTy = 0; 05460 ActuallyDoIt = false; 05461 break; 05462 case 2: 05463 LoadVT = MVT::i16; 05464 LoadTy = Type::getInt16Ty(Size->getContext()); 05465 break; 05466 case 4: 05467 LoadVT = MVT::i32; 05468 LoadTy = Type::getInt32Ty(Size->getContext()); 05469 break; 05470 case 8: 05471 LoadVT = MVT::i64; 05472 LoadTy = Type::getInt64Ty(Size->getContext()); 05473 break; 05474 /* 05475 case 16: 05476 LoadVT = MVT::v4i32; 05477 LoadTy = Type::getInt32Ty(Size->getContext()); 05478 LoadTy = VectorType::get(LoadTy, 4); 05479 break; 05480 */ 05481 } 05482 05483 // This turns into unaligned loads. We only do this if the target natively 05484 // supports the MVT we'll be loading or if it is small enough (<= 4) that 05485 // we'll only produce a small number of byte loads. 05486 05487 // Require that we can find a legal MVT, and only do this if the target 05488 // supports unaligned loads of that type. Expanding into byte loads would 05489 // bloat the code. 05490 if (ActuallyDoIt && Size->getZExtValue() > 4) { 05491 // TODO: Handle 5 byte compare as 4-byte + 1 byte. 05492 // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. 
05493 if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) 05494 ActuallyDoIt = false; 05495 } 05496 05497 if (ActuallyDoIt) { 05498 SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); 05499 SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); 05500 05501 SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, 05502 ISD::SETNE); 05503 EVT CallVT = TLI.getValueType(I.getType(), true); 05504 setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); 05505 return true; 05506 } 05507 } 05508 05509 05510 return false; 05511 } 05512 05513 /// visitUnaryFloatCall - If a call instruction is a unary floating-point 05514 /// operation (as expected), translate it to an SDNode with the specified opcode 05515 /// and return true. 05516 bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, 05517 unsigned Opcode) { 05518 // Sanity check that it really is a unary floating-point call. 05519 if (I.getNumArgOperands() != 1 || 05520 !I.getArgOperand(0)->getType()->isFloatingPointTy() || 05521 I.getType() != I.getArgOperand(0)->getType() || 05522 !I.onlyReadsMemory()) 05523 return false; 05524 05525 SDValue Tmp = getValue(I.getArgOperand(0)); 05526 setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); 05527 return true; 05528 } 05529 05530 void SelectionDAGBuilder::visitCall(const CallInst &I) { 05531 // Handle inline assembly differently. 
05532 if (isa<InlineAsm>(I.getCalledValue())) { 05533 visitInlineAsm(&I); 05534 return; 05535 } 05536 05537 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 05538 ComputeUsesVAFloatArgument(I, &MMI); 05539 05540 const char *RenameFn = 0; 05541 if (Function *F = I.getCalledFunction()) { 05542 if (F->isDeclaration()) { 05543 if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { 05544 if (unsigned IID = II->getIntrinsicID(F)) { 05545 RenameFn = visitIntrinsicCall(I, IID); 05546 if (!RenameFn) 05547 return; 05548 } 05549 } 05550 if (unsigned IID = F->getIntrinsicID()) { 05551 RenameFn = visitIntrinsicCall(I, IID); 05552 if (!RenameFn) 05553 return; 05554 } 05555 } 05556 05557 // Check for well-known libc/libm calls. If the function is internal, it 05558 // can't be a library call. 05559 LibFunc::Func Func; 05560 if (!F->hasLocalLinkage() && F->hasName() && 05561 LibInfo->getLibFunc(F->getName(), Func) && 05562 LibInfo->hasOptimizedCodeGen(Func)) { 05563 switch (Func) { 05564 default: break; 05565 case LibFunc::copysign: 05566 case LibFunc::copysignf: 05567 case LibFunc::copysignl: 05568 if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
05569 I.getArgOperand(0)->getType()->isFloatingPointTy() && 05570 I.getType() == I.getArgOperand(0)->getType() && 05571 I.getType() == I.getArgOperand(1)->getType() && 05572 I.onlyReadsMemory()) { 05573 SDValue LHS = getValue(I.getArgOperand(0)); 05574 SDValue RHS = getValue(I.getArgOperand(1)); 05575 setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), 05576 LHS.getValueType(), LHS, RHS)); 05577 return; 05578 } 05579 break; 05580 case LibFunc::fabs: 05581 case LibFunc::fabsf: 05582 case LibFunc::fabsl: 05583 if (visitUnaryFloatCall(I, ISD::FABS)) 05584 return; 05585 break; 05586 case LibFunc::sin: 05587 case LibFunc::sinf: 05588 case LibFunc::sinl: 05589 if (visitUnaryFloatCall(I, ISD::FSIN)) 05590 return; 05591 break; 05592 case LibFunc::cos: 05593 case LibFunc::cosf: 05594 case LibFunc::cosl: 05595 if (visitUnaryFloatCall(I, ISD::FCOS)) 05596 return; 05597 break; 05598 case LibFunc::sqrt: 05599 case LibFunc::sqrtf: 05600 case LibFunc::sqrtl: 05601 if (visitUnaryFloatCall(I, ISD::FSQRT)) 05602 return; 05603 break; 05604 case LibFunc::floor: 05605 case LibFunc::floorf: 05606 case LibFunc::floorl: 05607 if (visitUnaryFloatCall(I, ISD::FFLOOR)) 05608 return; 05609 break; 05610 case LibFunc::nearbyint: 05611 case LibFunc::nearbyintf: 05612 case LibFunc::nearbyintl: 05613 if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) 05614 return; 05615 break; 05616 case LibFunc::ceil: 05617 case LibFunc::ceilf: 05618 case LibFunc::ceill: 05619 if (visitUnaryFloatCall(I, ISD::FCEIL)) 05620 return; 05621 break; 05622 case LibFunc::rint: 05623 case LibFunc::rintf: 05624 case LibFunc::rintl: 05625 if (visitUnaryFloatCall(I, ISD::FRINT)) 05626 return; 05627 break; 05628 case LibFunc::trunc: 05629 case LibFunc::truncf: 05630 case LibFunc::truncl: 05631 if (visitUnaryFloatCall(I, ISD::FTRUNC)) 05632 return; 05633 break; 05634 case LibFunc::log2: 05635 case LibFunc::log2f: 05636 case LibFunc::log2l: 05637 if (visitUnaryFloatCall(I, ISD::FLOG2)) 05638 return; 05639 break; 05640 case 
LibFunc::exp2: 05641 case LibFunc::exp2f: 05642 case LibFunc::exp2l: 05643 if (visitUnaryFloatCall(I, ISD::FEXP2)) 05644 return; 05645 break; 05646 case LibFunc::memcmp: 05647 if (visitMemCmpCall(I)) 05648 return; 05649 break; 05650 } 05651 } 05652 } 05653 05654 SDValue Callee; 05655 if (!RenameFn) 05656 Callee = getValue(I.getCalledValue()); 05657 else 05658 Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); 05659 05660 // Check if we can potentially perform a tail call. More detailed checking is 05661 // be done within LowerCallTo, after more information about the call is known. 05662 LowerCallTo(&I, Callee, I.isTailCall()); 05663 } 05664 05665 namespace { 05666 05667 /// AsmOperandInfo - This contains information for each constraint that we are 05668 /// lowering. 05669 class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { 05670 public: 05671 /// CallOperand - If this is the result output operand or a clobber 05672 /// this is null, otherwise it is the incoming operand to the CallInst. 05673 /// This gets modified as the asm is processed. 05674 SDValue CallOperand; 05675 05676 /// AssignedRegs - If this is a register or register class operand, this 05677 /// contains the set of register corresponding to the operand. 05678 RegsForValue AssignedRegs; 05679 05680 explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) 05681 : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { 05682 } 05683 05684 /// getCallOperandValEVT - Return the EVT of the Value* that this operand 05685 /// corresponds to. If there is no Value* for this operand, it returns 05686 /// MVT::Other. 
05687 EVT getCallOperandValEVT(LLVMContext &Context, 05688 const TargetLowering &TLI, 05689 const DataLayout *TD) const { 05690 if (CallOperandVal == 0) return MVT::Other; 05691 05692 if (isa<BasicBlock>(CallOperandVal)) 05693 return TLI.getPointerTy(); 05694 05695 llvm::Type *OpTy = CallOperandVal->getType(); 05696 05697 // FIXME: code duplicated from TargetLowering::ParseConstraints(). 05698 // If this is an indirect operand, the operand is a pointer to the 05699 // accessed type. 05700 if (isIndirect) { 05701 llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); 05702 if (!PtrTy) 05703 report_fatal_error("Indirect operand for inline asm not a pointer!"); 05704 OpTy = PtrTy->getElementType(); 05705 } 05706 05707 // Look for vector wrapped in a struct. e.g. { <16 x i8> }. 05708 if (StructType *STy = dyn_cast<StructType>(OpTy)) 05709 if (STy->getNumElements() == 1) 05710 OpTy = STy->getElementType(0); 05711 05712 // If OpTy is not a single value, it may be a struct/union that we 05713 // can tile with integers. 05714 if (!OpTy->isSingleValueType() && OpTy->isSized()) { 05715 unsigned BitSize = TD->getTypeSizeInBits(OpTy); 05716 switch (BitSize) { 05717 default: break; 05718 case 1: 05719 case 8: 05720 case 16: 05721 case 32: 05722 case 64: 05723 case 128: 05724 OpTy = IntegerType::get(Context, BitSize); 05725 break; 05726 } 05727 } 05728 05729 return TLI.getValueType(OpTy, true); 05730 } 05731 }; 05732 05733 typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; 05734 05735 } // end anonymous namespace 05736 05737 /// GetRegistersForValue - Assign registers (virtual or physical) for the 05738 /// specified operand. We prefer to assign virtual registers, to allow the 05739 /// register allocator to handle the assignment process. However, if the asm 05740 /// uses features that we can't model on machineinstrs, we have SDISel do the 05741 /// allocation. This produces generally horrible, but correct, code. 
05742 /// 05743 /// OpInfo describes the operand. 05744 /// 05745 static void GetRegistersForValue(SelectionDAG &DAG, 05746 const TargetLowering &TLI, 05747 DebugLoc DL, 05748 SDISelAsmOperandInfo &OpInfo) { 05749 LLVMContext &Context = *DAG.getContext(); 05750 05751 MachineFunction &MF = DAG.getMachineFunction(); 05752 SmallVector<unsigned, 4> Regs; 05753 05754 // If this is a constraint for a single physreg, or a constraint for a 05755 // register class, find it. 05756 std::pair<unsigned, const TargetRegisterClass*> PhysReg = 05757 TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, 05758 OpInfo.ConstraintVT); 05759 05760 unsigned NumRegs = 1; 05761 if (OpInfo.ConstraintVT != MVT::Other) { 05762 // If this is a FP input in an integer register (or visa versa) insert a bit 05763 // cast of the input value. More generally, handle any case where the input 05764 // value disagrees with the register class we plan to stick this in. 05765 if (OpInfo.Type == InlineAsm::isInput && 05766 PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { 05767 // Try to convert to the first EVT that the reg class contains. If the 05768 // types are identical size, use a bitcast to convert (e.g. two differing 05769 // vector types). 05770 MVT RegVT = *PhysReg.second->vt_begin(); 05771 if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { 05772 OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, 05773 RegVT, OpInfo.CallOperand); 05774 OpInfo.ConstraintVT = RegVT; 05775 } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { 05776 // If the input is a FP value and we want it in FP registers, do a 05777 // bitcast to the corresponding integer type. This turns an f64 value 05778 // into i64, which can be passed with two i32 values on a 32-bit 05779 // machine. 
05780 RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); 05781 OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, 05782 RegVT, OpInfo.CallOperand); 05783 OpInfo.ConstraintVT = RegVT; 05784 } 05785 } 05786 05787 NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); 05788 } 05789 05790 MVT RegVT; 05791 EVT ValueVT = OpInfo.ConstraintVT; 05792 05793 // If this is a constraint for a specific physical register, like {r17}, 05794 // assign it now. 05795 if (unsigned AssignedReg = PhysReg.first) { 05796 const TargetRegisterClass *RC = PhysReg.second; 05797 if (OpInfo.ConstraintVT == MVT::Other) 05798 ValueVT = *RC->vt_begin(); 05799 05800 // Get the actual register value type. This is important, because the user 05801 // may have asked for (e.g.) the AX register in i32 type. We need to 05802 // remember that AX is actually i16 to get the right extension. 05803 RegVT = *RC->vt_begin(); 05804 05805 // This is a explicit reference to a physical register. 05806 Regs.push_back(AssignedReg); 05807 05808 // If this is an expanded reference, add the rest of the regs to Regs. 05809 if (NumRegs != 1) { 05810 TargetRegisterClass::iterator I = RC->begin(); 05811 for (; *I != AssignedReg; ++I) 05812 assert(I != RC->end() && "Didn't find reg!"); 05813 05814 // Already added the first reg. 05815 --NumRegs; ++I; 05816 for (; NumRegs; --NumRegs, ++I) { 05817 assert(I != RC->end() && "Ran out of registers to allocate!"); 05818 Regs.push_back(*I); 05819 } 05820 } 05821 05822 OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); 05823 return; 05824 } 05825 05826 // Otherwise, if this was a reference to an LLVM register class, create vregs 05827 // for this reference. 05828 if (const TargetRegisterClass *RC = PhysReg.second) { 05829 RegVT = *RC->vt_begin(); 05830 if (OpInfo.ConstraintVT == MVT::Other) 05831 ValueVT = RegVT; 05832 05833 // Create the appropriate number of virtual registers. 
05834 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 05835 for (; NumRegs; --NumRegs) 05836 Regs.push_back(RegInfo.createVirtualRegister(RC)); 05837 05838 OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); 05839 return; 05840 } 05841 05842 // Otherwise, we couldn't allocate enough registers for this. 05843 } 05844 05845 /// visitInlineAsm - Handle a call to an InlineAsm object. 05846 /// 05847 void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { 05848 const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); 05849 05850 /// ConstraintOperands - Information about all of the constraints. 05851 SDISelAsmOperandInfoVector ConstraintOperands; 05852 05853 TargetLowering::AsmOperandInfoVector 05854 TargetConstraints = TLI.ParseConstraints(CS); 05855 05856 bool hasMemory = false; 05857 05858 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. 05859 unsigned ResNo = 0; // ResNo - The result number of the next output. 05860 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 05861 ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); 05862 SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); 05863 05864 MVT OpVT = MVT::Other; 05865 05866 // Compute the value type for each operand. 05867 switch (OpInfo.Type) { 05868 case InlineAsm::isOutput: 05869 // Indirect outputs just consume an argument. 05870 if (OpInfo.isIndirect) { 05871 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); 05872 break; 05873 } 05874 05875 // The return value of the call is this value. As such, there is no 05876 // corresponding argument. 
05877 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); 05878 if (StructType *STy = dyn_cast<StructType>(CS.getType())) { 05879 OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); 05880 } else { 05881 assert(ResNo == 0 && "Asm only has one result!"); 05882 OpVT = TLI.getSimpleValueType(CS.getType()); 05883 } 05884 ++ResNo; 05885 break; 05886 case InlineAsm::isInput: 05887 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); 05888 break; 05889 case InlineAsm::isClobber: 05890 // Nothing to do. 05891 break; 05892 } 05893 05894 // If this is an input or an indirect output, process the call argument. 05895 // BasicBlocks are labels, currently appearing only in asm's. 05896 if (OpInfo.CallOperandVal) { 05897 if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { 05898 OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); 05899 } else { 05900 OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); 05901 } 05902 05903 OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD). 05904 getSimpleVT(); 05905 } 05906 05907 OpInfo.ConstraintVT = OpVT; 05908 05909 // Indirect operand accesses access memory. 05910 if (OpInfo.isIndirect) 05911 hasMemory = true; 05912 else { 05913 for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { 05914 TargetLowering::ConstraintType 05915 CType = TLI.getConstraintType(OpInfo.Codes[j]); 05916 if (CType == TargetLowering::C_Memory) { 05917 hasMemory = true; 05918 break; 05919 } 05920 } 05921 } 05922 } 05923 05924 SDValue Chain, Flag; 05925 05926 // We won't need to flush pending loads if this asm doesn't touch 05927 // memory and is nonvolatile. 05928 if (hasMemory || IA->hasSideEffects()) 05929 Chain = getRoot(); 05930 else 05931 Chain = DAG.getRoot(); 05932 05933 // Second pass over the constraints: compute which constraint option to use 05934 // and assign registers to constraints that want a specific physreg. 
05935 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 05936 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 05937 05938 // If this is an output operand with a matching input operand, look up the 05939 // matching input. If their types mismatch, e.g. one is an integer, the 05940 // other is floating point, or their sizes are different, flag it as an 05941 // error. 05942 if (OpInfo.hasMatchingInput()) { 05943 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; 05944 05945 if (OpInfo.ConstraintVT != Input.ConstraintVT) { 05946 std::pair<unsigned, const TargetRegisterClass*> MatchRC = 05947 TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, 05948 OpInfo.ConstraintVT); 05949 std::pair<unsigned, const TargetRegisterClass*> InputRC = 05950 TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, 05951 Input.ConstraintVT); 05952 if ((OpInfo.ConstraintVT.isInteger() != 05953 Input.ConstraintVT.isInteger()) || 05954 (MatchRC.second != InputRC.second)) { 05955 report_fatal_error("Unsupported asm: input constraint" 05956 " with a matching output constraint of" 05957 " incompatible type!"); 05958 } 05959 Input.ConstraintVT = OpInfo.ConstraintVT; 05960 } 05961 } 05962 05963 // Compute the constraint code and ConstraintType to use. 05964 TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); 05965 05966 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 05967 OpInfo.Type == InlineAsm::isClobber) 05968 continue; 05969 05970 // If this is a memory input, and if the operand is not indirect, do what we 05971 // need to to provide an address for the memory input. 05972 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 05973 !OpInfo.isIndirect) { 05974 assert((OpInfo.isMultipleAlternative || 05975 (OpInfo.Type == InlineAsm::isInput)) && 05976 "Can only indirectify direct input operands!"); 05977 05978 // Memory operands really want the address of the value. 
If we don't have 05979 // an indirect input, put it in the constpool if we can, otherwise spill 05980 // it to a stack slot. 05981 // TODO: This isn't quite right. We need to handle these according to 05982 // the addressing mode that the constraint wants. Also, this may take 05983 // an additional register for the computation and we don't want that 05984 // either. 05985 05986 // If the operand is a float, integer, or vector constant, spill to a 05987 // constant pool entry to get its address. 05988 const Value *OpVal = OpInfo.CallOperandVal; 05989 if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || 05990 isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { 05991 OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), 05992 TLI.getPointerTy()); 05993 } else { 05994 // Otherwise, create a stack slot and emit a store to it before the 05995 // asm. 05996 Type *Ty = OpVal->getType(); 05997 uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); 05998 unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); 05999 MachineFunction &MF = DAG.getMachineFunction(); 06000 int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); 06001 SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); 06002 Chain = DAG.getStore(Chain, getCurDebugLoc(), 06003 OpInfo.CallOperand, StackSlot, 06004 MachinePointerInfo::getFixedStack(SSFI), 06005 false, false, 0); 06006 OpInfo.CallOperand = StackSlot; 06007 } 06008 06009 // There is no longer a Value* corresponding to this operand. 06010 OpInfo.CallOperandVal = 0; 06011 06012 // It is now an indirect operand. 06013 OpInfo.isIndirect = true; 06014 } 06015 06016 // If this constraint is for a specific register, allocate it before 06017 // anything else. 
06018 if (OpInfo.ConstraintType == TargetLowering::C_Register) 06019 GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); 06020 } 06021 06022 // Second pass - Loop over all of the operands, assigning virtual or physregs 06023 // to register class operands. 06024 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 06025 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 06026 06027 // C_Register operands have already been allocated, Other/Memory don't need 06028 // to be. 06029 if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) 06030 GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); 06031 } 06032 06033 // AsmNodeOperands - The operands for the ISD::INLINEASM node. 06034 std::vector<SDValue> AsmNodeOperands; 06035 AsmNodeOperands.push_back(SDValue()); // reserve space for input chain 06036 AsmNodeOperands.push_back( 06037 DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), 06038 TLI.getPointerTy())); 06039 06040 // If we have a !srcloc metadata node associated with it, we want to attach 06041 // this to the ultimately generated inline asm machineinstr. To do this, we 06042 // pass in the third operand as this (potentially null) inline asm MDNode. 06043 const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); 06044 AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); 06045 06046 // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore 06047 // bits as operand 3. 06048 unsigned ExtraInfo = 0; 06049 if (IA->hasSideEffects()) 06050 ExtraInfo |= InlineAsm::Extra_HasSideEffects; 06051 if (IA->isAlignStack()) 06052 ExtraInfo |= InlineAsm::Extra_IsAlignStack; 06053 // Set the asm dialect. 06054 ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; 06055 06056 // Determine if this InlineAsm MayLoad or MayStore based on the constraints. 
06057 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 06058 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 06059 06060 // Compute the constraint code and ConstraintType to use. 06061 TLI.ComputeConstraintToUse(OpInfo, SDValue()); 06062 06063 // Ideally, we would only check against memory constraints. However, the 06064 // meaning of an other constraint can be target-specific and we can't easily 06065 // reason about it. Therefore, be conservative and set MayLoad/MayStore 06066 // for other constriants as well. 06067 if (OpInfo.ConstraintType == TargetLowering::C_Memory || 06068 OpInfo.ConstraintType == TargetLowering::C_Other) { 06069 if (OpInfo.Type == InlineAsm::isInput) 06070 ExtraInfo |= InlineAsm::Extra_MayLoad; 06071 else if (OpInfo.Type == InlineAsm::isOutput) 06072 ExtraInfo |= InlineAsm::Extra_MayStore; 06073 else if (OpInfo.Type == InlineAsm::isClobber) 06074 ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); 06075 } 06076 } 06077 06078 AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, 06079 TLI.getPointerTy())); 06080 06081 // Loop over all of the inputs, copying the operand values into the 06082 // appropriate registers and processing the output regs. 06083 RegsForValue RetValRegs; 06084 06085 // IndirectStoresToEmit - The set of stores to emit after the inline asm node. 06086 std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; 06087 06088 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 06089 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 06090 06091 switch (OpInfo.Type) { 06092 case InlineAsm::isOutput: { 06093 if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && 06094 OpInfo.ConstraintType != TargetLowering::C_Register) { 06095 // Memory output, or 'other' output (e.g. 'X' constraint). 06096 assert(OpInfo.isIndirect && "Memory output must be indirect operand"); 06097 06098 // Add information to the INLINEASM node to know about this output. 
06099 unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); 06100 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, 06101 TLI.getPointerTy())); 06102 AsmNodeOperands.push_back(OpInfo.CallOperand); 06103 break; 06104 } 06105 06106 // Otherwise, this is a register or register class output. 06107 06108 // Copy the output from the appropriate register. Find a register that 06109 // we can use. 06110 if (OpInfo.AssignedRegs.Regs.empty()) { 06111 LLVMContext &Ctx = *DAG.getContext(); 06112 Ctx.emitError(CS.getInstruction(), 06113 "couldn't allocate output register for constraint '" + 06114 Twine(OpInfo.ConstraintCode) + "'"); 06115 break; 06116 } 06117 06118 // If this is an indirect operand, store through the pointer after the 06119 // asm. 06120 if (OpInfo.isIndirect) { 06121 IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, 06122 OpInfo.CallOperandVal)); 06123 } else { 06124 // This is the result value of the call. 06125 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); 06126 // Concatenate this output onto the outputs list. 06127 RetValRegs.append(OpInfo.AssignedRegs); 06128 } 06129 06130 // Add information to the INLINEASM node to know that this register is 06131 // set. 06132 OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? 06133 InlineAsm::Kind_RegDefEarlyClobber : 06134 InlineAsm::Kind_RegDef, 06135 false, 06136 0, 06137 DAG, 06138 AsmNodeOperands); 06139 break; 06140 } 06141 case InlineAsm::isInput: { 06142 SDValue InOperandVal = OpInfo.CallOperand; 06143 06144 if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? 06145 // If this is required to match an output register we have already set, 06146 // just use its register. 06147 unsigned OperandNo = OpInfo.getMatchedOperand(); 06148 06149 // Scan until we find the definition we already emitted of this operand. 06150 // When we find it, create a RegsForValue operand. 
06151 unsigned CurOp = InlineAsm::Op_FirstOperand; 06152 for (; OperandNo; --OperandNo) { 06153 // Advance to the next operand. 06154 unsigned OpFlag = 06155 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); 06156 assert((InlineAsm::isRegDefKind(OpFlag) || 06157 InlineAsm::isRegDefEarlyClobberKind(OpFlag) || 06158 InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); 06159 CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; 06160 } 06161 06162 unsigned OpFlag = 06163 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); 06164 if (InlineAsm::isRegDefKind(OpFlag) || 06165 InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { 06166 // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 06167 if (OpInfo.isIndirect) { 06168 // This happens on gcc/testsuite/gcc.dg/pr8788-1.c 06169 LLVMContext &Ctx = *DAG.getContext(); 06170 Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" 06171 " don't know how to handle tied " 06172 "indirect register inputs"); 06173 report_fatal_error("Cannot handle indirect register inputs!"); 06174 } 06175 06176 RegsForValue MatchedRegs; 06177 MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); 06178 MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); 06179 MatchedRegs.RegVTs.push_back(RegVT); 06180 MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); 06181 for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); 06182 i != e; ++i) { 06183 if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) 06184 MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); 06185 else { 06186 LLVMContext &Ctx = *DAG.getContext(); 06187 Ctx.emitError(CS.getInstruction(), "inline asm error: This value" 06188 " type register class is not natively supported!"); 06189 report_fatal_error("inline asm error: This value type register " 06190 "class is not natively supported!"); 06191 } 06192 } 06193 // Use the produced MatchedRegs object to 06194 
MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), 06195 Chain, &Flag, CS.getInstruction()); 06196 MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, 06197 true, OpInfo.getMatchedOperand(), 06198 DAG, AsmNodeOperands); 06199 break; 06200 } 06201 06202 assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); 06203 assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && 06204 "Unexpected number of operands"); 06205 // Add information to the INLINEASM node to know about this input. 06206 // See InlineAsm.h isUseOperandTiedToDef. 06207 OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, 06208 OpInfo.getMatchedOperand()); 06209 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, 06210 TLI.getPointerTy())); 06211 AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); 06212 break; 06213 } 06214 06215 // Treat indirect 'X' constraint as memory. 06216 if (OpInfo.ConstraintType == TargetLowering::C_Other && 06217 OpInfo.isIndirect) 06218 OpInfo.ConstraintType = TargetLowering::C_Memory; 06219 06220 if (OpInfo.ConstraintType == TargetLowering::C_Other) { 06221 std::vector<SDValue> Ops; 06222 TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, 06223 Ops, DAG); 06224 if (Ops.empty()) { 06225 LLVMContext &Ctx = *DAG.getContext(); 06226 Ctx.emitError(CS.getInstruction(), 06227 "invalid operand for inline asm constraint '" + 06228 Twine(OpInfo.ConstraintCode) + "'"); 06229 break; 06230 } 06231 06232 // Add information to the INLINEASM node to know about this input. 
06233 unsigned ResOpType = 06234 InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); 06235 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 06236 TLI.getPointerTy())); 06237 AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); 06238 break; 06239 } 06240 06241 if (OpInfo.ConstraintType == TargetLowering::C_Memory) { 06242 assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); 06243 assert(InOperandVal.getValueType() == TLI.getPointerTy() && 06244 "Memory operands expect pointer values"); 06245 06246 // Add information to the INLINEASM node to know about this input. 06247 unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); 06248 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 06249 TLI.getPointerTy())); 06250 AsmNodeOperands.push_back(InOperandVal); 06251 break; 06252 } 06253 06254 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || 06255 OpInfo.ConstraintType == TargetLowering::C_Register) && 06256 "Unknown constraint type!"); 06257 06258 // TODO: Support this. 06259 if (OpInfo.isIndirect) { 06260 LLVMContext &Ctx = *DAG.getContext(); 06261 Ctx.emitError(CS.getInstruction(), 06262 "Don't know how to handle indirect register inputs yet " 06263 "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); 06264 break; 06265 } 06266 06267 // Copy the input into the appropriate registers. 
06268 if (OpInfo.AssignedRegs.Regs.empty()) { 06269 LLVMContext &Ctx = *DAG.getContext(); 06270 Ctx.emitError(CS.getInstruction(), 06271 "couldn't allocate input reg for constraint '" + 06272 Twine(OpInfo.ConstraintCode) + "'"); 06273 break; 06274 } 06275 06276 OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), 06277 Chain, &Flag, CS.getInstruction()); 06278 06279 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, 06280 DAG, AsmNodeOperands); 06281 break; 06282 } 06283 case InlineAsm::isClobber: { 06284 // Add the clobbered value to the operand list, so that the register 06285 // allocator is aware that the physreg got clobbered. 06286 if (!OpInfo.AssignedRegs.Regs.empty()) 06287 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, 06288 false, 0, DAG, 06289 AsmNodeOperands); 06290 break; 06291 } 06292 } 06293 } 06294 06295 // Finish up input operands. Set the input chain and add the flag last. 06296 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 06297 if (Flag.getNode()) AsmNodeOperands.push_back(Flag); 06298 06299 Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), 06300 DAG.getVTList(MVT::Other, MVT::Glue), 06301 &AsmNodeOperands[0], AsmNodeOperands.size()); 06302 Flag = Chain.getValue(1); 06303 06304 // If this asm returns a register value, copy the result from that register 06305 // and set it as the value of the call. 06306 if (!RetValRegs.Regs.empty()) { 06307 SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), 06308 Chain, &Flag, CS.getInstruction()); 06309 06310 // FIXME: Why don't we do this for inline asms with MRVs? 06311 if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { 06312 EVT ResultType = TLI.getValueType(CS.getType()); 06313 06314 // If any of the results of the inline asm is a vector, it may have the 06315 // wrong width/num elts. This can happen for register classes that can 06316 // contain multiple different value types. 
The preg or vreg allocated may 06317 // not have the same VT as was expected. Convert it to the right type 06318 // with bit_convert. 06319 if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { 06320 Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), 06321 ResultType, Val); 06322 06323 } else if (ResultType != Val.getValueType() && 06324 ResultType.isInteger() && Val.getValueType().isInteger()) { 06325 // If a result value was tied to an input value, the computed result may 06326 // have a wider width than the expected result. Extract the relevant 06327 // portion. 06328 Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val); 06329 } 06330 06331 assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); 06332 } 06333 06334 setValue(CS.getInstruction(), Val); 06335 // Don't need to use this as a chain in this case. 06336 if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) 06337 return; 06338 } 06339 06340 std::vector<std::pair<SDValue, const Value *> > StoresToEmit; 06341 06342 // Process indirect outputs, first output all of the flagged copies out of 06343 // physregs. 06344 for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { 06345 RegsForValue &OutRegs = IndirectStoresToEmit[i].first; 06346 const Value *Ptr = IndirectStoresToEmit[i].second; 06347 SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), 06348 Chain, &Flag, IA); 06349 StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); 06350 } 06351 06352 // Emit the non-flagged stores from the physregs. 06353 SmallVector<SDValue, 8> OutChains; 06354 for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { 06355 SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),