LLVM API Documentation
00001 // 00002 // The LLVM Compiler Infrastructure 00003 // 00004 // This file is distributed under the University of Illinois Open Source 00005 // License. See LICENSE.TXT for details. 00006 // 00007 //===----------------------------------------------------------------------===// 00008 // 00009 // This file defines the interfaces that NVPTX uses to lower LLVM code into a 00010 // selection DAG. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "NVPTXISelLowering.h" 00015 #include "NVPTX.h" 00016 #include "NVPTXTargetMachine.h" 00017 #include "NVPTXTargetObjectFile.h" 00018 #include "NVPTXUtilities.h" 00019 #include "llvm/CodeGen/Analysis.h" 00020 #include "llvm/CodeGen/MachineFrameInfo.h" 00021 #include "llvm/CodeGen/MachineFunction.h" 00022 #include "llvm/CodeGen/MachineInstrBuilder.h" 00023 #include "llvm/CodeGen/MachineRegisterInfo.h" 00024 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 00025 #include "llvm/IR/DerivedTypes.h" 00026 #include "llvm/IR/Function.h" 00027 #include "llvm/IR/GlobalValue.h" 00028 #include "llvm/IR/IntrinsicInst.h" 00029 #include "llvm/IR/Intrinsics.h" 00030 #include "llvm/IR/Module.h" 00031 #include "llvm/MC/MCSectionELF.h" 00032 #include "llvm/Support/CallSite.h" 00033 #include "llvm/Support/CommandLine.h" 00034 #include "llvm/Support/Debug.h" 00035 #include "llvm/Support/ErrorHandling.h" 00036 #include "llvm/Support/raw_ostream.h" 00037 #include <sstream> 00038 00039 #undef DEBUG_TYPE 00040 #define DEBUG_TYPE "nvptx-lower" 00041 00042 using namespace llvm; 00043 00044 static unsigned int uniqueCallSite = 0; 00045 00046 static cl::opt<bool> sched4reg( 00047 "nvptx-sched4reg", 00048 cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); 00049 00050 static bool IsPTXVectorType(MVT VT) { 00051 switch (VT.SimpleTy) { 00052 default: 00053 return false; 00054 case MVT::v2i8: 00055 case MVT::v4i8: 00056 case MVT::v2i16: 00057 case MVT::v4i16: 00058 case MVT::v2i32: 00059 case MVT::v4i32: 00060 case MVT::v2i64: 00061 case MVT::v2f32: 00062 case MVT::v4f32: 00063 case MVT::v2f64: 00064 return true; 00065 } 00066 } 00067 00068 // NVPTXTargetLowering Constructor. 00069 NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) 00070 : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), 00071 nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { 00072 00073 // always lower memset, memcpy, and memmove intrinsics to load/store 00074 // instructions, rather 00075 // then generating calls to memset, mempcy or memmove. 00076 MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; 00077 MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; 00078 MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; 00079 00080 setBooleanContents(ZeroOrNegativeOneBooleanContent); 00081 00082 // Jump is Expensive. Don't create extra control flow for 'and', 'or' 00083 // condition branches. 00084 setJumpIsExpensive(true); 00085 00086 // By default, use the Source scheduling 00087 if (sched4reg) 00088 setSchedulingPreference(Sched::RegPressure); 00089 else 00090 setSchedulingPreference(Sched::Source); 00091 00092 addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); 00093 addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass); 00094 addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); 00095 addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); 00096 addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); 00097 addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); 00098 addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); 00099 00100 // Operations not directly supported by NVPTX. 00101 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); 00102 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 00103 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 00104 setOperationAction(ISD::BR_CC, MVT::i1, Expand); 00105 setOperationAction(ISD::BR_CC, MVT::i8, Expand); 00106 setOperationAction(ISD::BR_CC, MVT::i16, Expand); 00107 setOperationAction(ISD::BR_CC, MVT::i32, Expand); 00108 setOperationAction(ISD::BR_CC, MVT::i64, Expand); 00109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand); 00110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); 00111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 00112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 00113 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 00114 00115 if (nvptxSubtarget.hasROT64()) { 00116 setOperationAction(ISD::ROTL, MVT::i64, Legal); 00117 setOperationAction(ISD::ROTR, MVT::i64, Legal); 00118 } else { 00119 setOperationAction(ISD::ROTL, MVT::i64, Expand); 00120 setOperationAction(ISD::ROTR, MVT::i64, Expand); 00121 } 00122 if (nvptxSubtarget.hasROT32()) { 00123 setOperationAction(ISD::ROTL, MVT::i32, Legal); 00124 setOperationAction(ISD::ROTR, MVT::i32, Legal); 00125 } else { 00126 setOperationAction(ISD::ROTL, MVT::i32, Expand); 00127 setOperationAction(ISD::ROTR, MVT::i32, Expand); 00128 } 00129 00130 setOperationAction(ISD::ROTL, MVT::i16, Expand); 00131 setOperationAction(ISD::ROTR, MVT::i16, Expand); 00132 setOperationAction(ISD::ROTL, MVT::i8, Expand); 00133 setOperationAction(ISD::ROTR, MVT::i8, Expand); 00134 setOperationAction(ISD::BSWAP, MVT::i16, Expand); 00135 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 00136 setOperationAction(ISD::BSWAP, MVT::i64, Expand); 00137 00138 // Indirect branch is not supported. 00139 // This also disables Jump Table creation. 00140 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 00141 setOperationAction(ISD::BRIND, MVT::Other, Expand); 00142 00143 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 00144 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 00145 00146 // We want to legalize constant related memmove and memcopy 00147 // intrinsics. 00148 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 00149 00150 // Turn FP extload into load/fextend 00151 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 00152 // Turn FP truncstore into trunc + store. 00153 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 00154 00155 // PTX does not support load / store predicate registers 00156 setOperationAction(ISD::LOAD, MVT::i1, Custom); 00157 setOperationAction(ISD::STORE, MVT::i1, Custom); 00158 00159 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 00160 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); 00161 setTruncStoreAction(MVT::i64, MVT::i1, Expand); 00162 setTruncStoreAction(MVT::i32, MVT::i1, Expand); 00163 setTruncStoreAction(MVT::i16, MVT::i1, Expand); 00164 setTruncStoreAction(MVT::i8, MVT::i1, Expand); 00165 00166 // This is legal in NVPTX 00167 setOperationAction(ISD::ConstantFP, MVT::f64, Legal); 00168 setOperationAction(ISD::ConstantFP, MVT::f32, Legal); 00169 00170 // TRAP can be lowered to PTX trap 00171 setOperationAction(ISD::TRAP, MVT::Other, Legal); 00172 00173 // Register custom handling for vector loads/stores 00174 for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; 00175 ++i) { 00176 MVT VT = (MVT::SimpleValueType) i; 00177 if (IsPTXVectorType(VT)) { 00178 setOperationAction(ISD::LOAD, VT, Custom); 00179 setOperationAction(ISD::STORE, VT, Custom); 00180 setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); 00181 } 00182 } 00183 00184 // Now deduce the information based on the above mentioned 00185 // actions 00186 computeRegisterProperties(); 00187 } 00188 00189 const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { 00190 switch (Opcode) { 00191 default: 00192 return 0; 00193 case NVPTXISD::CALL: 00194 return "NVPTXISD::CALL"; 00195 case NVPTXISD::RET_FLAG: 00196 return "NVPTXISD::RET_FLAG"; 00197 case NVPTXISD::Wrapper: 00198 return "NVPTXISD::Wrapper"; 00199 case NVPTXISD::NVBuiltin: 00200 return "NVPTXISD::NVBuiltin"; 00201 case NVPTXISD::DeclareParam: 00202 return "NVPTXISD::DeclareParam"; 00203 case NVPTXISD::DeclareScalarParam: 00204 return "NVPTXISD::DeclareScalarParam"; 00205 case NVPTXISD::DeclareRet: 00206 return "NVPTXISD::DeclareRet"; 00207 case NVPTXISD::DeclareRetParam: 00208 return "NVPTXISD::DeclareRetParam"; 00209 case NVPTXISD::PrintCall: 00210 return "NVPTXISD::PrintCall"; 00211 case NVPTXISD::LoadParam: 00212 return "NVPTXISD::LoadParam"; 00213 case NVPTXISD::StoreParam: 00214 return "NVPTXISD::StoreParam"; 00215 case NVPTXISD::StoreParamS32: 00216 return "NVPTXISD::StoreParamS32"; 00217 case NVPTXISD::StoreParamU32: 00218 return "NVPTXISD::StoreParamU32"; 00219 case NVPTXISD::MoveToParam: 00220 return "NVPTXISD::MoveToParam"; 00221 case NVPTXISD::CallArgBegin: 00222 return "NVPTXISD::CallArgBegin"; 00223 case NVPTXISD::CallArg: 00224 return "NVPTXISD::CallArg"; 00225 case NVPTXISD::LastCallArg: 00226 return "NVPTXISD::LastCallArg"; 00227 case NVPTXISD::CallArgEnd: 00228 return "NVPTXISD::CallArgEnd"; 00229 case NVPTXISD::CallVoid: 00230 return "NVPTXISD::CallVoid"; 00231 case NVPTXISD::CallVal: 00232 return "NVPTXISD::CallVal"; 00233 case NVPTXISD::CallSymbol: 00234 return "NVPTXISD::CallSymbol"; 00235 case NVPTXISD::Prototype: 00236 return "NVPTXISD::Prototype"; 00237 case NVPTXISD::MoveParam: 00238 return "NVPTXISD::MoveParam"; 00239 case NVPTXISD::MoveRetval: 00240 return "NVPTXISD::MoveRetval"; 00241 case NVPTXISD::MoveToRetval: 00242 return "NVPTXISD::MoveToRetval"; 00243 case NVPTXISD::StoreRetval: 00244 return "NVPTXISD::StoreRetval"; 00245 case NVPTXISD::PseudoUseParam: 00246 return "NVPTXISD::PseudoUseParam"; 00247 case NVPTXISD::RETURN: 00248 return "NVPTXISD::RETURN"; 00249 case NVPTXISD::CallSeqBegin: 00250 return "NVPTXISD::CallSeqBegin"; 00251 case NVPTXISD::CallSeqEnd: 00252 return "NVPTXISD::CallSeqEnd"; 00253 case NVPTXISD::LoadV2: 00254 return "NVPTXISD::LoadV2"; 00255 case NVPTXISD::LoadV4: 00256 return "NVPTXISD::LoadV4"; 00257 case NVPTXISD::LDGV2: 00258 return "NVPTXISD::LDGV2"; 00259 case NVPTXISD::LDGV4: 00260 return "NVPTXISD::LDGV4"; 00261 case NVPTXISD::LDUV2: 00262 return "NVPTXISD::LDUV2"; 00263 case NVPTXISD::LDUV4: 00264 return "NVPTXISD::LDUV4"; 00265 case NVPTXISD::StoreV2: 00266 return "NVPTXISD::StoreV2"; 00267 case NVPTXISD::StoreV4: 00268 return "NVPTXISD::StoreV4"; 00269 } 00270 } 00271 00272 bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const { 00273 return VT == MVT::i1; 00274 } 00275 00276 SDValue 00277 NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { 00278 DebugLoc dl = Op.getDebugLoc(); 00279 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 00280 Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); 00281 return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); 00282 } 00283 00284 std::string NVPTXTargetLowering::getPrototype( 00285 Type *retTy, const ArgListTy &Args, 00286 const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const { 00287 00288 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 00289 00290 std::stringstream O; 00291 O << "prototype_" << uniqueCallSite << " : .callprototype "; 00292 00293 if (retTy->getTypeID() == Type::VoidTyID) 00294 O << "()"; 00295 else { 00296 O << "("; 00297 if (isABI) { 00298 if (retTy->isPrimitiveType() || retTy->isIntegerTy()) { 00299 unsigned size = 0; 00300 if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { 00301 size = ITy->getBitWidth(); 00302 if (size < 32) 00303 size = 32; 00304 } else { 00305 assert(retTy->isFloatingPointTy() && 00306 "Floating point type expected here"); 00307 size = retTy->getPrimitiveSizeInBits(); 00308 } 00309 00310 O << ".param .b" << size << " _"; 00311 } else if (isa<PointerType>(retTy)) 00312 O << ".param .b" << getPointerTy().getSizeInBits() << " _"; 00313 else { 00314 if ((retTy->getTypeID() == Type::StructTyID) || 00315 isa<VectorType>(retTy)) { 00316 SmallVector<EVT, 16> vtparts; 00317 ComputeValueVTs(*this, retTy, vtparts); 00318 unsigned totalsz = 0; 00319 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 00320 unsigned elems = 1; 00321 EVT elemtype = vtparts[i]; 00322 if (vtparts[i].isVector()) { 00323 elems = vtparts[i].getVectorNumElements(); 00324 elemtype = vtparts[i].getVectorElementType(); 00325 } 00326 for (unsigned j = 0, je = elems; j != je; ++j) { 00327 unsigned sz = elemtype.getSizeInBits(); 00328 if (elemtype.isInteger() && (sz < 8)) 00329 sz = 8; 00330 totalsz += sz / 8; 00331 } 00332 } 00333 O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; 00334 } else { 00335 assert(false && "Unknown return type"); 00336 } 00337 } 00338 } else { 00339 SmallVector<EVT, 16> vtparts; 00340 ComputeValueVTs(*this, retTy, vtparts); 00341 unsigned idx = 0; 00342 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 00343 unsigned elems = 1; 00344 EVT elemtype = vtparts[i]; 00345 if (vtparts[i].isVector()) { 00346 elems = vtparts[i].getVectorNumElements(); 00347 elemtype = vtparts[i].getVectorElementType(); 00348 } 00349 00350 for (unsigned j = 0, je = elems; j != je; ++j) { 00351 unsigned sz = elemtype.getSizeInBits(); 00352 if (elemtype.isInteger() && (sz < 32)) 00353 sz = 32; 00354 O << ".reg .b" << sz << " _"; 00355 if (j < je - 1) 00356 O << ", "; 00357 ++idx; 00358 } 00359 if (i < e - 1) 00360 O << ", "; 00361 } 00362 } 00363 O << ") "; 00364 } 00365 O << "_ ("; 00366 00367 bool first = true; 00368 MVT thePointerTy = getPointerTy(); 00369 00370 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 00371 const Type *Ty = Args[i].Ty; 00372 if (!first) { 00373 O << ", "; 00374 } 00375 first = false; 00376 00377 if (Outs[i].Flags.isByVal() == false) { 00378 unsigned sz = 0; 00379 if (isa<IntegerType>(Ty)) { 00380 sz = cast<IntegerType>(Ty)->getBitWidth(); 00381 if (sz < 32) 00382 sz = 32; 00383 } else if (isa<PointerType>(Ty)) 00384 sz = thePointerTy.getSizeInBits(); 00385 else 00386 sz = Ty->getPrimitiveSizeInBits(); 00387 if (isABI) 00388 O << ".param .b" << sz << " "; 00389 else 00390 O << ".reg .b" << sz << " "; 00391 O << "_"; 00392 continue; 00393 } 00394 const PointerType *PTy = dyn_cast<PointerType>(Ty); 00395 assert(PTy && "Param with byval attribute should be a pointer type"); 00396 Type *ETy = PTy->getElementType(); 00397 00398 if (isABI) { 00399 unsigned align = Outs[i].Flags.getByValAlign(); 00400 unsigned sz = getDataLayout()->getTypeAllocSize(ETy); 00401 O << ".param .align " << align << " .b8 "; 00402 O << "_"; 00403 O << "[" << sz << "]"; 00404 continue; 00405 } else { 00406 SmallVector<EVT, 16> vtparts; 00407 ComputeValueVTs(*this, ETy, vtparts); 00408 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 00409 unsigned elems = 1; 00410 EVT elemtype = vtparts[i]; 00411 if (vtparts[i].isVector()) { 00412 elems = vtparts[i].getVectorNumElements(); 00413 elemtype = vtparts[i].getVectorElementType(); 00414 } 00415 00416 for (unsigned j = 0, je = elems; j != je; ++j) { 00417 unsigned sz = elemtype.getSizeInBits(); 00418 if (elemtype.isInteger() && (sz < 32)) 00419 sz = 32; 00420 O << ".reg .b" << sz << " "; 00421 O << "_"; 00422 if (j < je - 1) 00423 O << ", "; 00424 } 00425 if (i < e - 1) 00426 O << ", "; 00427 } 00428 continue; 00429 } 00430 } 00431 O << ");"; 00432 return O.str(); 00433 } 00434 00435 SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 00436 SmallVectorImpl<SDValue> &InVals) const { 00437 SelectionDAG &DAG = CLI.DAG; 00438 DebugLoc &dl = CLI.DL; 00439 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; 00440 SmallVector<SDValue, 32> &OutVals = CLI.OutVals; 00441 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; 00442 SDValue Chain = CLI.Chain; 00443 SDValue Callee = CLI.Callee; 00444 bool &isTailCall = CLI.IsTailCall; 00445 ArgListTy &Args = CLI.Args; 00446 Type *retTy = CLI.RetTy; 00447 ImmutableCallSite *CS = CLI.CS; 00448 00449 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 00450 00451 SDValue tempChain = Chain; 00452 Chain = 00453 DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true)); 00454 SDValue InFlag = Chain.getValue(1); 00455 00456 assert((Outs.size() == Args.size()) && 00457 "Unexpected number of arguments to function call"); 00458 unsigned paramCount = 0; 00459 // Declare the .params or .reg need to pass values 00460 // to the function 00461 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 00462 EVT VT = Outs[i].VT; 00463 00464 if (Outs[i].Flags.isByVal() == false) { 00465 // Plain scalar 00466 // for ABI, declare .param .b<size> .param<n>; 00467 // for nonABI, declare .reg .b<size> .param<n>; 00468 unsigned isReg = 1; 00469 if (isABI) 00470 isReg = 0; 00471 unsigned sz = VT.getSizeInBits(); 00472 if (VT.isInteger() && (sz < 32)) 00473 sz = 32; 00474 SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00475 SDValue DeclareParamOps[] = { Chain, 00476 DAG.getConstant(paramCount, MVT::i32), 00477 DAG.getConstant(sz, MVT::i32), 00478 DAG.getConstant(isReg, MVT::i32), InFlag }; 00479 Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, 00480 DeclareParamOps, 5); 00481 InFlag = Chain.getValue(1); 00482 SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00483 SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), 00484 DAG.getConstant(0, MVT::i32), OutVals[i], 00485 InFlag }; 00486 00487 unsigned opcode = NVPTXISD::StoreParam; 00488 if (isReg) 00489 opcode = NVPTXISD::MoveToParam; 00490 else { 00491 if (Outs[i].Flags.isZExt()) 00492 opcode = NVPTXISD::StoreParamU32; 00493 else if (Outs[i].Flags.isSExt()) 00494 opcode = NVPTXISD::StoreParamS32; 00495 } 00496 Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5); 00497 00498 InFlag = Chain.getValue(1); 00499 ++paramCount; 00500 continue; 00501 } 00502 // struct or vector 00503 SmallVector<EVT, 16> vtparts; 00504 const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); 00505 assert(PTy && "Type of a byval parameter should be pointer"); 00506 ComputeValueVTs(*this, PTy->getElementType(), vtparts); 00507 00508 if (isABI) { 00509 // declare .param .align 16 .b8 .param<n>[<size>]; 00510 unsigned sz = Outs[i].Flags.getByValSize(); 00511 SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00512 // The ByValAlign in the Outs[i].Flags is alway set at this point, so we 00513 // don't need to 00514 // worry about natural alignment or not. See TargetLowering::LowerCallTo() 00515 SDValue DeclareParamOps[] = { 00516 Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), 00517 DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), 00518 InFlag 00519 }; 00520 Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, 00521 DeclareParamOps, 5); 00522 InFlag = Chain.getValue(1); 00523 unsigned curOffset = 0; 00524 for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { 00525 unsigned elems = 1; 00526 EVT elemtype = vtparts[j]; 00527 if (vtparts[j].isVector()) { 00528 elems = vtparts[j].getVectorNumElements(); 00529 elemtype = vtparts[j].getVectorElementType(); 00530 } 00531 for (unsigned k = 0, ke = elems; k != ke; ++k) { 00532 unsigned sz = elemtype.getSizeInBits(); 00533 if (elemtype.isInteger() && (sz < 8)) 00534 sz = 8; 00535 SDValue srcAddr = 00536 DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], 00537 DAG.getConstant(curOffset, getPointerTy())); 00538 SDValue theVal = 00539 DAG.getLoad(elemtype, dl, tempChain, srcAddr, 00540 MachinePointerInfo(), false, false, false, 0); 00541 SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00542 SDValue CopyParamOps[] = { Chain, 00543 DAG.getConstant(paramCount, MVT::i32), 00544 DAG.getConstant(curOffset, MVT::i32), 00545 theVal, InFlag }; 00546 Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs, 00547 CopyParamOps, 5); 00548 InFlag = Chain.getValue(1); 00549 curOffset += sz / 8; 00550 } 00551 } 00552 ++paramCount; 00553 continue; 00554 } 00555 // Non-abi, struct or vector 00556 // Declare a bunch or .reg .b<size> .param<n> 00557 unsigned curOffset = 0; 00558 for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { 00559 unsigned elems = 1; 00560 EVT elemtype = vtparts[j]; 00561 if (vtparts[j].isVector()) { 00562 elems = vtparts[j].getVectorNumElements(); 00563 elemtype = vtparts[j].getVectorElementType(); 00564 } 00565 for (unsigned k = 0, ke = elems; k != ke; ++k) { 00566 unsigned sz = elemtype.getSizeInBits(); 00567 if (elemtype.isInteger() && (sz < 32)) 00568 sz = 32; 00569 SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00570 SDValue DeclareParamOps[] = { Chain, 00571 DAG.getConstant(paramCount, MVT::i32), 00572 DAG.getConstant(sz, MVT::i32), 00573 DAG.getConstant(1, MVT::i32), InFlag }; 00574 Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, 00575 DeclareParamOps, 5); 00576 InFlag = Chain.getValue(1); 00577 SDValue srcAddr = 00578 DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], 00579 DAG.getConstant(curOffset, getPointerTy())); 00580 SDValue theVal = 00581 DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(), 00582 false, false, false, 0); 00583 SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00584 SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), 00585 DAG.getConstant(0, MVT::i32), theVal, 00586 InFlag }; 00587 Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs, 00588 CopyParamOps, 5); 00589 InFlag = Chain.getValue(1); 00590 ++paramCount; 00591 } 00592 } 00593 } 00594 00595 GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); 00596 unsigned retAlignment = 0; 00597 00598 // Handle Result 00599 unsigned retCount = 0; 00600 if (Ins.size() > 0) { 00601 SmallVector<EVT, 16> resvtparts; 00602 ComputeValueVTs(*this, retTy, resvtparts); 00603 00604 // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or 00605 // individual .reg .b<size> func_retval<0..> for non ABI 00606 unsigned resultsz = 0; 00607 for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) { 00608 unsigned elems = 1; 00609 EVT elemtype = resvtparts[i]; 00610 if (resvtparts[i].isVector()) { 00611 elems = resvtparts[i].getVectorNumElements(); 00612 elemtype = resvtparts[i].getVectorElementType(); 00613 } 00614 for (unsigned j = 0, je = elems; j != je; ++j) { 00615 unsigned sz = elemtype.getSizeInBits(); 00616 if (isABI == false) { 00617 if (elemtype.isInteger() && (sz < 32)) 00618 sz = 32; 00619 } else { 00620 if (elemtype.isInteger() && (sz < 8)) 00621 sz = 8; 00622 } 00623 if (isABI == false) { 00624 SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00625 SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32), 00626 DAG.getConstant(sz, MVT::i32), 00627 DAG.getConstant(retCount, MVT::i32), 00628 InFlag }; 00629 Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, 00630 DeclareRetOps, 5); 00631 InFlag = Chain.getValue(1); 00632 ++retCount; 00633 } 00634 resultsz += sz; 00635 } 00636 } 00637 if (isABI) { 00638 if (retTy->isPrimitiveType() || retTy->isIntegerTy() || 00639 retTy->isPointerTy()) { 00640 // Scalar needs to be at least 32bit wide 00641 if (resultsz < 32) 00642 resultsz = 32; 00643 SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00644 SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), 00645 DAG.getConstant(resultsz, MVT::i32), 00646 DAG.getConstant(0, MVT::i32), InFlag }; 00647 Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, 00648 DeclareRetOps, 5); 00649 InFlag = Chain.getValue(1); 00650 } else { 00651 if (Func) { // direct call 00652 if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment)) 00653 retAlignment = getDataLayout()->getABITypeAlignment(retTy); 00654 } else { // indirect call 00655 const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction()); 00656 if (!llvm::getAlign(*CallI, 0, retAlignment)) 00657 retAlignment = getDataLayout()->getABITypeAlignment(retTy); 00658 } 00659 SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00660 SDValue DeclareRetOps[] = { Chain, 00661 DAG.getConstant(retAlignment, MVT::i32), 00662 DAG.getConstant(resultsz / 8, MVT::i32), 00663 DAG.getConstant(0, MVT::i32), InFlag }; 00664 Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, 00665 DeclareRetOps, 5); 00666 InFlag = Chain.getValue(1); 00667 } 00668 } 00669 } 00670 00671 if (!Func) { 00672 // This is indirect function call case : PTX requires a prototype of the 00673 // form 00674 // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); 00675 // to be emitted, and the label has to used as the last arg of call 00676 // instruction. 00677 // The prototype is embedded in a string and put as the operand for an 00678 // INLINEASM SDNode. 00679 SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00680 std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment); 00681 const char *asmstr = nvTM->getManagedStrPool() 00682 ->getManagedString(proto_string.c_str())->c_str(); 00683 SDValue InlineAsmOps[] = { 00684 Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()), 00685 DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag 00686 }; 00687 Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5); 00688 InFlag = Chain.getValue(1); 00689 } 00690 // Op to just print "call" 00691 SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00692 SDValue PrintCallOps[] = { 00693 Chain, 00694 DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32), 00695 InFlag 00696 }; 00697 Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), 00698 dl, PrintCallVTs, PrintCallOps, 3); 00699 InFlag = Chain.getValue(1); 00700 00701 // Ops to print out the function name 00702 SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00703 SDValue CallVoidOps[] = { Chain, Callee, InFlag }; 00704 Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3); 00705 InFlag = Chain.getValue(1); 00706 00707 // Ops to print out the param list 00708 SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00709 SDValue CallArgBeginOps[] = { Chain, InFlag }; 00710 Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, 00711 CallArgBeginOps, 2); 00712 InFlag = Chain.getValue(1); 00713 00714 for (unsigned i = 0, e = paramCount; i != e; ++i) { 00715 unsigned opcode; 00716 if (i == (e - 1)) 00717 opcode = NVPTXISD::LastCallArg; 00718 else 00719 opcode = NVPTXISD::CallArg; 00720 SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00721 SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), 00722 DAG.getConstant(i, MVT::i32), InFlag }; 00723 Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); 00724 InFlag = Chain.getValue(1); 00725 } 00726 SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00727 SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32), 00728 InFlag }; 00729 Chain = 00730 DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3); 00731 InFlag = Chain.getValue(1); 00732 00733 if (!Func) { 00734 SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); 00735 SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), 00736 InFlag }; 00737 Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); 00738 InFlag = Chain.getValue(1); 00739 } 00740 00741 // Generate loads from param memory/moves from registers for result 00742 if (Ins.size() > 0) { 00743 if (isABI) { 00744 unsigned resoffset = 0; 00745 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 00746 unsigned sz = Ins[i].VT.getSizeInBits(); 00747 if (Ins[i].VT.isInteger() && (sz < 8)) 00748 sz = 8; 00749 EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue }; 00750 SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), 00751 DAG.getConstant(resoffset, MVT::i32), InFlag }; 00752 SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs, 00753 LoadRetOps, array_lengthof(LoadRetOps)); 00754 Chain = retval.getValue(1); 00755 InFlag = retval.getValue(2); 00756 InVals.push_back(retval); 00757 resoffset += sz / 8; 00758 } 00759 } else { 00760 SmallVector<EVT, 16> resvtparts; 00761 ComputeValueVTs(*this, retTy, resvtparts); 00762 00763 assert(Ins.size() == resvtparts.size() && 00764 "Unexpected number of return values in non-ABI case"); 00765 unsigned paramNum = 0; 00766 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 00767 assert(EVT(Ins[i].VT) == resvtparts[i] && 00768 "Unexpected EVT type in non-ABI case"); 00769 unsigned numelems = 1; 00770 EVT elemtype = Ins[i].VT; 00771 if (Ins[i].VT.isVector()) { 00772 numelems = Ins[i].VT.getVectorNumElements(); 00773 elemtype = Ins[i].VT.getVectorElementType(); 00774 } 00775 std::vector<SDValue> tempRetVals; 00776 for (unsigned j = 0; j < numelems; ++j) { 00777 EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue }; 00778 SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32), 00779 DAG.getConstant(paramNum, MVT::i32), 00780 InFlag }; 00781 SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs, 00782 MoveRetOps, array_lengthof(MoveRetOps)); 00783 Chain = retval.getValue(1); 00784 InFlag = retval.getValue(2); 00785 tempRetVals.push_back(retval); 00786 ++paramNum; 00787 } 00788 if (Ins[i].VT.isVector()) 00789 InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT, 00790 &tempRetVals[0], tempRetVals.size())); 00791 else 00792 InVals.push_back(tempRetVals[0]); 00793 } 00794 } 00795 } 00796 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), 00797 DAG.getIntPtrConstant(uniqueCallSite + 1, true), 00798 InFlag); 00799 uniqueCallSite++; 00800 00801 // set isTailCall to false for now, until we figure out how to express 00802 // tail call optimization in PTX 00803 isTailCall = false; 00804 return Chain; 00805 } 00806 00807 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() 00808 // (see LegalizeDAG.cpp). This is slow and uses local memory. 00809 // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 00810 SDValue 00811 NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { 00812 SDNode *Node = Op.getNode(); 00813 DebugLoc dl = Node->getDebugLoc(); 00814 SmallVector<SDValue, 8> Ops; 00815 unsigned NumOperands = Node->getNumOperands(); 00816 for (unsigned i = 0; i < NumOperands; ++i) { 00817 SDValue SubOp = Node->getOperand(i); 00818 EVT VVT = SubOp.getNode()->getValueType(0); 00819 EVT EltVT = VVT.getVectorElementType(); 00820 unsigned NumSubElem = VVT.getVectorNumElements(); 00821 for (unsigned j = 0; j < NumSubElem; ++j) { 00822 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, 00823 DAG.getIntPtrConstant(j))); 00824 } 00825 } 00826 return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0], 00827 Ops.size()); 00828 } 00829 00830 SDValue 00831 NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 00832 switch (Op.getOpcode()) { 00833 case ISD::RETURNADDR: 00834 return SDValue(); 00835 case ISD::FRAMEADDR: 00836 return SDValue(); 00837 case ISD::GlobalAddress: 00838 return LowerGlobalAddress(Op, DAG); 00839 case ISD::INTRINSIC_W_CHAIN: 00840 return Op; 00841 case ISD::BUILD_VECTOR: 00842 case ISD::EXTRACT_SUBVECTOR: 00843 return Op; 00844 case ISD::CONCAT_VECTORS: 00845 return LowerCONCAT_VECTORS(Op, DAG); 00846 case ISD::STORE: 00847 return LowerSTORE(Op, DAG); 00848 case ISD::LOAD: 00849 return LowerLOAD(Op, DAG); 00850 default: 00851 llvm_unreachable("Custom lowering not defined for operation"); 00852 } 00853 } 00854 00855 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 00856 if (Op.getValueType() == MVT::i1) 00857 return LowerLOADi1(Op, DAG); 00858 else 00859 return SDValue(); 00860 } 00861 00862 // v = ld i1* addr 00863 // => 00864 // v1 = ld i8* addr 00865 // v = trunc v1 to i1 00866 SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { 00867 SDNode *Node = Op.getNode(); 00868 LoadSDNode *LD = cast<LoadSDNode>(Node); 00869 DebugLoc dl = Node->getDebugLoc(); 00870 assert(LD->getExtensionType() == ISD::NON_EXTLOAD); 00871 assert(Node->getValueType(0) == MVT::i1 && 00872 "Custom lowering for i1 load only"); 00873 SDValue newLD = 00874 DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), 00875 LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), 00876 LD->isInvariant(), LD->getAlignment()); 00877 SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); 00878 // The legalizer (the caller) is expecting two values from the legalized 00879 // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() 00880 // in LegalizeDAG.cpp which also uses MergeValues. 00881 SDValue Ops[] = { result, LD->getChain() }; 00882 return DAG.getMergeValues(Ops, 2, dl); 00883 } 00884 00885 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 00886 EVT ValVT = Op.getOperand(1).getValueType(); 00887 if (ValVT == MVT::i1) 00888 return LowerSTOREi1(Op, DAG); 00889 else if (ValVT.isVector()) 00890 return LowerSTOREVector(Op, DAG); 00891 else 00892 return SDValue(); 00893 } 00894 00895 SDValue 00896 NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { 00897 SDNode *N = Op.getNode(); 00898 SDValue Val = N->getOperand(1); 00899 DebugLoc DL = N->getDebugLoc(); 00900 EVT ValVT = Val.getValueType(); 00901 00902 if (ValVT.isVector()) { 00903 // We only handle "native" vector sizes for now, e.g. <4 x double> is not 00904 // legal. We can (and should) split that into 2 stores of <2 x double> here 00905 // but I'm leaving that as a TODO for now. 00906 if (!ValVT.isSimple()) 00907 return SDValue(); 00908 switch (ValVT.getSimpleVT().SimpleTy) { 00909 default: 00910 return SDValue(); 00911 case MVT::v2i8: 00912 case MVT::v2i16: 00913 case MVT::v2i32: 00914 case MVT::v2i64: 00915 case MVT::v2f32: 00916 case MVT::v2f64: 00917 case MVT::v4i8: 00918 case MVT::v4i16: 00919 case MVT::v4i32: 00920 case MVT::v4f32: 00921 // This is a "native" vector type 00922 break; 00923 } 00924 00925 unsigned Opcode = 0; 00926 EVT EltVT = ValVT.getVectorElementType(); 00927 unsigned NumElts = ValVT.getVectorNumElements(); 00928 00929 // Since StoreV2 is a target node, we cannot rely on DAG type legalization. 00930 // Therefore, we must ensure the type is legal. For i1 and i8, we set the 00931 // stored type to i16 and propogate the "real" type as the memory type. 00932 bool NeedExt = false; 00933 if (EltVT.getSizeInBits() < 16) 00934 NeedExt = true; 00935 00936 switch (NumElts) { 00937 default: 00938 return SDValue(); 00939 case 2: 00940 Opcode = NVPTXISD::StoreV2; 00941 break; 00942 case 4: { 00943 Opcode = NVPTXISD::StoreV4; 00944 break; 00945 } 00946 } 00947 00948 SmallVector<SDValue, 8> Ops; 00949 00950 // First is the chain 00951 Ops.push_back(N->getOperand(0)); 00952 00953 // Then the split values 00954 for (unsigned i = 0; i < NumElts; ++i) { 00955 SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, 00956 DAG.getIntPtrConstant(i)); 00957 if (NeedExt) 00958 // ANY_EXTEND is correct here since the store will only look at the 00959 // lower-order bits anyway. 00960 ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); 00961 Ops.push_back(ExtVal); 00962 } 00963 00964 // Then any remaining arguments 00965 for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { 00966 Ops.push_back(N->getOperand(i)); 00967 } 00968 00969 MemSDNode *MemSD = cast<MemSDNode>(N); 00970 00971 SDValue NewSt = DAG.getMemIntrinsicNode( 00972 Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(), 00973 MemSD->getMemoryVT(), MemSD->getMemOperand()); 00974 00975 //return DCI.CombineTo(N, NewSt, true); 00976 return NewSt; 00977 } 00978 00979 return SDValue(); 00980 } 00981 00982 // st i1 v, addr 00983 // => 00984 // v1 = zxt v to i8 00985 // st i8, addr 00986 SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { 00987 SDNode *Node = Op.getNode(); 00988 DebugLoc dl = Node->getDebugLoc(); 00989 StoreSDNode *ST = cast<StoreSDNode>(Node); 00990 SDValue Tmp1 = ST->getChain(); 00991 SDValue Tmp2 = ST->getBasePtr(); 00992 SDValue Tmp3 = ST->getValue(); 00993 assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); 00994 unsigned Alignment = ST->getAlignment(); 00995 bool isVolatile = ST->isVolatile(); 00996 bool isNonTemporal = ST->isNonTemporal(); 00997 Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3); 00998 SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), 00999 isVolatile, isNonTemporal, Alignment); 01000 return Result; 01001 } 01002 01003 SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, 01004 int idx, EVT v) const { 01005 std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); 01006 std::stringstream suffix; 01007 suffix << idx; 01008 *name += suffix.str(); 01009 return DAG.getTargetExternalSymbol(name->c_str(), v); 01010 } 01011 01012 SDValue 01013 NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { 01014 return getExtSymb(DAG, ".PARAM", idx, v); 01015 } 01016 01017 SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { 01018 return getExtSymb(DAG, ".HLPPARAM", idx); 01019 } 01020 01021 // Check to see if the kernel argument is image*_t or sampler_t 01022 01023 bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { 01024 static const char *const specialTypes[] = { "struct._image2d_t", 01025 "struct._image3d_t", 01026 "struct._sampler_t" }; 01027 01028 const Type *Ty = arg->getType(); 01029 const PointerType *PTy = dyn_cast<PointerType>(Ty); 01030 01031 if (!PTy) 01032 return false; 01033 01034 if (!context) 01035 return false; 01036 01037 const StructType *STy = dyn_cast<StructType>(PTy->getElementType()); 01038 const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : ""; 01039 01040 for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i) 01041 if (TypeName == specialTypes[i]) 01042 return true; 01043 01044 return false; 01045 } 01046 01047 SDValue NVPTXTargetLowering::LowerFormalArguments( 01048 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 01049 const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, 01050 SmallVectorImpl<SDValue> &InVals) const { 01051 MachineFunction &MF = DAG.getMachineFunction(); 01052 const DataLayout *TD = getDataLayout(); 01053 01054 const Function *F = MF.getFunction(); 01055 const AttributeSet &PAL = F->getAttributes(); 01056 01057 SDValue Root = DAG.getRoot(); 01058 std::vector<SDValue> OutChains; 01059 01060 bool isKernel = llvm::isKernelFunction(*F); 01061 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 01062 01063 std::vector<Type *> argTypes; 01064 std::vector<const Argument *> theArgs; 01065 for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); 01066 I != E; ++I) { 01067 theArgs.push_back(I); 01068 argTypes.push_back(I->getType()); 01069 } 01070 //assert(argTypes.size() == Ins.size() && 01071 // "Ins types and function types did not match"); 01072 01073 int idx = 0; 01074 for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) { 01075 Type *Ty = argTypes[i]; 01076 EVT ObjectVT = getValueType(Ty); 01077 //assert(ObjectVT == Ins[i].VT && 01078 // "Ins type did not match function type"); 01079 01080 // If the kernel argument is image*_t or sampler_t, convert it to 01081 // a i32 constant holding the parameter position. This can later 01082 // matched in the AsmPrinter to output the correct mangled name. 01083 if (isImageOrSamplerVal( 01084 theArgs[i], 01085 (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() 01086 : 0))) { 01087 assert(isKernel && "Only kernels can have image/sampler params"); 01088 InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); 01089 continue; 01090 } 01091 01092 if (theArgs[i]->use_empty()) { 01093 // argument is dead 01094 if (ObjectVT.isVector()) { 01095 EVT EltVT = ObjectVT.getVectorElementType(); 01096 unsigned NumElts = ObjectVT.getVectorNumElements(); 01097 for (unsigned vi = 0; vi < NumElts; ++vi) { 01098 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT)); 01099 } 01100 } else { 01101 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); 01102 } 01103 continue; 01104 } 01105 01106 // In the following cases, assign a node order of "idx+1" 01107 // to newly created nodes. The SDNOdes for params have to 01108 // appear in the same order as their order of appearance 01109 // in the original function. "idx+1" holds that order. 01110 if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { 01111 if (ObjectVT.isVector()) { 01112 unsigned NumElts = ObjectVT.getVectorNumElements(); 01113 EVT EltVT = ObjectVT.getVectorElementType(); 01114 unsigned Offset = 0; 01115 for (unsigned vi = 0; vi < NumElts; ++vi) { 01116 SDValue A = getParamSymbol(DAG, idx, getPointerTy()); 01117 SDValue B = DAG.getIntPtrConstant(Offset); 01118 SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), 01119 //getParamSymbol(DAG, idx, EltVT), 01120 //DAG.getConstant(Offset, getPointerTy())); 01121 A, B); 01122 Value *SrcValue = Constant::getNullValue(PointerType::get( 01123 EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 01124 SDValue Ld = DAG.getLoad( 01125 EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false, 01126 false, 01127 TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); 01128 Offset += EltVT.getStoreSizeInBits() / 8; 01129 InVals.push_back(Ld); 01130 } 01131 continue; 01132 } 01133 01134 // A plain scalar. 01135 if (isABI || isKernel) { 01136 // If ABI, load from the param symbol 01137 SDValue Arg = getParamSymbol(DAG, idx); 01138 // Conjure up a value that we can get the address space from. 01139 // FIXME: Using a constant here is a hack. 01140 Value *srcValue = Constant::getNullValue( 01141 PointerType::get(ObjectVT.getTypeForEVT(F->getContext()), 01142 llvm::ADDRESS_SPACE_PARAM)); 01143 SDValue p = DAG.getLoad( 01144 ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, 01145 false, 01146 TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); 01147 if (p.getNode()) 01148 DAG.AssignOrdering(p.getNode(), idx + 1); 01149 InVals.push_back(p); 01150 } else { 01151 // If no ABI, just move the param symbol 01152 SDValue Arg = getParamSymbol(DAG, idx, ObjectVT); 01153 SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); 01154 if (p.getNode()) 01155 DAG.AssignOrdering(p.getNode(), idx + 1); 01156 InVals.push_back(p); 01157 } 01158 continue; 01159 } 01160 01161 // Param has ByVal attribute 01162 if (isABI || isKernel) { 01163 // Return MoveParam(param symbol). 01164 // Ideally, the param symbol can be returned directly, 01165 // but when SDNode builder decides to use it in a CopyToReg(), 01166 // machine instruction fails because TargetExternalSymbol 01167 // (not lowered) is target dependent, and CopyToReg assumes 01168 // the source is lowered. 01169 SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); 01170 SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); 01171 if (p.getNode()) 01172 DAG.AssignOrdering(p.getNode(), idx + 1); 01173 if (isKernel) 01174 InVals.push_back(p); 01175 else { 01176 SDValue p2 = DAG.getNode( 01177 ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, 01178 DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); 01179 InVals.push_back(p2); 01180 } 01181 } else { 01182 // Have to move a set of param symbols to registers and 01183 // store them locally and return the local pointer in InVals 01184 const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]); 01185 assert(elemPtrType && "Byval parameter should be a pointer type"); 01186 Type *elemType = elemPtrType->getElementType(); 01187 // Compute the constituent parts 01188 SmallVector<EVT, 16> vtparts; 01189 SmallVector<uint64_t, 16> offsets; 01190 ComputeValueVTs(*this, elemType, vtparts, &offsets, 0); 01191 unsigned totalsize = 0; 01192 for (unsigned j = 0, je = vtparts.size(); j != je; ++j) 01193 totalsize += vtparts[j].getStoreSizeInBits(); 01194 SDValue localcopy = DAG.getFrameIndex( 01195 MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false), 01196 getPointerTy()); 01197 unsigned sizesofar = 0; 01198 std::vector<SDValue> theChains; 01199 for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { 01200 unsigned numElems = 1; 01201 if (vtparts[j].isVector()) 01202 numElems = vtparts[j].getVectorNumElements(); 01203 for (unsigned k = 0, ke = numElems; k != ke; ++k) { 01204 EVT tmpvt = vtparts[j]; 01205 if (tmpvt.isVector()) 01206 tmpvt = tmpvt.getVectorElementType(); 01207 SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt, 01208 getParamSymbol(DAG, idx, tmpvt)); 01209 SDValue addr = 01210 DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, 01211 DAG.getConstant(sizesofar, getPointerTy())); 01212 theChains.push_back(DAG.getStore( 01213 Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0)); 01214 sizesofar += tmpvt.getStoreSizeInBits() / 8; 01215 ++idx; 01216 } 01217 } 01218 --idx; 01219 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0], 01220 theChains.size()); 01221 InVals.push_back(localcopy); 01222 } 01223 } 01224 01225 // Clang will check explicit VarArg and issue error if any. However, Clang 01226 // will let code with 01227 // implicit var arg like f() pass. 01228 // We treat this case as if the arg list is empty. 01229 //if (F.isVarArg()) { 01230 // assert(0 && "VarArg not supported yet!"); 01231 //} 01232 01233 if (!OutChains.empty()) 01234 DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], 01235 OutChains.size())); 01236 01237 return Chain; 01238 } 01239 01240 SDValue NVPTXTargetLowering::LowerReturn( 01241 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 01242 const SmallVectorImpl<ISD::OutputArg> &Outs, 01243 const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, 01244 SelectionDAG &DAG) const { 01245 01246 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 01247 01248 unsigned sizesofar = 0; 01249 unsigned idx = 0; 01250 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 01251 SDValue theVal = OutVals[i]; 01252 EVT theValType = theVal.getValueType(); 01253 unsigned numElems = 1; 01254 if (theValType.isVector()) 01255 numElems = theValType.getVectorNumElements(); 01256 for (unsigned j = 0, je = numElems; j != je; ++j) { 01257 SDValue tmpval = theVal; 01258 if (theValType.isVector()) 01259 tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 01260 theValType.getVectorElementType(), tmpval, 01261 DAG.getIntPtrConstant(j)); 01262 Chain = DAG.getNode( 01263 isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl, 01264 MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32), 01265 tmpval); 01266 if (theValType.isVector()) 01267 sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8; 01268 else 01269 sizesofar += theValType.getStoreSizeInBits() / 8; 01270 ++idx; 01271 } 01272 } 01273 01274 return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); 01275 } 01276 01277 void NVPTXTargetLowering::LowerAsmOperandForConstraint( 01278 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 01279 SelectionDAG &DAG) const { 01280 if (Constraint.length() > 1) 01281 return; 01282 else 01283 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 01284 } 01285 01286 // NVPTX suuport vector of legal types of any length in Intrinsics because the 01287 // NVPTX specific type legalizer 01288 // will legalize them to the PTX supported length. 01289 bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { 01290 if (isTypeLegal(VT)) 01291 return true; 01292 if (VT.isVector()) { 01293 MVT eVT = VT.getVectorElementType(); 01294 if (isTypeLegal(eVT)) 01295 return true; 01296 } 01297 return false; 01298 } 01299 01300 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as 01301 // TgtMemIntrinsic 01302 // because we need the information that is only available in the "Value" type 01303 // of destination 01304 // pointer. In particular, the address space information. 01305 bool NVPTXTargetLowering::getTgtMemIntrinsic( 01306 IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { 01307 switch (Intrinsic) { 01308 default: 01309 return false; 01310 01311 case Intrinsic::nvvm_atomic_load_add_f32: 01312 Info.opc = ISD::INTRINSIC_W_CHAIN; 01313 Info.memVT = MVT::f32; 01314 Info.ptrVal = I.getArgOperand(0); 01315 Info.offset = 0; 01316 Info.vol = 0; 01317 Info.readMem = true; 01318 Info.writeMem = true; 01319 Info.align = 0; 01320 return true; 01321 01322 case Intrinsic::nvvm_atomic_load_inc_32: 01323 case Intrinsic::nvvm_atomic_load_dec_32: 01324 Info.opc = ISD::INTRINSIC_W_CHAIN; 01325 Info.memVT = MVT::i32; 01326 Info.ptrVal = I.getArgOperand(0); 01327 Info.offset = 0; 01328 Info.vol = 0; 01329 Info.readMem = true; 01330 Info.writeMem = true; 01331 Info.align = 0; 01332 return true; 01333 01334 case Intrinsic::nvvm_ldu_global_i: 01335 case Intrinsic::nvvm_ldu_global_f: 01336 case Intrinsic::nvvm_ldu_global_p: 01337 01338 Info.opc = ISD::INTRINSIC_W_CHAIN; 01339 if (Intrinsic == Intrinsic::nvvm_ldu_global_i) 01340 Info.memVT = MVT::i32; 01341 else if (Intrinsic == Intrinsic::nvvm_ldu_global_p) 01342 Info.memVT = getPointerTy(); 01343 else 01344 Info.memVT = MVT::f32; 01345 Info.ptrVal = I.getArgOperand(0); 01346 Info.offset = 0; 01347 Info.vol = 0; 01348 Info.readMem = true; 01349 Info.writeMem = false; 01350 Info.align = 0; 01351 return true; 01352 01353 } 01354 return false; 01355 } 01356 01357 /// isLegalAddressingMode - Return true if the addressing mode represented 01358 /// by AM is legal for this target, for a load/store of the specified type. 01359 /// Used to guide target specific optimizations, like loop strength reduction 01360 /// (LoopStrengthReduce.cpp) and memory optimization for address mode 01361 /// (CodeGenPrepare.cpp) 01362 bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, 01363 Type *Ty) const { 01364 01365 // AddrMode - This represents an addressing mode of: 01366 // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg 01367 // 01368 // The legal address modes are 01369 // - [avar] 01370 // - [areg] 01371 // - [areg+immoff] 01372 // - [immAddr] 01373 01374 if (AM.BaseGV) { 01375 if (AM.BaseOffs || AM.HasBaseReg || AM.Scale) 01376 return false; 01377 return true; 01378 } 01379 01380 switch (AM.Scale) { 01381 case 0: // "r", "r+i" or "i" is allowed 01382 break; 01383 case 1: 01384 if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. 01385 return false; 01386 // Otherwise we have r+i. 01387 break; 01388 default: 01389 // No scale > 1 is allowed 01390 return false; 01391 } 01392 return true; 01393 } 01394 01395 //===----------------------------------------------------------------------===// 01396 // NVPTX Inline Assembly Support 01397 //===----------------------------------------------------------------------===// 01398 01399 /// getConstraintType - Given a constraint letter, return the type of 01400 /// constraint it is for this target. 01401 NVPTXTargetLowering::ConstraintType 01402 NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { 01403 if (Constraint.size() == 1) { 01404 switch (Constraint[0]) { 01405 default: 01406 break; 01407 case 'r': 01408 case 'h': 01409 case 'c': 01410 case 'l': 01411 case 'f': 01412 case 'd': 01413 case '0': 01414 case 'N': 01415 return C_RegisterClass; 01416 } 01417 } 01418 return TargetLowering::getConstraintType(Constraint); 01419 } 01420 01421 std::pair<unsigned, const TargetRegisterClass *> 01422 NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 01423 EVT VT) const { 01424 if (Constraint.size() == 1) { 01425 switch (Constraint[0]) { 01426 case 'c': 01427 return std::make_pair(0U, &NVPTX::Int8RegsRegClass); 01428 case 'h': 01429 return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 01430 case 'r': 01431 return std::make_pair(0U, &NVPTX::Int32RegsRegClass); 01432 case 'l': 01433 case 'N': 01434 return std::make_pair(0U, &NVPTX::Int64RegsRegClass); 01435 case 'f': 01436 return std::make_pair(0U, &NVPTX::Float32RegsRegClass); 01437 case 'd': 01438 return std::make_pair(0U, &NVPTX::Float64RegsRegClass); 01439 } 01440 } 01441 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 01442 } 01443 01444 /// getFunctionAlignment - Return the Log2 alignment of this function. 01445 unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { 01446 return 4; 01447 } 01448 01449 /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. 01450 static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, 01451 SmallVectorImpl<SDValue> &Results) { 01452 EVT ResVT = N->getValueType(0); 01453 DebugLoc DL = N->getDebugLoc(); 01454 01455 assert(ResVT.isVector() && "Vector load must have vector type"); 01456 01457 // We only handle "native" vector sizes for now, e.g. <4 x double> is not 01458 // legal. We can (and should) split that into 2 loads of <2 x double> here 01459 // but I'm leaving that as a TODO for now. 01460 assert(ResVT.isSimple() && "Can only handle simple types"); 01461 switch (ResVT.getSimpleVT().SimpleTy) { 01462 default: 01463 return; 01464 case MVT::v2i8: 01465 case MVT::v2i16: 01466 case MVT::v2i32: 01467 case MVT::v2i64: 01468 case MVT::v2f32: 01469 case MVT::v2f64: 01470 case MVT::v4i8: 01471 case MVT::v4i16: 01472 case MVT::v4i32: 01473 case MVT::v4f32: 01474 // This is a "native" vector type 01475 break; 01476 } 01477 01478 EVT EltVT = ResVT.getVectorElementType(); 01479 unsigned NumElts = ResVT.getVectorNumElements(); 01480 01481 // Since LoadV2 is a target node, we cannot rely on DAG type legalization. 01482 // Therefore, we must ensure the type is legal. For i1 and i8, we set the 01483 // loaded type to i16 and propogate the "real" type as the memory type. 01484 bool NeedTrunc = false; 01485 if (EltVT.getSizeInBits() < 16) { 01486 EltVT = MVT::i16; 01487 NeedTrunc = true; 01488 } 01489 01490 unsigned Opcode = 0; 01491 SDVTList LdResVTs; 01492 01493 switch (NumElts) { 01494 default: 01495 return; 01496 case 2: 01497 Opcode = NVPTXISD::LoadV2; 01498 LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); 01499 break; 01500 case 4: { 01501 Opcode = NVPTXISD::LoadV4; 01502 EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; 01503 LdResVTs = DAG.getVTList(ListVTs, 5); 01504 break; 01505 } 01506 } 01507 01508 SmallVector<SDValue, 8> OtherOps; 01509 01510 // Copy regular operands 01511 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 01512 OtherOps.push_back(N->getOperand(i)); 01513 01514 LoadSDNode *LD = cast<LoadSDNode>(N); 01515 01516 // The select routine does not have access to the LoadSDNode instance, so 01517 // pass along the extension information 01518 OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); 01519 01520 SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], 01521 OtherOps.size(), LD->getMemoryVT(), 01522 LD->getMemOperand()); 01523 01524 SmallVector<SDValue, 4> ScalarRes; 01525 01526 for (unsigned i = 0; i < NumElts; ++i) { 01527 SDValue Res = NewLD.getValue(i); 01528 if (NeedTrunc) 01529 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); 01530 ScalarRes.push_back(Res); 01531 } 01532 01533 SDValue LoadChain = NewLD.getValue(NumElts); 01534 01535 SDValue BuildVec = 01536 DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); 01537 01538 Results.push_back(BuildVec); 01539 Results.push_back(LoadChain); 01540 } 01541 01542 static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, 01543 SmallVectorImpl<SDValue> &Results) { 01544 SDValue Chain = N->getOperand(0); 01545 SDValue Intrin = N->getOperand(1); 01546 DebugLoc DL = N->getDebugLoc(); 01547 01548 // Get the intrinsic ID 01549 unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); 01550 switch (IntrinNo) { 01551 default: 01552 return; 01553 case Intrinsic::nvvm_ldg_global_i: 01554 case Intrinsic::nvvm_ldg_global_f: 01555 case Intrinsic::nvvm_ldg_global_p: 01556 case Intrinsic::nvvm_ldu_global_i: 01557 case Intrinsic::nvvm_ldu_global_f: 01558 case Intrinsic::nvvm_ldu_global_p: { 01559 EVT ResVT = N->getValueType(0); 01560 01561 if (ResVT.isVector()) { 01562 // Vector LDG/LDU 01563 01564 unsigned NumElts = ResVT.getVectorNumElements(); 01565 EVT EltVT = ResVT.getVectorElementType(); 01566 01567 // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization. 01568 // Therefore, we must ensure the type is legal. For i1 and i8, we set the 01569 // loaded type to i16 and propogate the "real" type as the memory type. 01570 bool NeedTrunc = false; 01571 if (EltVT.getSizeInBits() < 16) { 01572 EltVT = MVT::i16; 01573 NeedTrunc = true; 01574 } 01575 01576 unsigned Opcode = 0; 01577 SDVTList LdResVTs; 01578 01579 switch (NumElts) { 01580 default: 01581 return; 01582 case 2: 01583 switch (IntrinNo) { 01584 default: 01585 return; 01586 case Intrinsic::nvvm_ldg_global_i: 01587 case Intrinsic::nvvm_ldg_global_f: 01588 case Intrinsic::nvvm_ldg_global_p: 01589 Opcode = NVPTXISD::LDGV2; 01590 break; 01591 case Intrinsic::nvvm_ldu_global_i: 01592 case Intrinsic::nvvm_ldu_global_f: 01593 case Intrinsic::nvvm_ldu_global_p: 01594 Opcode = NVPTXISD::LDUV2; 01595 break; 01596 } 01597 LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); 01598 break; 01599 case 4: { 01600 switch (IntrinNo) { 01601 default: 01602 return; 01603 case Intrinsic::nvvm_ldg_global_i: 01604 case Intrinsic::nvvm_ldg_global_f: 01605 case Intrinsic::nvvm_ldg_global_p: 01606 Opcode = NVPTXISD::LDGV4; 01607 break; 01608 case Intrinsic::nvvm_ldu_global_i: 01609 case Intrinsic::nvvm_ldu_global_f: 01610 case Intrinsic::nvvm_ldu_global_p: 01611 Opcode = NVPTXISD::LDUV4; 01612 break; 01613 } 01614 EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; 01615 LdResVTs = DAG.getVTList(ListVTs, 5); 01616 break; 01617 } 01618 } 01619 01620 SmallVector<SDValue, 8> OtherOps; 01621 01622 // Copy regular operands 01623 01624 OtherOps.push_back(Chain); // Chain 01625 // Skip operand 1 (intrinsic ID) 01626 // Others 01627 for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) 01628 OtherOps.push_back(N->getOperand(i)); 01629 01630 MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); 01631 01632 SDValue NewLD = DAG.getMemIntrinsicNode( 01633 Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(), 01634 MemSD->getMemoryVT(), MemSD->getMemOperand()); 01635 01636 SmallVector<SDValue, 4> ScalarRes; 01637 01638 for (unsigned i = 0; i < NumElts; ++i) { 01639 SDValue Res = NewLD.getValue(i); 01640 if (NeedTrunc) 01641 Res = 01642 DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); 01643 ScalarRes.push_back(Res); 01644 } 01645 01646 SDValue LoadChain = NewLD.getValue(NumElts); 01647 01648 SDValue BuildVec = 01649 DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); 01650 01651 Results.push_back(BuildVec); 01652 Results.push_back(LoadChain); 01653 } else { 01654 // i8 LDG/LDU 01655 assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && 01656 "Custom handling of non-i8 ldu/ldg?"); 01657 01658 // Just copy all operands as-is 01659 SmallVector<SDValue, 4> Ops; 01660 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 01661 Ops.push_back(N->getOperand(i)); 01662 01663 // Force output to i16 01664 SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); 01665 01666 MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); 01667 01668 // We make sure the memory type is i8, which will be used during isel 01669 // to select the proper instruction. 01670 SDValue NewLD = 01671 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0], 01672 Ops.size(), MVT::i8, MemSD->getMemOperand()); 01673 01674 Results.push_back(NewLD.getValue(0)); 01675 Results.push_back(NewLD.getValue(1)); 01676 } 01677 } 01678 } 01679 } 01680 01681 void NVPTXTargetLowering::ReplaceNodeResults( 01682 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 01683 switch (N->getOpcode()) { 01684 default: 01685 report_fatal_error("Unhandled custom legalization"); 01686 case ISD::LOAD: 01687 ReplaceLoadVector(N, DAG, Results); 01688 return; 01689 case ISD::INTRINSIC_W_CHAIN: 01690 ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); 01691 return; 01692 } 01693 }