LLVM API Documentation

NVPTXISelLowering.cpp
Go to the documentation of this file.
00001 //
00002 //                     The LLVM Compiler Infrastructure
00003 //
00004 // This file is distributed under the University of Illinois Open Source
00005 // License. See LICENSE.TXT for details.
00006 //
00007 //===----------------------------------------------------------------------===//
00008 //
00009 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
00010 // selection DAG.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "NVPTXISelLowering.h"
00015 #include "NVPTX.h"
00016 #include "NVPTXTargetMachine.h"
00017 #include "NVPTXTargetObjectFile.h"
00018 #include "NVPTXUtilities.h"
00019 #include "llvm/CodeGen/Analysis.h"
00020 #include "llvm/CodeGen/MachineFrameInfo.h"
00021 #include "llvm/CodeGen/MachineFunction.h"
00022 #include "llvm/CodeGen/MachineInstrBuilder.h"
00023 #include "llvm/CodeGen/MachineRegisterInfo.h"
00024 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
00025 #include "llvm/IR/DerivedTypes.h"
00026 #include "llvm/IR/Function.h"
00027 #include "llvm/IR/GlobalValue.h"
00028 #include "llvm/IR/IntrinsicInst.h"
00029 #include "llvm/IR/Intrinsics.h"
00030 #include "llvm/IR/Module.h"
00031 #include "llvm/MC/MCSectionELF.h"
00032 #include "llvm/Support/CallSite.h"
00033 #include "llvm/Support/CommandLine.h"
00034 #include "llvm/Support/Debug.h"
00035 #include "llvm/Support/ErrorHandling.h"
00036 #include "llvm/Support/raw_ostream.h"
00037 #include <sstream>
00038 
00039 #undef DEBUG_TYPE
00040 #define DEBUG_TYPE "nvptx-lower"
00041 
00042 using namespace llvm;
00043 
00044 static unsigned int uniqueCallSite = 0;
00045 
00046 static cl::opt<bool> sched4reg(
00047     "nvptx-sched4reg",
00048     cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
00049 
00050 static bool IsPTXVectorType(MVT VT) {
00051   switch (VT.SimpleTy) {
00052   default:
00053     return false;
00054   case MVT::v2i8:
00055   case MVT::v4i8:
00056   case MVT::v2i16:
00057   case MVT::v4i16:
00058   case MVT::v2i32:
00059   case MVT::v4i32:
00060   case MVT::v2i64:
00061   case MVT::v2f32:
00062   case MVT::v4f32:
00063   case MVT::v2f64:
00064     return true;
00065   }
00066 }
00067 
00068 // NVPTXTargetLowering Constructor.
00069 NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
00070     : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
00071       nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
00072 
00073   // always lower memset, memcpy, and memmove intrinsics to load/store
00074   // instructions, rather
00075   // then generating calls to memset, mempcy or memmove.
00076   MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
00077   MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
00078   MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
00079 
00080   setBooleanContents(ZeroOrNegativeOneBooleanContent);
00081 
00082   // Jump is Expensive. Don't create extra control flow for 'and', 'or'
00083   // condition branches.
00084   setJumpIsExpensive(true);
00085 
00086   // By default, use the Source scheduling
00087   if (sched4reg)
00088     setSchedulingPreference(Sched::RegPressure);
00089   else
00090     setSchedulingPreference(Sched::Source);
00091 
00092   addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
00093   addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
00094   addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
00095   addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
00096   addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
00097   addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
00098   addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
00099 
00100   // Operations not directly supported by NVPTX.
00101   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
00102   setOperationAction(ISD::BR_CC, MVT::f32, Expand);
00103   setOperationAction(ISD::BR_CC, MVT::f64, Expand);
00104   setOperationAction(ISD::BR_CC, MVT::i1, Expand);
00105   setOperationAction(ISD::BR_CC, MVT::i8, Expand);
00106   setOperationAction(ISD::BR_CC, MVT::i16, Expand);
00107   setOperationAction(ISD::BR_CC, MVT::i32, Expand);
00108   setOperationAction(ISD::BR_CC, MVT::i64, Expand);
00109   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
00110   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
00111   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
00112   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
00113   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
00114 
00115   if (nvptxSubtarget.hasROT64()) {
00116     setOperationAction(ISD::ROTL, MVT::i64, Legal);
00117     setOperationAction(ISD::ROTR, MVT::i64, Legal);
00118   } else {
00119     setOperationAction(ISD::ROTL, MVT::i64, Expand);
00120     setOperationAction(ISD::ROTR, MVT::i64, Expand);
00121   }
00122   if (nvptxSubtarget.hasROT32()) {
00123     setOperationAction(ISD::ROTL, MVT::i32, Legal);
00124     setOperationAction(ISD::ROTR, MVT::i32, Legal);
00125   } else {
00126     setOperationAction(ISD::ROTL, MVT::i32, Expand);
00127     setOperationAction(ISD::ROTR, MVT::i32, Expand);
00128   }
00129 
00130   setOperationAction(ISD::ROTL, MVT::i16, Expand);
00131   setOperationAction(ISD::ROTR, MVT::i16, Expand);
00132   setOperationAction(ISD::ROTL, MVT::i8, Expand);
00133   setOperationAction(ISD::ROTR, MVT::i8, Expand);
00134   setOperationAction(ISD::BSWAP, MVT::i16, Expand);
00135   setOperationAction(ISD::BSWAP, MVT::i32, Expand);
00136   setOperationAction(ISD::BSWAP, MVT::i64, Expand);
00137 
00138   // Indirect branch is not supported.
00139   // This also disables Jump Table creation.
00140   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
00141   setOperationAction(ISD::BRIND, MVT::Other, Expand);
00142 
00143   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
00144   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
00145 
00146   // We want to legalize constant related memmove and memcopy
00147   // intrinsics.
00148   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
00149 
00150   // Turn FP extload into load/fextend
00151   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
00152   // Turn FP truncstore into trunc + store.
00153   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
00154 
00155   // PTX does not support load / store predicate registers
00156   setOperationAction(ISD::LOAD, MVT::i1, Custom);
00157   setOperationAction(ISD::STORE, MVT::i1, Custom);
00158 
00159   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
00160   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
00161   setTruncStoreAction(MVT::i64, MVT::i1, Expand);
00162   setTruncStoreAction(MVT::i32, MVT::i1, Expand);
00163   setTruncStoreAction(MVT::i16, MVT::i1, Expand);
00164   setTruncStoreAction(MVT::i8, MVT::i1, Expand);
00165 
00166   // This is legal in NVPTX
00167   setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
00168   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
00169 
00170   // TRAP can be lowered to PTX trap
00171   setOperationAction(ISD::TRAP, MVT::Other, Legal);
00172 
00173   // Register custom handling for vector loads/stores
00174   for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
00175        ++i) {
00176     MVT VT = (MVT::SimpleValueType) i;
00177     if (IsPTXVectorType(VT)) {
00178       setOperationAction(ISD::LOAD, VT, Custom);
00179       setOperationAction(ISD::STORE, VT, Custom);
00180       setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
00181     }
00182   }
00183 
00184   // Now deduce the information based on the above mentioned
00185   // actions
00186   computeRegisterProperties();
00187 }
00188 
00189 const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
00190   switch (Opcode) {
00191   default:
00192     return 0;
00193   case NVPTXISD::CALL:
00194     return "NVPTXISD::CALL";
00195   case NVPTXISD::RET_FLAG:
00196     return "NVPTXISD::RET_FLAG";
00197   case NVPTXISD::Wrapper:
00198     return "NVPTXISD::Wrapper";
00199   case NVPTXISD::NVBuiltin:
00200     return "NVPTXISD::NVBuiltin";
00201   case NVPTXISD::DeclareParam:
00202     return "NVPTXISD::DeclareParam";
00203   case NVPTXISD::DeclareScalarParam:
00204     return "NVPTXISD::DeclareScalarParam";
00205   case NVPTXISD::DeclareRet:
00206     return "NVPTXISD::DeclareRet";
00207   case NVPTXISD::DeclareRetParam:
00208     return "NVPTXISD::DeclareRetParam";
00209   case NVPTXISD::PrintCall:
00210     return "NVPTXISD::PrintCall";
00211   case NVPTXISD::LoadParam:
00212     return "NVPTXISD::LoadParam";
00213   case NVPTXISD::StoreParam:
00214     return "NVPTXISD::StoreParam";
00215   case NVPTXISD::StoreParamS32:
00216     return "NVPTXISD::StoreParamS32";
00217   case NVPTXISD::StoreParamU32:
00218     return "NVPTXISD::StoreParamU32";
00219   case NVPTXISD::MoveToParam:
00220     return "NVPTXISD::MoveToParam";
00221   case NVPTXISD::CallArgBegin:
00222     return "NVPTXISD::CallArgBegin";
00223   case NVPTXISD::CallArg:
00224     return "NVPTXISD::CallArg";
00225   case NVPTXISD::LastCallArg:
00226     return "NVPTXISD::LastCallArg";
00227   case NVPTXISD::CallArgEnd:
00228     return "NVPTXISD::CallArgEnd";
00229   case NVPTXISD::CallVoid:
00230     return "NVPTXISD::CallVoid";
00231   case NVPTXISD::CallVal:
00232     return "NVPTXISD::CallVal";
00233   case NVPTXISD::CallSymbol:
00234     return "NVPTXISD::CallSymbol";
00235   case NVPTXISD::Prototype:
00236     return "NVPTXISD::Prototype";
00237   case NVPTXISD::MoveParam:
00238     return "NVPTXISD::MoveParam";
00239   case NVPTXISD::MoveRetval:
00240     return "NVPTXISD::MoveRetval";
00241   case NVPTXISD::MoveToRetval:
00242     return "NVPTXISD::MoveToRetval";
00243   case NVPTXISD::StoreRetval:
00244     return "NVPTXISD::StoreRetval";
00245   case NVPTXISD::PseudoUseParam:
00246     return "NVPTXISD::PseudoUseParam";
00247   case NVPTXISD::RETURN:
00248     return "NVPTXISD::RETURN";
00249   case NVPTXISD::CallSeqBegin:
00250     return "NVPTXISD::CallSeqBegin";
00251   case NVPTXISD::CallSeqEnd:
00252     return "NVPTXISD::CallSeqEnd";
00253   case NVPTXISD::LoadV2:
00254     return "NVPTXISD::LoadV2";
00255   case NVPTXISD::LoadV4:
00256     return "NVPTXISD::LoadV4";
00257   case NVPTXISD::LDGV2:
00258     return "NVPTXISD::LDGV2";
00259   case NVPTXISD::LDGV4:
00260     return "NVPTXISD::LDGV4";
00261   case NVPTXISD::LDUV2:
00262     return "NVPTXISD::LDUV2";
00263   case NVPTXISD::LDUV4:
00264     return "NVPTXISD::LDUV4";
00265   case NVPTXISD::StoreV2:
00266     return "NVPTXISD::StoreV2";
00267   case NVPTXISD::StoreV4:
00268     return "NVPTXISD::StoreV4";
00269   }
00270 }
00271 
00272 bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
00273   return VT == MVT::i1;
00274 }
00275 
00276 SDValue
00277 NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
00278   DebugLoc dl = Op.getDebugLoc();
00279   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
00280   Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
00281   return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
00282 }
00283 
00284 std::string NVPTXTargetLowering::getPrototype(
00285     Type *retTy, const ArgListTy &Args,
00286     const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
00287 
00288   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
00289 
00290   std::stringstream O;
00291   O << "prototype_" << uniqueCallSite << " : .callprototype ";
00292 
00293   if (retTy->getTypeID() == Type::VoidTyID)
00294     O << "()";
00295   else {
00296     O << "(";
00297     if (isABI) {
00298       if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
00299         unsigned size = 0;
00300         if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
00301           size = ITy->getBitWidth();
00302           if (size < 32)
00303             size = 32;
00304         } else {
00305           assert(retTy->isFloatingPointTy() &&
00306                  "Floating point type expected here");
00307           size = retTy->getPrimitiveSizeInBits();
00308         }
00309 
00310         O << ".param .b" << size << " _";
00311       } else if (isa<PointerType>(retTy))
00312         O << ".param .b" << getPointerTy().getSizeInBits() << " _";
00313       else {
00314         if ((retTy->getTypeID() == Type::StructTyID) ||
00315             isa<VectorType>(retTy)) {
00316           SmallVector<EVT, 16> vtparts;
00317           ComputeValueVTs(*this, retTy, vtparts);
00318           unsigned totalsz = 0;
00319           for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
00320             unsigned elems = 1;
00321             EVT elemtype = vtparts[i];
00322             if (vtparts[i].isVector()) {
00323               elems = vtparts[i].getVectorNumElements();
00324               elemtype = vtparts[i].getVectorElementType();
00325             }
00326             for (unsigned j = 0, je = elems; j != je; ++j) {
00327               unsigned sz = elemtype.getSizeInBits();
00328               if (elemtype.isInteger() && (sz < 8))
00329                 sz = 8;
00330               totalsz += sz / 8;
00331             }
00332           }
00333           O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
00334         } else {
00335           assert(false && "Unknown return type");
00336         }
00337       }
00338     } else {
00339       SmallVector<EVT, 16> vtparts;
00340       ComputeValueVTs(*this, retTy, vtparts);
00341       unsigned idx = 0;
00342       for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
00343         unsigned elems = 1;
00344         EVT elemtype = vtparts[i];
00345         if (vtparts[i].isVector()) {
00346           elems = vtparts[i].getVectorNumElements();
00347           elemtype = vtparts[i].getVectorElementType();
00348         }
00349 
00350         for (unsigned j = 0, je = elems; j != je; ++j) {
00351           unsigned sz = elemtype.getSizeInBits();
00352           if (elemtype.isInteger() && (sz < 32))
00353             sz = 32;
00354           O << ".reg .b" << sz << " _";
00355           if (j < je - 1)
00356             O << ", ";
00357           ++idx;
00358         }
00359         if (i < e - 1)
00360           O << ", ";
00361       }
00362     }
00363     O << ") ";
00364   }
00365   O << "_ (";
00366 
00367   bool first = true;
00368   MVT thePointerTy = getPointerTy();
00369 
00370   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
00371     const Type *Ty = Args[i].Ty;
00372     if (!first) {
00373       O << ", ";
00374     }
00375     first = false;
00376 
00377     if (Outs[i].Flags.isByVal() == false) {
00378       unsigned sz = 0;
00379       if (isa<IntegerType>(Ty)) {
00380         sz = cast<IntegerType>(Ty)->getBitWidth();
00381         if (sz < 32)
00382           sz = 32;
00383       } else if (isa<PointerType>(Ty))
00384         sz = thePointerTy.getSizeInBits();
00385       else
00386         sz = Ty->getPrimitiveSizeInBits();
00387       if (isABI)
00388         O << ".param .b" << sz << " ";
00389       else
00390         O << ".reg .b" << sz << " ";
00391       O << "_";
00392       continue;
00393     }
00394     const PointerType *PTy = dyn_cast<PointerType>(Ty);
00395     assert(PTy && "Param with byval attribute should be a pointer type");
00396     Type *ETy = PTy->getElementType();
00397 
00398     if (isABI) {
00399       unsigned align = Outs[i].Flags.getByValAlign();
00400       unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
00401       O << ".param .align " << align << " .b8 ";
00402       O << "_";
00403       O << "[" << sz << "]";
00404       continue;
00405     } else {
00406       SmallVector<EVT, 16> vtparts;
00407       ComputeValueVTs(*this, ETy, vtparts);
00408       for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
00409         unsigned elems = 1;
00410         EVT elemtype = vtparts[i];
00411         if (vtparts[i].isVector()) {
00412           elems = vtparts[i].getVectorNumElements();
00413           elemtype = vtparts[i].getVectorElementType();
00414         }
00415 
00416         for (unsigned j = 0, je = elems; j != je; ++j) {
00417           unsigned sz = elemtype.getSizeInBits();
00418           if (elemtype.isInteger() && (sz < 32))
00419             sz = 32;
00420           O << ".reg .b" << sz << " ";
00421           O << "_";
00422           if (j < je - 1)
00423             O << ", ";
00424         }
00425         if (i < e - 1)
00426           O << ", ";
00427       }
00428       continue;
00429     }
00430   }
00431   O << ");";
00432   return O.str();
00433 }
00434 
00435 SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
00436                                        SmallVectorImpl<SDValue> &InVals) const {
00437   SelectionDAG &DAG = CLI.DAG;
00438   DebugLoc &dl = CLI.DL;
00439   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
00440   SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
00441   SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
00442   SDValue Chain = CLI.Chain;
00443   SDValue Callee = CLI.Callee;
00444   bool &isTailCall = CLI.IsTailCall;
00445   ArgListTy &Args = CLI.Args;
00446   Type *retTy = CLI.RetTy;
00447   ImmutableCallSite *CS = CLI.CS;
00448 
00449   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
00450 
00451   SDValue tempChain = Chain;
00452   Chain =
00453       DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true));
00454   SDValue InFlag = Chain.getValue(1);
00455 
00456   assert((Outs.size() == Args.size()) &&
00457          "Unexpected number of arguments to function call");
00458   unsigned paramCount = 0;
00459   // Declare the .params or .reg need to pass values
00460   // to the function
00461   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
00462     EVT VT = Outs[i].VT;
00463 
00464     if (Outs[i].Flags.isByVal() == false) {
00465       // Plain scalar
00466       // for ABI,    declare .param .b<size> .param<n>;
00467       // for nonABI, declare .reg .b<size> .param<n>;
00468       unsigned isReg = 1;
00469       if (isABI)
00470         isReg = 0;
00471       unsigned sz = VT.getSizeInBits();
00472       if (VT.isInteger() && (sz < 32))
00473         sz = 32;
00474       SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00475       SDValue DeclareParamOps[] = { Chain,
00476                                     DAG.getConstant(paramCount, MVT::i32),
00477                                     DAG.getConstant(sz, MVT::i32),
00478                                     DAG.getConstant(isReg, MVT::i32), InFlag };
00479       Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
00480                           DeclareParamOps, 5);
00481       InFlag = Chain.getValue(1);
00482       SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00483       SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
00484                                  DAG.getConstant(0, MVT::i32), OutVals[i],
00485                                  InFlag };
00486 
00487       unsigned opcode = NVPTXISD::StoreParam;
00488       if (isReg)
00489         opcode = NVPTXISD::MoveToParam;
00490       else {
00491         if (Outs[i].Flags.isZExt())
00492           opcode = NVPTXISD::StoreParamU32;
00493         else if (Outs[i].Flags.isSExt())
00494           opcode = NVPTXISD::StoreParamS32;
00495       }
00496       Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);
00497 
00498       InFlag = Chain.getValue(1);
00499       ++paramCount;
00500       continue;
00501     }
00502     // struct or vector
00503     SmallVector<EVT, 16> vtparts;
00504     const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
00505     assert(PTy && "Type of a byval parameter should be pointer");
00506     ComputeValueVTs(*this, PTy->getElementType(), vtparts);
00507 
00508     if (isABI) {
00509       // declare .param .align 16 .b8 .param<n>[<size>];
00510       unsigned sz = Outs[i].Flags.getByValSize();
00511       SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00512       // The ByValAlign in the Outs[i].Flags is alway set at this point, so we
00513       // don't need to
00514       // worry about natural alignment or not. See TargetLowering::LowerCallTo()
00515       SDValue DeclareParamOps[] = {
00516         Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
00517         DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
00518         InFlag
00519       };
00520       Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
00521                           DeclareParamOps, 5);
00522       InFlag = Chain.getValue(1);
00523       unsigned curOffset = 0;
00524       for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
00525         unsigned elems = 1;
00526         EVT elemtype = vtparts[j];
00527         if (vtparts[j].isVector()) {
00528           elems = vtparts[j].getVectorNumElements();
00529           elemtype = vtparts[j].getVectorElementType();
00530         }
00531         for (unsigned k = 0, ke = elems; k != ke; ++k) {
00532           unsigned sz = elemtype.getSizeInBits();
00533           if (elemtype.isInteger() && (sz < 8))
00534             sz = 8;
00535           SDValue srcAddr =
00536               DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
00537                           DAG.getConstant(curOffset, getPointerTy()));
00538           SDValue theVal =
00539               DAG.getLoad(elemtype, dl, tempChain, srcAddr,
00540                           MachinePointerInfo(), false, false, false, 0);
00541           SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00542           SDValue CopyParamOps[] = { Chain,
00543                                      DAG.getConstant(paramCount, MVT::i32),
00544                                      DAG.getConstant(curOffset, MVT::i32),
00545                                      theVal, InFlag };
00546           Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
00547                               CopyParamOps, 5);
00548           InFlag = Chain.getValue(1);
00549           curOffset += sz / 8;
00550         }
00551       }
00552       ++paramCount;
00553       continue;
00554     }
00555     // Non-abi, struct or vector
00556     // Declare a bunch or .reg .b<size> .param<n>
00557     unsigned curOffset = 0;
00558     for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
00559       unsigned elems = 1;
00560       EVT elemtype = vtparts[j];
00561       if (vtparts[j].isVector()) {
00562         elems = vtparts[j].getVectorNumElements();
00563         elemtype = vtparts[j].getVectorElementType();
00564       }
00565       for (unsigned k = 0, ke = elems; k != ke; ++k) {
00566         unsigned sz = elemtype.getSizeInBits();
00567         if (elemtype.isInteger() && (sz < 32))
00568           sz = 32;
00569         SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00570         SDValue DeclareParamOps[] = { Chain,
00571                                       DAG.getConstant(paramCount, MVT::i32),
00572                                       DAG.getConstant(sz, MVT::i32),
00573                                       DAG.getConstant(1, MVT::i32), InFlag };
00574         Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
00575                             DeclareParamOps, 5);
00576         InFlag = Chain.getValue(1);
00577         SDValue srcAddr =
00578             DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
00579                         DAG.getConstant(curOffset, getPointerTy()));
00580         SDValue theVal =
00581             DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(),
00582                         false, false, false, 0);
00583         SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00584         SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
00585                                    DAG.getConstant(0, MVT::i32), theVal,
00586                                    InFlag };
00587         Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
00588                             CopyParamOps, 5);
00589         InFlag = Chain.getValue(1);
00590         ++paramCount;
00591       }
00592     }
00593   }
00594 
00595   GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
00596   unsigned retAlignment = 0;
00597 
00598   // Handle Result
00599   unsigned retCount = 0;
00600   if (Ins.size() > 0) {
00601     SmallVector<EVT, 16> resvtparts;
00602     ComputeValueVTs(*this, retTy, resvtparts);
00603 
00604     // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
00605     // individual .reg .b<size> func_retval<0..> for non ABI
00606     unsigned resultsz = 0;
00607     for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
00608       unsigned elems = 1;
00609       EVT elemtype = resvtparts[i];
00610       if (resvtparts[i].isVector()) {
00611         elems = resvtparts[i].getVectorNumElements();
00612         elemtype = resvtparts[i].getVectorElementType();
00613       }
00614       for (unsigned j = 0, je = elems; j != je; ++j) {
00615         unsigned sz = elemtype.getSizeInBits();
00616         if (isABI == false) {
00617           if (elemtype.isInteger() && (sz < 32))
00618             sz = 32;
00619         } else {
00620           if (elemtype.isInteger() && (sz < 8))
00621             sz = 8;
00622         }
00623         if (isABI == false) {
00624           SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00625           SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
00626                                       DAG.getConstant(sz, MVT::i32),
00627                                       DAG.getConstant(retCount, MVT::i32),
00628                                       InFlag };
00629           Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
00630                               DeclareRetOps, 5);
00631           InFlag = Chain.getValue(1);
00632           ++retCount;
00633         }
00634         resultsz += sz;
00635       }
00636     }
00637     if (isABI) {
00638       if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
00639           retTy->isPointerTy()) {
00640         // Scalar needs to be at least 32bit wide
00641         if (resultsz < 32)
00642           resultsz = 32;
00643         SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00644         SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
00645                                     DAG.getConstant(resultsz, MVT::i32),
00646                                     DAG.getConstant(0, MVT::i32), InFlag };
00647         Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
00648                             DeclareRetOps, 5);
00649         InFlag = Chain.getValue(1);
00650       } else {
00651         if (Func) { // direct call
00652           if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
00653             retAlignment = getDataLayout()->getABITypeAlignment(retTy);
00654         } else { // indirect call
00655           const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
00656           if (!llvm::getAlign(*CallI, 0, retAlignment))
00657             retAlignment = getDataLayout()->getABITypeAlignment(retTy);
00658         }
00659         SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00660         SDValue DeclareRetOps[] = { Chain,
00661                                     DAG.getConstant(retAlignment, MVT::i32),
00662                                     DAG.getConstant(resultsz / 8, MVT::i32),
00663                                     DAG.getConstant(0, MVT::i32), InFlag };
00664         Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
00665                             DeclareRetOps, 5);
00666         InFlag = Chain.getValue(1);
00667       }
00668     }
00669   }
00670 
00671   if (!Func) {
00672     // This is indirect function call case : PTX requires a prototype of the
00673     // form
00674     // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
00675     // to be emitted, and the label has to used as the last arg of call
00676     // instruction.
00677     // The prototype is embedded in a string and put as the operand for an
00678     // INLINEASM SDNode.
00679     SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00680     std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
00681     const char *asmstr = nvTM->getManagedStrPool()
00682         ->getManagedString(proto_string.c_str())->c_str();
00683     SDValue InlineAsmOps[] = {
00684       Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()),
00685       DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag
00686     };
00687     Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
00688     InFlag = Chain.getValue(1);
00689   }
00690   // Op to just print "call"
00691   SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00692   SDValue PrintCallOps[] = {
00693     Chain,
00694     DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32),
00695     InFlag
00696   };
00697   Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
00698                       dl, PrintCallVTs, PrintCallOps, 3);
00699   InFlag = Chain.getValue(1);
00700 
00701   // Ops to print out the function name
00702   SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00703   SDValue CallVoidOps[] = { Chain, Callee, InFlag };
00704   Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
00705   InFlag = Chain.getValue(1);
00706 
00707   // Ops to print out the param list
00708   SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00709   SDValue CallArgBeginOps[] = { Chain, InFlag };
00710   Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
00711                       CallArgBeginOps, 2);
00712   InFlag = Chain.getValue(1);
00713 
00714   for (unsigned i = 0, e = paramCount; i != e; ++i) {
00715     unsigned opcode;
00716     if (i == (e - 1))
00717       opcode = NVPTXISD::LastCallArg;
00718     else
00719       opcode = NVPTXISD::CallArg;
00720     SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00721     SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
00722                              DAG.getConstant(i, MVT::i32), InFlag };
00723     Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
00724     InFlag = Chain.getValue(1);
00725   }
00726   SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00727   SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
00728                               InFlag };
00729   Chain =
00730       DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3);
00731   InFlag = Chain.getValue(1);
00732 
00733   if (!Func) {
00734     SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
00735     SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
00736                                InFlag };
00737     Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
00738     InFlag = Chain.getValue(1);
00739   }
00740 
00741   // Generate loads from param memory/moves from registers for result
00742   if (Ins.size() > 0) {
00743     if (isABI) {
00744       unsigned resoffset = 0;
00745       for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
00746         unsigned sz = Ins[i].VT.getSizeInBits();
00747         if (Ins[i].VT.isInteger() && (sz < 8))
00748           sz = 8;
00749         EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
00750         SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
00751                                  DAG.getConstant(resoffset, MVT::i32), InFlag };
00752         SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
00753                                      LoadRetOps, array_lengthof(LoadRetOps));
00754         Chain = retval.getValue(1);
00755         InFlag = retval.getValue(2);
00756         InVals.push_back(retval);
00757         resoffset += sz / 8;
00758       }
00759     } else {
00760       SmallVector<EVT, 16> resvtparts;
00761       ComputeValueVTs(*this, retTy, resvtparts);
00762 
00763       assert(Ins.size() == resvtparts.size() &&
00764              "Unexpected number of return values in non-ABI case");
00765       unsigned paramNum = 0;
00766       for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
00767         assert(EVT(Ins[i].VT) == resvtparts[i] &&
00768                "Unexpected EVT type in non-ABI case");
00769         unsigned numelems = 1;
00770         EVT elemtype = Ins[i].VT;
00771         if (Ins[i].VT.isVector()) {
00772           numelems = Ins[i].VT.getVectorNumElements();
00773           elemtype = Ins[i].VT.getVectorElementType();
00774         }
00775         std::vector<SDValue> tempRetVals;
00776         for (unsigned j = 0; j < numelems; ++j) {
00777           EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
00778           SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32),
00779                                    DAG.getConstant(paramNum, MVT::i32),
00780                                    InFlag };
00781           SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
00782                                        MoveRetOps, array_lengthof(MoveRetOps));
00783           Chain = retval.getValue(1);
00784           InFlag = retval.getValue(2);
00785           tempRetVals.push_back(retval);
00786           ++paramNum;
00787         }
00788         if (Ins[i].VT.isVector())
00789           InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
00790                                        &tempRetVals[0], tempRetVals.size()));
00791         else
00792           InVals.push_back(tempRetVals[0]);
00793       }
00794     }
00795   }
00796   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
00797                              DAG.getIntPtrConstant(uniqueCallSite + 1, true),
00798                              InFlag);
00799   uniqueCallSite++;
00800 
00801   // set isTailCall to false for now, until we figure out how to express
00802   // tail call optimization in PTX
00803   isTailCall = false;
00804   return Chain;
00805 }
00806 
00807 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
00808 // (see LegalizeDAG.cpp). This is slow and uses local memory.
00809 // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
00810 SDValue
00811 NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
00812   SDNode *Node = Op.getNode();
00813   DebugLoc dl = Node->getDebugLoc();
00814   SmallVector<SDValue, 8> Ops;
00815   unsigned NumOperands = Node->getNumOperands();
00816   for (unsigned i = 0; i < NumOperands; ++i) {
00817     SDValue SubOp = Node->getOperand(i);
00818     EVT VVT = SubOp.getNode()->getValueType(0);
00819     EVT EltVT = VVT.getVectorElementType();
00820     unsigned NumSubElem = VVT.getVectorNumElements();
00821     for (unsigned j = 0; j < NumSubElem; ++j) {
00822       Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
00823                                 DAG.getIntPtrConstant(j)));
00824     }
00825   }
00826   return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0],
00827                      Ops.size());
00828 }
00829 
00830 SDValue
00831 NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
00832   switch (Op.getOpcode()) {
00833   case ISD::RETURNADDR:
00834     return SDValue();
00835   case ISD::FRAMEADDR:
00836     return SDValue();
00837   case ISD::GlobalAddress:
00838     return LowerGlobalAddress(Op, DAG);
00839   case ISD::INTRINSIC_W_CHAIN:
00840     return Op;
00841   case ISD::BUILD_VECTOR:
00842   case ISD::EXTRACT_SUBVECTOR:
00843     return Op;
00844   case ISD::CONCAT_VECTORS:
00845     return LowerCONCAT_VECTORS(Op, DAG);
00846   case ISD::STORE:
00847     return LowerSTORE(Op, DAG);
00848   case ISD::LOAD:
00849     return LowerLOAD(Op, DAG);
00850   default:
00851     llvm_unreachable("Custom lowering not defined for operation");
00852   }
00853 }
00854 
00855 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
00856   if (Op.getValueType() == MVT::i1)
00857     return LowerLOADi1(Op, DAG);
00858   else
00859     return SDValue();
00860 }
00861 
00862 // v = ld i1* addr
00863 //   =>
00864 // v1 = ld i8* addr
00865 // v = trunc v1 to i1
00866 SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
00867   SDNode *Node = Op.getNode();
00868   LoadSDNode *LD = cast<LoadSDNode>(Node);
00869   DebugLoc dl = Node->getDebugLoc();
00870   assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
00871   assert(Node->getValueType(0) == MVT::i1 &&
00872          "Custom lowering for i1 load only");
00873   SDValue newLD =
00874       DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
00875                   LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
00876                   LD->isInvariant(), LD->getAlignment());
00877   SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
00878   // The legalizer (the caller) is expecting two values from the legalized
00879   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
00880   // in LegalizeDAG.cpp which also uses MergeValues.
00881   SDValue Ops[] = { result, LD->getChain() };
00882   return DAG.getMergeValues(Ops, 2, dl);
00883 }
00884 
00885 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
00886   EVT ValVT = Op.getOperand(1).getValueType();
00887   if (ValVT == MVT::i1)
00888     return LowerSTOREi1(Op, DAG);
00889   else if (ValVT.isVector())
00890     return LowerSTOREVector(Op, DAG);
00891   else
00892     return SDValue();
00893 }
00894 
00895 SDValue
00896 NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
00897   SDNode *N = Op.getNode();
00898   SDValue Val = N->getOperand(1);
00899   DebugLoc DL = N->getDebugLoc();
00900   EVT ValVT = Val.getValueType();
00901 
00902   if (ValVT.isVector()) {
00903     // We only handle "native" vector sizes for now, e.g. <4 x double> is not
00904     // legal.  We can (and should) split that into 2 stores of <2 x double> here
00905     // but I'm leaving that as a TODO for now.
00906     if (!ValVT.isSimple())
00907       return SDValue();
00908     switch (ValVT.getSimpleVT().SimpleTy) {
00909     default:
00910       return SDValue();
00911     case MVT::v2i8:
00912     case MVT::v2i16:
00913     case MVT::v2i32:
00914     case MVT::v2i64:
00915     case MVT::v2f32:
00916     case MVT::v2f64:
00917     case MVT::v4i8:
00918     case MVT::v4i16:
00919     case MVT::v4i32:
00920     case MVT::v4f32:
00921       // This is a "native" vector type
00922       break;
00923     }
00924 
00925     unsigned Opcode = 0;
00926     EVT EltVT = ValVT.getVectorElementType();
00927     unsigned NumElts = ValVT.getVectorNumElements();
00928 
00929     // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
00930     // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
00931     // stored type to i16 and propogate the "real" type as the memory type.
00932     bool NeedExt = false;
00933     if (EltVT.getSizeInBits() < 16)
00934       NeedExt = true;
00935 
00936     switch (NumElts) {
00937     default:
00938       return SDValue();
00939     case 2:
00940       Opcode = NVPTXISD::StoreV2;
00941       break;
00942     case 4: {
00943       Opcode = NVPTXISD::StoreV4;
00944       break;
00945     }
00946     }
00947 
00948     SmallVector<SDValue, 8> Ops;
00949 
00950     // First is the chain
00951     Ops.push_back(N->getOperand(0));
00952 
00953     // Then the split values
00954     for (unsigned i = 0; i < NumElts; ++i) {
00955       SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
00956                                    DAG.getIntPtrConstant(i));
00957       if (NeedExt)
00958         // ANY_EXTEND is correct here since the store will only look at the
00959         // lower-order bits anyway.
00960         ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
00961       Ops.push_back(ExtVal);
00962     }
00963 
00964     // Then any remaining arguments
00965     for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
00966       Ops.push_back(N->getOperand(i));
00967     }
00968 
00969     MemSDNode *MemSD = cast<MemSDNode>(N);
00970 
00971     SDValue NewSt = DAG.getMemIntrinsicNode(
00972         Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(),
00973         MemSD->getMemoryVT(), MemSD->getMemOperand());
00974 
00975     //return DCI.CombineTo(N, NewSt, true);
00976     return NewSt;
00977   }
00978 
00979   return SDValue();
00980 }
00981 
00982 // st i1 v, addr
00983 //    =>
00984 // v1 = zxt v to i8
00985 // st i8, addr
00986 SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
00987   SDNode *Node = Op.getNode();
00988   DebugLoc dl = Node->getDebugLoc();
00989   StoreSDNode *ST = cast<StoreSDNode>(Node);
00990   SDValue Tmp1 = ST->getChain();
00991   SDValue Tmp2 = ST->getBasePtr();
00992   SDValue Tmp3 = ST->getValue();
00993   assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
00994   unsigned Alignment = ST->getAlignment();
00995   bool isVolatile = ST->isVolatile();
00996   bool isNonTemporal = ST->isNonTemporal();
00997   Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
00998   SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
00999                                 isVolatile, isNonTemporal, Alignment);
01000   return Result;
01001 }
01002 
01003 SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
01004                                         int idx, EVT v) const {
01005   std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
01006   std::stringstream suffix;
01007   suffix << idx;
01008   *name += suffix.str();
01009   return DAG.getTargetExternalSymbol(name->c_str(), v);
01010 }
01011 
01012 SDValue
01013 NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
01014   return getExtSymb(DAG, ".PARAM", idx, v);
01015 }
01016 
01017 SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
01018   return getExtSymb(DAG, ".HLPPARAM", idx);
01019 }
01020 
01021 // Check to see if the kernel argument is image*_t or sampler_t
01022 
01023 bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
01024   static const char *const specialTypes[] = { "struct._image2d_t",
01025                                               "struct._image3d_t",
01026                                               "struct._sampler_t" };
01027 
01028   const Type *Ty = arg->getType();
01029   const PointerType *PTy = dyn_cast<PointerType>(Ty);
01030 
01031   if (!PTy)
01032     return false;
01033 
01034   if (!context)
01035     return false;
01036 
01037   const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
01038   const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
01039 
01040   for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
01041     if (TypeName == specialTypes[i])
01042       return true;
01043 
01044   return false;
01045 }
01046 
01047 SDValue NVPTXTargetLowering::LowerFormalArguments(
01048     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
01049     const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
01050     SmallVectorImpl<SDValue> &InVals) const {
        // Builds the values of the current function's formal arguments and
        // appends them to InVals; returns the (possibly updated) chain.
        //
        // For each IR argument, in order, one of the following is produced:
        //  - image/sampler argument: an i32 constant holding its position + 1;
        //  - unused argument: UNDEF (one per element for vector types);
        //  - non-byval vector: one load per element from the parameter symbol;
        //  - non-byval scalar: a load from the parameter symbol (ABI or kernel),
        //    otherwise an NVPTXISD::MoveParam of the symbol;
        //  - byval: a MoveParam of the symbol (ABI or kernel; non-kernels also
        //    go through nvvm_ptr_local_to_gen), otherwise a frame index of a
        //    local copy filled piece by piece.
        //
        // CallConv, isVarArg and Ins are not read by the active code below (Ins
        // only appears in commented-out assertions).
01051   MachineFunction &MF = DAG.getMachineFunction();
01052   const DataLayout *TD = getDataLayout();
01053 
01054   const Function *F = MF.getFunction();
01055   const AttributeSet &PAL = F->getAttributes();
01056 
01057   SDValue Root = DAG.getRoot();
        // NOTE(review): nothing in this function appends to OutChains, so the
        // setRoot() at the bottom is never reached as written.
01058   std::vector<SDValue> OutChains;
01059 
01060   bool isKernel = llvm::isKernelFunction(*F);
        // "ABI" mode is selected for SM versions 20 and above.
01061   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
01062 
        // Snapshot the IR arguments and their types so they can be indexed.
01063   std::vector<Type *> argTypes;
01064   std::vector<const Argument *> theArgs;
01065   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
01066        I != E; ++I) {
01067     theArgs.push_back(I);
01068     argTypes.push_back(I->getType());
01069   }
01070   //assert(argTypes.size() == Ins.size() &&
01071   //       "Ins types and function types did not match");
01072 
        // idx is the index used to name the parameter symbol.  It advances with
        // i, except in the non-ABI byval path below, which consumes one index
        // per scalar piece of the aggregate.
01073   int idx = 0;
01074   for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) {
01075     Type *Ty = argTypes[i];
01076     EVT ObjectVT = getValueType(Ty);
01077     //assert(ObjectVT == Ins[i].VT &&
01078     //       "Ins type did not match function type");
01079 
01080     // If the kernel argument is image*_t or sampler_t, convert it to
01081     // a i32 constant holding the parameter position. This can later
01082     // matched in the AsmPrinter to output the correct mangled name.
01083     if (isImageOrSamplerVal(
01084             theArgs[i],
01085             (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
01086                                      : 0))) {
01087       assert(isKernel && "Only kernels can have image/sampler params");
01088       InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
01089       continue;
01090     }
01091 
01092     if (theArgs[i]->use_empty()) {
01093       // argument is dead
            // A vector argument contributes one UNDEF per element, a scalar a
            // single UNDEF.
01094       if (ObjectVT.isVector()) {
01095         EVT EltVT = ObjectVT.getVectorElementType();
01096         unsigned NumElts = ObjectVT.getVectorNumElements();
01097         for (unsigned vi = 0; vi < NumElts; ++vi) {
01098           InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT));
01099         }
01100       } else {
01101         InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
01102       }
01103       continue;
01104     }
01105 
01106     // In the following cases, assign a node order of "idx+1"
01107     // to newly created nodes. The SDNOdes for params have to
01108     // appear in the same order as their order of appearance
01109     // in the original function. "idx+1" holds that order.
01110     if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
01111       if (ObjectVT.isVector()) {
              // Load each element separately from (param symbol + byte offset);
              // the offset advances by the element's store size.
01112         unsigned NumElts = ObjectVT.getVectorNumElements();
01113         EVT EltVT = ObjectVT.getVectorElementType();
01114         unsigned Offset = 0;
01115         for (unsigned vi = 0; vi < NumElts; ++vi) {
01116           SDValue A = getParamSymbol(DAG, idx, getPointerTy());
01117           SDValue B = DAG.getIntPtrConstant(Offset);
01118           SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
01119                                      //getParamSymbol(DAG, idx, EltVT),
01120                                      //DAG.getConstant(Offset, getPointerTy()));
01121                                      A, B);
                // Null pointer constant in the param address space, used only
                // to build the MachinePointerInfo for the load.
01122           Value *SrcValue = Constant::getNullValue(PointerType::get(
01123               EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
01124           SDValue Ld = DAG.getLoad(
01125               EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false,
01126               false,
01127               TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
01128           Offset += EltVT.getStoreSizeInBits() / 8;
01129           InVals.push_back(Ld);
01130         }
01131         continue;
01132       }
01133 
01134       // A plain scalar.
01135       if (isABI || isKernel) {
01136         // If ABI, load from the param symbol
01137         SDValue Arg = getParamSymbol(DAG, idx);
01138         // Conjure up a value that we can get the address space from.
01139         // FIXME: Using a constant here is a hack.
01140         Value *srcValue = Constant::getNullValue(
01141             PointerType::get(ObjectVT.getTypeForEVT(F->getContext()),
01142                              llvm::ADDRESS_SPACE_PARAM));
01143         SDValue p = DAG.getLoad(
01144             ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false,
01145             false,
01146             TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
01147         if (p.getNode())
01148           DAG.AssignOrdering(p.getNode(), idx + 1);
01149         InVals.push_back(p);
01150       } else {
01151         // If no ABI, just move the param symbol
01152         SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
01153         SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
01154         if (p.getNode())
01155           DAG.AssignOrdering(p.getNode(), idx + 1);
01156         InVals.push_back(p);
01157       }
01158       continue;
01159     }
01160 
01161     // Param has ByVal attribute
01162     if (isABI || isKernel) {
01163       // Return MoveParam(param symbol).
01164       // Ideally, the param symbol can be returned directly,
01165       // but when SDNode builder decides to use it in a CopyToReg(),
01166       // machine instruction fails because TargetExternalSymbol
01167       // (not lowered) is target dependent, and CopyToReg assumes
01168       // the source is lowered.
01169       SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
01170       SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
01171       if (p.getNode())
01172         DAG.AssignOrdering(p.getNode(), idx + 1);
01173       if (isKernel)
01174         InVals.push_back(p);
01175       else {
              // Non-kernel: pass the pointer through the
              // nvvm_ptr_local_to_gen intrinsic before handing it out.
01176         SDValue p2 = DAG.getNode(
01177             ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
01178             DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
01179         InVals.push_back(p2);
01180       }
01181     } else {
01182       // Have to move a set of param symbols to registers and
01183       // store them locally and return the local pointer in InVals
01184       const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
01185       assert(elemPtrType && "Byval parameter should be a pointer type");
01186       Type *elemType = elemPtrType->getElementType();
01187       // Compute the constituent parts
01188       SmallVector<EVT, 16> vtparts;
01189       SmallVector<uint64_t, 16> offsets;
01190       ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
            // Size of the local copy: sum of the parts' store sizes (in bits
            // here, converted to bytes when the stack object is created).
01191       unsigned totalsize = 0;
01192       for (unsigned j = 0, je = vtparts.size(); j != je; ++j)
01193         totalsize += vtparts[j].getStoreSizeInBits();
            // NOTE(review): the literal 16 is presumably the stack object's
            // alignment -- confirm against CreateStackObject's signature.
01194       SDValue localcopy = DAG.getFrameIndex(
01195           MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false),
01196           getPointerTy());
01197       unsigned sizesofar = 0;
01198       std::vector<SDValue> theChains;
01199       for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
01200         unsigned numElems = 1;
01201         if (vtparts[j].isVector())
01202           numElems = vtparts[j].getVectorNumElements();
              // One MoveParam + store per scalar piece; each piece uses its
              // own parameter symbol index.
01203         for (unsigned k = 0, ke = numElems; k != ke; ++k) {
01204           EVT tmpvt = vtparts[j];
01205           if (tmpvt.isVector())
01206             tmpvt = tmpvt.getVectorElementType();
01207           SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
01208                                     getParamSymbol(DAG, idx, tmpvt));
01209           SDValue addr =
01210               DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
01211                           DAG.getConstant(sizesofar, getPointerTy()));
01212           theChains.push_back(DAG.getStore(
01213               Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0));
01214           sizesofar += tmpvt.getStoreSizeInBits() / 8;
01215           ++idx;
01216         }
01217       }
            // Undo the final ++idx above; the loop header increments idx again.
01218       --idx;
            // Join the per-piece stores into the chain that is returned.
            // NOTE(review): &theChains[0] assumes at least one piece was
            // stored -- confirm an empty aggregate cannot reach this point.
01219       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
01220                           theChains.size());
01221       InVals.push_back(localcopy);
01222     }
01223   }
01224 
01225   // Clang will check explicit VarArg and issue error if any. However, Clang
01226   // will let code with
01227   // implicit var arg like f() pass.
01228   // We treat this case as if the arg list is empty.
01229   //if (F.isVarArg()) {
01230   // assert(0 && "VarArg not supported yet!");
01231   //}
01232 
01233   if (!OutChains.empty())
01234     DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
01235                             OutChains.size()));
01236 
01237   return Chain;
01238 }
01239 
01240 SDValue NVPTXTargetLowering::LowerReturn(
01241     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
01242     const SmallVectorImpl<ISD::OutputArg> &Outs,
01243     const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
01244     SelectionDAG &DAG) const {
01245 
01246   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
01247 
01248   unsigned sizesofar = 0;
01249   unsigned idx = 0;
01250   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
01251     SDValue theVal = OutVals[i];
01252     EVT theValType = theVal.getValueType();
01253     unsigned numElems = 1;
01254     if (theValType.isVector())
01255       numElems = theValType.getVectorNumElements();
01256     for (unsigned j = 0, je = numElems; j != je; ++j) {
01257       SDValue tmpval = theVal;
01258       if (theValType.isVector())
01259         tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
01260                              theValType.getVectorElementType(), tmpval,
01261                              DAG.getIntPtrConstant(j));
01262       Chain = DAG.getNode(
01263           isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl,
01264           MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
01265           tmpval);
01266       if (theValType.isVector())
01267         sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8;
01268       else
01269         sizesofar += theValType.getStoreSizeInBits() / 8;
01270       ++idx;
01271     }
01272   }
01273 
01274   return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
01275 }
01276 
01277 void NVPTXTargetLowering::LowerAsmOperandForConstraint(
01278     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
01279     SelectionDAG &DAG) const {
01280   if (Constraint.length() > 1)
01281     return;
01282   else
01283     TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
01284 }
01285 
01286 // NVPTX suuport vector of legal types of any length in Intrinsics because the
01287 // NVPTX specific type legalizer
01288 // will legalize them to the PTX supported length.
01289 bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
01290   if (isTypeLegal(VT))
01291     return true;
01292   if (VT.isVector()) {
01293     MVT eVT = VT.getVectorElementType();
01294     if (isTypeLegal(eVT))
01295       return true;
01296   }
01297   return false;
01298 }
01299 
01300 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
01301 // TgtMemIntrinsic
01302 // because we need the information that is only available in the "Value" type
01303 // of destination
01304 // pointer. In particular, the address space information.
01305 bool NVPTXTargetLowering::getTgtMemIntrinsic(
01306     IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
01307   switch (Intrinsic) {
01308   default:
01309     return false;
01310 
01311   case Intrinsic::nvvm_atomic_load_add_f32:
01312     Info.opc = ISD::INTRINSIC_W_CHAIN;
01313     Info.memVT = MVT::f32;
01314     Info.ptrVal = I.getArgOperand(0);
01315     Info.offset = 0;
01316     Info.vol = 0;
01317     Info.readMem = true;
01318     Info.writeMem = true;
01319     Info.align = 0;
01320     return true;
01321 
01322   case Intrinsic::nvvm_atomic_load_inc_32:
01323   case Intrinsic::nvvm_atomic_load_dec_32:
01324     Info.opc = ISD::INTRINSIC_W_CHAIN;
01325     Info.memVT = MVT::i32;
01326     Info.ptrVal = I.getArgOperand(0);
01327     Info.offset = 0;
01328     Info.vol = 0;
01329     Info.readMem = true;
01330     Info.writeMem = true;
01331     Info.align = 0;
01332     return true;
01333 
01334   case Intrinsic::nvvm_ldu_global_i:
01335   case Intrinsic::nvvm_ldu_global_f:
01336   case Intrinsic::nvvm_ldu_global_p:
01337 
01338     Info.opc = ISD::INTRINSIC_W_CHAIN;
01339     if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
01340       Info.memVT = MVT::i32;
01341     else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
01342       Info.memVT = getPointerTy();
01343     else
01344       Info.memVT = MVT::f32;
01345     Info.ptrVal = I.getArgOperand(0);
01346     Info.offset = 0;
01347     Info.vol = 0;
01348     Info.readMem = true;
01349     Info.writeMem = false;
01350     Info.align = 0;
01351     return true;
01352 
01353   }
01354   return false;
01355 }
01356 
01357 /// isLegalAddressingMode - Return true if the addressing mode represented
01358 /// by AM is legal for this target, for a load/store of the specified type.
01359 /// Used to guide target specific optimizations, like loop strength reduction
01360 /// (LoopStrengthReduce.cpp) and memory optimization for address mode
01361 /// (CodeGenPrepare.cpp)
01362 bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
01363                                                 Type *Ty) const {
01364 
01365   // AddrMode - This represents an addressing mode of:
01366   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
01367   //
01368   // The legal address modes are
01369   // - [avar]
01370   // - [areg]
01371   // - [areg+immoff]
01372   // - [immAddr]
01373 
01374   if (AM.BaseGV) {
01375     if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
01376       return false;
01377     return true;
01378   }
01379 
01380   switch (AM.Scale) {
01381   case 0: // "r", "r+i" or "i" is allowed
01382     break;
01383   case 1:
01384     if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
01385       return false;
01386     // Otherwise we have r+i.
01387     break;
01388   default:
01389     // No scale > 1 is allowed
01390     return false;
01391   }
01392   return true;
01393 }
01394 
01395 //===----------------------------------------------------------------------===//
01396 //                         NVPTX Inline Assembly Support
01397 //===----------------------------------------------------------------------===//
01398 
01399 /// getConstraintType - Given a constraint letter, return the type of
01400 /// constraint it is for this target.
01401 NVPTXTargetLowering::ConstraintType
01402 NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
01403   if (Constraint.size() == 1) {
01404     switch (Constraint[0]) {
01405     default:
01406       break;
01407     case 'r':
01408     case 'h':
01409     case 'c':
01410     case 'l':
01411     case 'f':
01412     case 'd':
01413     case '0':
01414     case 'N':
01415       return C_RegisterClass;
01416     }
01417   }
01418   return TargetLowering::getConstraintType(Constraint);
01419 }
01420 
01421 std::pair<unsigned, const TargetRegisterClass *>
01422 NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
01423                                                   EVT VT) const {
01424   if (Constraint.size() == 1) {
01425     switch (Constraint[0]) {
01426     case 'c':
01427       return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
01428     case 'h':
01429       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
01430     case 'r':
01431       return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
01432     case 'l':
01433     case 'N':
01434       return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
01435     case 'f':
01436       return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
01437     case 'd':
01438       return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
01439     }
01440   }
01441   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
01442 }
01443 
01444 /// getFunctionAlignment - Return the Log2 alignment of this function.
01445 unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
01446   return 4;
01447 }
01448 
01449 /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
01450 static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
01451                               SmallVectorImpl<SDValue> &Results) {
01452   EVT ResVT = N->getValueType(0);
01453   DebugLoc DL = N->getDebugLoc();
01454 
01455   assert(ResVT.isVector() && "Vector load must have vector type");
01456 
01457   // We only handle "native" vector sizes for now, e.g. <4 x double> is not
01458   // legal.  We can (and should) split that into 2 loads of <2 x double> here
01459   // but I'm leaving that as a TODO for now.
01460   assert(ResVT.isSimple() && "Can only handle simple types");
01461   switch (ResVT.getSimpleVT().SimpleTy) {
01462   default:
01463     return;
01464   case MVT::v2i8:
01465   case MVT::v2i16:
01466   case MVT::v2i32:
01467   case MVT::v2i64:
01468   case MVT::v2f32:
01469   case MVT::v2f64:
01470   case MVT::v4i8:
01471   case MVT::v4i16:
01472   case MVT::v4i32:
01473   case MVT::v4f32:
01474     // This is a "native" vector type
01475     break;
01476   }
01477 
01478   EVT EltVT = ResVT.getVectorElementType();
01479   unsigned NumElts = ResVT.getVectorNumElements();
01480 
01481   // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
01482   // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
01483   // loaded type to i16 and propogate the "real" type as the memory type.
01484   bool NeedTrunc = false;
01485   if (EltVT.getSizeInBits() < 16) {
01486     EltVT = MVT::i16;
01487     NeedTrunc = true;
01488   }
01489 
01490   unsigned Opcode = 0;
01491   SDVTList LdResVTs;
01492 
01493   switch (NumElts) {
01494   default:
01495     return;
01496   case 2:
01497     Opcode = NVPTXISD::LoadV2;
01498     LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
01499     break;
01500   case 4: {
01501     Opcode = NVPTXISD::LoadV4;
01502     EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
01503     LdResVTs = DAG.getVTList(ListVTs, 5);
01504     break;
01505   }
01506   }
01507 
01508   SmallVector<SDValue, 8> OtherOps;
01509 
01510   // Copy regular operands
01511   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
01512     OtherOps.push_back(N->getOperand(i));
01513 
01514   LoadSDNode *LD = cast<LoadSDNode>(N);
01515 
01516   // The select routine does not have access to the LoadSDNode instance, so
01517   // pass along the extension information
01518   OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
01519 
01520   SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
01521                                           OtherOps.size(), LD->getMemoryVT(),
01522                                           LD->getMemOperand());
01523 
01524   SmallVector<SDValue, 4> ScalarRes;
01525 
01526   for (unsigned i = 0; i < NumElts; ++i) {
01527     SDValue Res = NewLD.getValue(i);
01528     if (NeedTrunc)
01529       Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
01530     ScalarRes.push_back(Res);
01531   }
01532 
01533   SDValue LoadChain = NewLD.getValue(NumElts);
01534 
01535   SDValue BuildVec =
01536       DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
01537 
01538   Results.push_back(BuildVec);
01539   Results.push_back(LoadChain);
01540 }
01541 
01542 static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
01543                                      SmallVectorImpl<SDValue> &Results) {
01544   SDValue Chain = N->getOperand(0);
01545   SDValue Intrin = N->getOperand(1);
01546   DebugLoc DL = N->getDebugLoc();
01547 
01548   // Get the intrinsic ID
01549   unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
01550   switch (IntrinNo) {
01551   default:
01552     return;
01553   case Intrinsic::nvvm_ldg_global_i:
01554   case Intrinsic::nvvm_ldg_global_f:
01555   case Intrinsic::nvvm_ldg_global_p:
01556   case Intrinsic::nvvm_ldu_global_i:
01557   case Intrinsic::nvvm_ldu_global_f:
01558   case Intrinsic::nvvm_ldu_global_p: {
01559     EVT ResVT = N->getValueType(0);
01560 
01561     if (ResVT.isVector()) {
01562       // Vector LDG/LDU
01563 
01564       unsigned NumElts = ResVT.getVectorNumElements();
01565       EVT EltVT = ResVT.getVectorElementType();
01566 
01567       // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
01568       // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
01569       // loaded type to i16 and propogate the "real" type as the memory type.
01570       bool NeedTrunc = false;
01571       if (EltVT.getSizeInBits() < 16) {
01572         EltVT = MVT::i16;
01573         NeedTrunc = true;
01574       }
01575 
01576       unsigned Opcode = 0;
01577       SDVTList LdResVTs;
01578 
01579       switch (NumElts) {
01580       default:
01581         return;
01582       case 2:
01583         switch (IntrinNo) {
01584         default:
01585           return;
01586         case Intrinsic::nvvm_ldg_global_i:
01587         case Intrinsic::nvvm_ldg_global_f:
01588         case Intrinsic::nvvm_ldg_global_p:
01589           Opcode = NVPTXISD::LDGV2;
01590           break;
01591         case Intrinsic::nvvm_ldu_global_i:
01592         case Intrinsic::nvvm_ldu_global_f:
01593         case Intrinsic::nvvm_ldu_global_p:
01594           Opcode = NVPTXISD::LDUV2;
01595           break;
01596         }
01597         LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
01598         break;
01599       case 4: {
01600         switch (IntrinNo) {
01601         default:
01602           return;
01603         case Intrinsic::nvvm_ldg_global_i:
01604         case Intrinsic::nvvm_ldg_global_f:
01605         case Intrinsic::nvvm_ldg_global_p:
01606           Opcode = NVPTXISD::LDGV4;
01607           break;
01608         case Intrinsic::nvvm_ldu_global_i:
01609         case Intrinsic::nvvm_ldu_global_f:
01610         case Intrinsic::nvvm_ldu_global_p:
01611           Opcode = NVPTXISD::LDUV4;
01612           break;
01613         }
01614         EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
01615         LdResVTs = DAG.getVTList(ListVTs, 5);
01616         break;
01617       }
01618       }
01619 
01620       SmallVector<SDValue, 8> OtherOps;
01621 
01622       // Copy regular operands
01623 
01624       OtherOps.push_back(Chain); // Chain
01625                                  // Skip operand 1 (intrinsic ID)
01626                                  // Others
01627       for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
01628         OtherOps.push_back(N->getOperand(i));
01629 
01630       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
01631 
01632       SDValue NewLD = DAG.getMemIntrinsicNode(
01633           Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
01634           MemSD->getMemoryVT(), MemSD->getMemOperand());
01635 
01636       SmallVector<SDValue, 4> ScalarRes;
01637 
01638       for (unsigned i = 0; i < NumElts; ++i) {
01639         SDValue Res = NewLD.getValue(i);
01640         if (NeedTrunc)
01641           Res =
01642               DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
01643         ScalarRes.push_back(Res);
01644       }
01645 
01646       SDValue LoadChain = NewLD.getValue(NumElts);
01647 
01648       SDValue BuildVec =
01649           DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
01650 
01651       Results.push_back(BuildVec);
01652       Results.push_back(LoadChain);
01653     } else {
01654       // i8 LDG/LDU
01655       assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
01656              "Custom handling of non-i8 ldu/ldg?");
01657 
01658       // Just copy all operands as-is
01659       SmallVector<SDValue, 4> Ops;
01660       for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
01661         Ops.push_back(N->getOperand(i));
01662 
01663       // Force output to i16
01664       SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
01665 
01666       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
01667 
01668       // We make sure the memory type is i8, which will be used during isel
01669       // to select the proper instruction.
01670       SDValue NewLD =
01671           DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
01672                                   Ops.size(), MVT::i8, MemSD->getMemOperand());
01673 
01674       Results.push_back(NewLD.getValue(0));
01675       Results.push_back(NewLD.getValue(1));
01676     }
01677   }
01678   }
01679 }
01680 
01681 void NVPTXTargetLowering::ReplaceNodeResults(
01682     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
01683   switch (N->getOpcode()) {
01684   default:
01685     report_fatal_error("Unhandled custom legalization");
01686   case ISD::LOAD:
01687     ReplaceLoadVector(N, DAG, Results);
01688     return;
01689   case ISD::INTRINSIC_W_CHAIN:
01690     ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
01691     return;
01692   }
01693 }