LLVM  mainline
NVPTXISelLowering.cpp
Go to the documentation of this file.
00001 //
00002 //                     The LLVM Compiler Infrastructure
00003 //
00004 // This file is distributed under the University of Illinois Open Source
00005 // License. See LICENSE.TXT for details.
00006 //
00007 //===----------------------------------------------------------------------===//
00008 //
00009 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
00010 // selection DAG.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "NVPTXISelLowering.h"
00015 #include "NVPTX.h"
00016 #include "NVPTXTargetMachine.h"
00017 #include "NVPTXTargetObjectFile.h"
00018 #include "NVPTXUtilities.h"
00019 #include "llvm/CodeGen/Analysis.h"
00020 #include "llvm/CodeGen/MachineFrameInfo.h"
00021 #include "llvm/CodeGen/MachineFunction.h"
00022 #include "llvm/CodeGen/MachineInstrBuilder.h"
00023 #include "llvm/CodeGen/MachineRegisterInfo.h"
00024 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
00025 #include "llvm/IR/CallSite.h"
00026 #include "llvm/IR/DerivedTypes.h"
00027 #include "llvm/IR/Function.h"
00028 #include "llvm/IR/GlobalValue.h"
00029 #include "llvm/IR/IntrinsicInst.h"
00030 #include "llvm/IR/Intrinsics.h"
00031 #include "llvm/IR/Module.h"
00032 #include "llvm/MC/MCSectionELF.h"
00033 #include "llvm/Support/CommandLine.h"
00034 #include "llvm/Support/Debug.h"
00035 #include "llvm/Support/ErrorHandling.h"
00036 #include "llvm/Support/MathExtras.h"
00037 #include "llvm/Support/raw_ostream.h"
00038 #include <sstream>
00039 
00040 #undef DEBUG_TYPE
00041 #define DEBUG_TYPE "nvptx-lower"
00042 
00043 using namespace llvm;
00044 
00045 static unsigned int uniqueCallSite = 0;
00046 
00047 static cl::opt<bool> sched4reg(
00048     "nvptx-sched4reg",
00049     cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
00050 
00051 static cl::opt<unsigned>
00052 FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
00053                     cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
00054                              " 1: do it  2: do it aggressively"),
00055                     cl::init(2));
00056 
00057 static bool IsPTXVectorType(MVT VT) {
00058   switch (VT.SimpleTy) {
00059   default:
00060     return false;
00061   case MVT::v2i1:
00062   case MVT::v4i1:
00063   case MVT::v2i8:
00064   case MVT::v4i8:
00065   case MVT::v2i16:
00066   case MVT::v4i16:
00067   case MVT::v2i32:
00068   case MVT::v4i32:
00069   case MVT::v2i64:
00070   case MVT::v2f32:
00071   case MVT::v4f32:
00072   case MVT::v2f64:
00073     return true;
00074   }
00075 }
00076 
00077 /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
00078 /// EVTs that compose it.  Unlike ComputeValueVTs, this will break apart vectors
00079 /// into their primitive components.
00080 /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
00081 /// same number of types as the Ins/Outs arrays in LowerFormalArguments,
00082 /// LowerCall, and LowerReturn.
00083 static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
00084                                SmallVectorImpl<EVT> &ValueVTs,
00085                                SmallVectorImpl<uint64_t> *Offsets = nullptr,
00086                                uint64_t StartingOffset = 0) {
00087   SmallVector<EVT, 16> TempVTs;
00088   SmallVector<uint64_t, 16> TempOffsets;
00089 
00090   ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset);
00091   for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
00092     EVT VT = TempVTs[i];
00093     uint64_t Off = TempOffsets[i];
00094     if (VT.isVector())
00095       for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
00096         ValueVTs.push_back(VT.getVectorElementType());
00097         if (Offsets)
00098           Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize());
00099       }
00100     else {
00101       ValueVTs.push_back(VT);
00102       if (Offsets)
00103         Offsets->push_back(Off);
00104     }
00105   }
00106 }
00107 
// NVPTXTargetLowering Constructor.
//
// Registers the NVPTX register classes, declares which generic DAG nodes are
// Legal/Expand/Custom for this target, and sets target-wide lowering knobs
// (memset/memcpy expansion limits, boolean representation, scheduling
// preference, DAG-combine opt-ins).
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                                         const NVPTXSubtarget &STI)
    : TargetLowering(TM), nvTM(&TM), STI(STI) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy or memmove
  // (the huge store limits make the inline expansion always win).
  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;

  // Booleans are represented as 0 / -1 (all-ones), for scalars and vectors.
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // By default, use the Source scheduling; -nvptx-sched4reg switches to the
  // register-pressure scheduler.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  // One register class per legal scalar type.
  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
  // Some SIGN_EXTEND_INREG can be done using cvt instruction.
  // For others we will expand to a SHL/SRA pair.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Multi-word shifts get custom lowering.
  setOperationAction(ISD::SHL_PARTS, MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32  , Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i64  , Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i64  , Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i64  , Custom);

  // i64/i32 rotates are legal only when the subtarget reports rotate
  // support; otherwise they are expanded (to shift/or sequences).
  if (STI.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (STI.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  // Small-width rotates and all byte swaps are always expanded.
  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant related memmove and memcopy
  // intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fextend
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store predicate registers
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  // Extending i1 loads are promoted to a wider type; truncating stores of i1
  // are expanded.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // This is legal in NVPTX
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::ADDC, MVT::i64, Expand);
  setOperationAction(ISD::ADDE, MVT::i64, Expand);

  // Register custom handling for vector loads/stores
  for (MVT VT : MVT::vector_valuetypes()) {
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Custom handling for i8 intrinsics
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);

  // Count-leading-zeros and popcount are legal; count-trailing-zeros is
  // expanded.
  setOperationAction(ISD::CTLZ, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Legal);
  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  // PTX does not directly support SELP of i1, so promote to i32 first
  setOperationAction(ISD::SELECT, MVT::i1, Custom);

  // We have some custom DAG combine patterns for these nodes
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);

  // Now deduce the information based on the above mentioned
  // actions
  computeRegisterProperties(STI.getRegisterInfo());
}
00276 
00277 const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
00278   switch ((NVPTXISD::NodeType)Opcode) {
00279   case NVPTXISD::FIRST_NUMBER:
00280     break;
00281   case NVPTXISD::CALL:
00282     return "NVPTXISD::CALL";
00283   case NVPTXISD::RET_FLAG:
00284     return "NVPTXISD::RET_FLAG";
00285   case NVPTXISD::LOAD_PARAM:
00286     return "NVPTXISD::LOAD_PARAM";
00287   case NVPTXISD::Wrapper:
00288     return "NVPTXISD::Wrapper";
00289   case NVPTXISD::DeclareParam:
00290     return "NVPTXISD::DeclareParam";
00291   case NVPTXISD::DeclareScalarParam:
00292     return "NVPTXISD::DeclareScalarParam";
00293   case NVPTXISD::DeclareRet:
00294     return "NVPTXISD::DeclareRet";
00295   case NVPTXISD::DeclareScalarRet:
00296     return "NVPTXISD::DeclareScalarRet";
00297   case NVPTXISD::DeclareRetParam:
00298     return "NVPTXISD::DeclareRetParam";
00299   case NVPTXISD::PrintCall:
00300     return "NVPTXISD::PrintCall";
00301   case NVPTXISD::PrintCallUni:
00302     return "NVPTXISD::PrintCallUni";
00303   case NVPTXISD::LoadParam:
00304     return "NVPTXISD::LoadParam";
00305   case NVPTXISD::LoadParamV2:
00306     return "NVPTXISD::LoadParamV2";
00307   case NVPTXISD::LoadParamV4:
00308     return "NVPTXISD::LoadParamV4";
00309   case NVPTXISD::StoreParam:
00310     return "NVPTXISD::StoreParam";
00311   case NVPTXISD::StoreParamV2:
00312     return "NVPTXISD::StoreParamV2";
00313   case NVPTXISD::StoreParamV4:
00314     return "NVPTXISD::StoreParamV4";
00315   case NVPTXISD::StoreParamS32:
00316     return "NVPTXISD::StoreParamS32";
00317   case NVPTXISD::StoreParamU32:
00318     return "NVPTXISD::StoreParamU32";
00319   case NVPTXISD::CallArgBegin:
00320     return "NVPTXISD::CallArgBegin";
00321   case NVPTXISD::CallArg:
00322     return "NVPTXISD::CallArg";
00323   case NVPTXISD::LastCallArg:
00324     return "NVPTXISD::LastCallArg";
00325   case NVPTXISD::CallArgEnd:
00326     return "NVPTXISD::CallArgEnd";
00327   case NVPTXISD::CallVoid:
00328     return "NVPTXISD::CallVoid";
00329   case NVPTXISD::CallVal:
00330     return "NVPTXISD::CallVal";
00331   case NVPTXISD::CallSymbol:
00332     return "NVPTXISD::CallSymbol";
00333   case NVPTXISD::Prototype:
00334     return "NVPTXISD::Prototype";
00335   case NVPTXISD::MoveParam:
00336     return "NVPTXISD::MoveParam";
00337   case NVPTXISD::StoreRetval:
00338     return "NVPTXISD::StoreRetval";
00339   case NVPTXISD::StoreRetvalV2:
00340     return "NVPTXISD::StoreRetvalV2";
00341   case NVPTXISD::StoreRetvalV4:
00342     return "NVPTXISD::StoreRetvalV4";
00343   case NVPTXISD::PseudoUseParam:
00344     return "NVPTXISD::PseudoUseParam";
00345   case NVPTXISD::RETURN:
00346     return "NVPTXISD::RETURN";
00347   case NVPTXISD::CallSeqBegin:
00348     return "NVPTXISD::CallSeqBegin";
00349   case NVPTXISD::CallSeqEnd:
00350     return "NVPTXISD::CallSeqEnd";
00351   case NVPTXISD::CallPrototype:
00352     return "NVPTXISD::CallPrototype";
00353   case NVPTXISD::LoadV2:
00354     return "NVPTXISD::LoadV2";
00355   case NVPTXISD::LoadV4:
00356     return "NVPTXISD::LoadV4";
00357   case NVPTXISD::LDGV2:
00358     return "NVPTXISD::LDGV2";
00359   case NVPTXISD::LDGV4:
00360     return "NVPTXISD::LDGV4";
00361   case NVPTXISD::LDUV2:
00362     return "NVPTXISD::LDUV2";
00363   case NVPTXISD::LDUV4:
00364     return "NVPTXISD::LDUV4";
00365   case NVPTXISD::StoreV2:
00366     return "NVPTXISD::StoreV2";
00367   case NVPTXISD::StoreV4:
00368     return "NVPTXISD::StoreV4";
00369   case NVPTXISD::FUN_SHFL_CLAMP:
00370     return "NVPTXISD::FUN_SHFL_CLAMP";
00371   case NVPTXISD::FUN_SHFR_CLAMP:
00372     return "NVPTXISD::FUN_SHFR_CLAMP";
00373   case NVPTXISD::IMAD:
00374     return "NVPTXISD::IMAD";
00375   case NVPTXISD::Dummy:
00376     return "NVPTXISD::Dummy";
00377   case NVPTXISD::MUL_WIDE_SIGNED:
00378     return "NVPTXISD::MUL_WIDE_SIGNED";
00379   case NVPTXISD::MUL_WIDE_UNSIGNED:
00380     return "NVPTXISD::MUL_WIDE_UNSIGNED";
00381   case NVPTXISD::Tex1DFloatS32:        return "NVPTXISD::Tex1DFloatS32";
00382   case NVPTXISD::Tex1DFloatFloat:      return "NVPTXISD::Tex1DFloatFloat";
00383   case NVPTXISD::Tex1DFloatFloatLevel:
00384     return "NVPTXISD::Tex1DFloatFloatLevel";
00385   case NVPTXISD::Tex1DFloatFloatGrad:
00386     return "NVPTXISD::Tex1DFloatFloatGrad";
00387   case NVPTXISD::Tex1DS32S32:          return "NVPTXISD::Tex1DS32S32";
00388   case NVPTXISD::Tex1DS32Float:        return "NVPTXISD::Tex1DS32Float";
00389   case NVPTXISD::Tex1DS32FloatLevel:
00390     return "NVPTXISD::Tex1DS32FloatLevel";
00391   case NVPTXISD::Tex1DS32FloatGrad:
00392     return "NVPTXISD::Tex1DS32FloatGrad";
00393   case NVPTXISD::Tex1DU32S32:          return "NVPTXISD::Tex1DU32S32";
00394   case NVPTXISD::Tex1DU32Float:        return "NVPTXISD::Tex1DU32Float";
00395   case NVPTXISD::Tex1DU32FloatLevel:
00396     return "NVPTXISD::Tex1DU32FloatLevel";
00397   case NVPTXISD::Tex1DU32FloatGrad:
00398     return "NVPTXISD::Tex1DU32FloatGrad";
00399   case NVPTXISD::Tex1DArrayFloatS32:   return "NVPTXISD::Tex1DArrayFloatS32";
00400   case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
00401   case NVPTXISD::Tex1DArrayFloatFloatLevel:
00402     return "NVPTXISD::Tex1DArrayFloatFloatLevel";
00403   case NVPTXISD::Tex1DArrayFloatFloatGrad:
00404     return "NVPTXISD::Tex1DArrayFloatFloatGrad";
00405   case NVPTXISD::Tex1DArrayS32S32:     return "NVPTXISD::Tex1DArrayS32S32";
00406   case NVPTXISD::Tex1DArrayS32Float:   return "NVPTXISD::Tex1DArrayS32Float";
00407   case NVPTXISD::Tex1DArrayS32FloatLevel:
00408     return "NVPTXISD::Tex1DArrayS32FloatLevel";
00409   case NVPTXISD::Tex1DArrayS32FloatGrad:
00410     return "NVPTXISD::Tex1DArrayS32FloatGrad";
00411   case NVPTXISD::Tex1DArrayU32S32:     return "NVPTXISD::Tex1DArrayU32S32";
00412   case NVPTXISD::Tex1DArrayU32Float:   return "NVPTXISD::Tex1DArrayU32Float";
00413   case NVPTXISD::Tex1DArrayU32FloatLevel:
00414     return "NVPTXISD::Tex1DArrayU32FloatLevel";
00415   case NVPTXISD::Tex1DArrayU32FloatGrad:
00416     return "NVPTXISD::Tex1DArrayU32FloatGrad";
00417   case NVPTXISD::Tex2DFloatS32:        return "NVPTXISD::Tex2DFloatS32";
00418   case NVPTXISD::Tex2DFloatFloat:      return "NVPTXISD::Tex2DFloatFloat";
00419   case NVPTXISD::Tex2DFloatFloatLevel:
00420     return "NVPTXISD::Tex2DFloatFloatLevel";
00421   case NVPTXISD::Tex2DFloatFloatGrad:
00422     return "NVPTXISD::Tex2DFloatFloatGrad";
00423   case NVPTXISD::Tex2DS32S32:          return "NVPTXISD::Tex2DS32S32";
00424   case NVPTXISD::Tex2DS32Float:        return "NVPTXISD::Tex2DS32Float";
00425   case NVPTXISD::Tex2DS32FloatLevel:
00426     return "NVPTXISD::Tex2DS32FloatLevel";
00427   case NVPTXISD::Tex2DS32FloatGrad:
00428     return "NVPTXISD::Tex2DS32FloatGrad";
00429   case NVPTXISD::Tex2DU32S32:          return "NVPTXISD::Tex2DU32S32";
00430   case NVPTXISD::Tex2DU32Float:        return "NVPTXISD::Tex2DU32Float";
00431   case NVPTXISD::Tex2DU32FloatLevel:
00432     return "NVPTXISD::Tex2DU32FloatLevel";
00433   case NVPTXISD::Tex2DU32FloatGrad:
00434     return "NVPTXISD::Tex2DU32FloatGrad";
00435   case NVPTXISD::Tex2DArrayFloatS32:   return "NVPTXISD::Tex2DArrayFloatS32";
00436   case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
00437   case NVPTXISD::Tex2DArrayFloatFloatLevel:
00438     return "NVPTXISD::Tex2DArrayFloatFloatLevel";
00439   case NVPTXISD::Tex2DArrayFloatFloatGrad:
00440     return "NVPTXISD::Tex2DArrayFloatFloatGrad";
00441   case NVPTXISD::Tex2DArrayS32S32:     return "NVPTXISD::Tex2DArrayS32S32";
00442   case NVPTXISD::Tex2DArrayS32Float:   return "NVPTXISD::Tex2DArrayS32Float";
00443   case NVPTXISD::Tex2DArrayS32FloatLevel:
00444     return "NVPTXISD::Tex2DArrayS32FloatLevel";
00445   case NVPTXISD::Tex2DArrayS32FloatGrad:
00446     return "NVPTXISD::Tex2DArrayS32FloatGrad";
00447   case NVPTXISD::Tex2DArrayU32S32:     return "NVPTXISD::Tex2DArrayU32S32";
00448   case NVPTXISD::Tex2DArrayU32Float:   return "NVPTXISD::Tex2DArrayU32Float";
00449   case NVPTXISD::Tex2DArrayU32FloatLevel:
00450     return "NVPTXISD::Tex2DArrayU32FloatLevel";
00451   case NVPTXISD::Tex2DArrayU32FloatGrad:
00452     return "NVPTXISD::Tex2DArrayU32FloatGrad";
00453   case NVPTXISD::Tex3DFloatS32:        return "NVPTXISD::Tex3DFloatS32";
00454   case NVPTXISD::Tex3DFloatFloat:      return "NVPTXISD::Tex3DFloatFloat";
00455   case NVPTXISD::Tex3DFloatFloatLevel:
00456     return "NVPTXISD::Tex3DFloatFloatLevel";
00457   case NVPTXISD::Tex3DFloatFloatGrad:
00458     return "NVPTXISD::Tex3DFloatFloatGrad";
00459   case NVPTXISD::Tex3DS32S32:          return "NVPTXISD::Tex3DS32S32";
00460   case NVPTXISD::Tex3DS32Float:        return "NVPTXISD::Tex3DS32Float";
00461   case NVPTXISD::Tex3DS32FloatLevel:
00462     return "NVPTXISD::Tex3DS32FloatLevel";
00463   case NVPTXISD::Tex3DS32FloatGrad:
00464     return "NVPTXISD::Tex3DS32FloatGrad";
00465   case NVPTXISD::Tex3DU32S32:          return "NVPTXISD::Tex3DU32S32";
00466   case NVPTXISD::Tex3DU32Float:        return "NVPTXISD::Tex3DU32Float";
00467   case NVPTXISD::Tex3DU32FloatLevel:
00468     return "NVPTXISD::Tex3DU32FloatLevel";
00469   case NVPTXISD::Tex3DU32FloatGrad:
00470     return "NVPTXISD::Tex3DU32FloatGrad";
00471   case NVPTXISD::TexCubeFloatFloat:      return "NVPTXISD::TexCubeFloatFloat";
00472   case NVPTXISD::TexCubeFloatFloatLevel:
00473     return "NVPTXISD::TexCubeFloatFloatLevel";
00474   case NVPTXISD::TexCubeS32Float:        return "NVPTXISD::TexCubeS32Float";
00475   case NVPTXISD::TexCubeS32FloatLevel:
00476     return "NVPTXISD::TexCubeS32FloatLevel";
00477   case NVPTXISD::TexCubeU32Float:        return "NVPTXISD::TexCubeU32Float";
00478   case NVPTXISD::TexCubeU32FloatLevel:
00479     return "NVPTXISD::TexCubeU32FloatLevel";
00480   case NVPTXISD::TexCubeArrayFloatFloat:
00481     return "NVPTXISD::TexCubeArrayFloatFloat";
00482   case NVPTXISD::TexCubeArrayFloatFloatLevel:
00483     return "NVPTXISD::TexCubeArrayFloatFloatLevel";
00484   case NVPTXISD::TexCubeArrayS32Float:
00485     return "NVPTXISD::TexCubeArrayS32Float";
00486   case NVPTXISD::TexCubeArrayS32FloatLevel:
00487     return "NVPTXISD::TexCubeArrayS32FloatLevel";
00488   case NVPTXISD::TexCubeArrayU32Float:
00489     return "NVPTXISD::TexCubeArrayU32Float";
00490   case NVPTXISD::TexCubeArrayU32FloatLevel:
00491     return "NVPTXISD::TexCubeArrayU32FloatLevel";
00492   case NVPTXISD::Tld4R2DFloatFloat:
00493     return "NVPTXISD::Tld4R2DFloatFloat";
00494   case NVPTXISD::Tld4G2DFloatFloat:
00495     return "NVPTXISD::Tld4G2DFloatFloat";
00496   case NVPTXISD::Tld4B2DFloatFloat:
00497     return "NVPTXISD::Tld4B2DFloatFloat";
00498   case NVPTXISD::Tld4A2DFloatFloat:
00499     return "NVPTXISD::Tld4A2DFloatFloat";
00500   case NVPTXISD::Tld4R2DS64Float:
00501     return "NVPTXISD::Tld4R2DS64Float";
00502   case NVPTXISD::Tld4G2DS64Float:
00503     return "NVPTXISD::Tld4G2DS64Float";
00504   case NVPTXISD::Tld4B2DS64Float:
00505     return "NVPTXISD::Tld4B2DS64Float";
00506   case NVPTXISD::Tld4A2DS64Float:
00507     return "NVPTXISD::Tld4A2DS64Float";
00508   case NVPTXISD::Tld4R2DU64Float:
00509     return "NVPTXISD::Tld4R2DU64Float";
00510   case NVPTXISD::Tld4G2DU64Float:
00511     return "NVPTXISD::Tld4G2DU64Float";
00512   case NVPTXISD::Tld4B2DU64Float:
00513     return "NVPTXISD::Tld4B2DU64Float";
00514   case NVPTXISD::Tld4A2DU64Float:
00515     return "NVPTXISD::Tld4A2DU64Float";
00516 
00517   case NVPTXISD::TexUnified1DFloatS32:
00518     return "NVPTXISD::TexUnified1DFloatS32";
00519   case NVPTXISD::TexUnified1DFloatFloat:
00520     return "NVPTXISD::TexUnified1DFloatFloat";
00521   case NVPTXISD::TexUnified1DFloatFloatLevel:
00522     return "NVPTXISD::TexUnified1DFloatFloatLevel";
00523   case NVPTXISD::TexUnified1DFloatFloatGrad:
00524     return "NVPTXISD::TexUnified1DFloatFloatGrad";
00525   case NVPTXISD::TexUnified1DS32S32:
00526     return "NVPTXISD::TexUnified1DS32S32";
00527   case NVPTXISD::TexUnified1DS32Float:
00528     return "NVPTXISD::TexUnified1DS32Float";
00529   case NVPTXISD::TexUnified1DS32FloatLevel:
00530     return "NVPTXISD::TexUnified1DS32FloatLevel";
00531   case NVPTXISD::TexUnified1DS32FloatGrad:
00532     return "NVPTXISD::TexUnified1DS32FloatGrad";
00533   case NVPTXISD::TexUnified1DU32S32:
00534     return "NVPTXISD::TexUnified1DU32S32";
00535   case NVPTXISD::TexUnified1DU32Float:
00536     return "NVPTXISD::TexUnified1DU32Float";
00537   case NVPTXISD::TexUnified1DU32FloatLevel:
00538     return "NVPTXISD::TexUnified1DU32FloatLevel";
00539   case NVPTXISD::TexUnified1DU32FloatGrad:
00540     return "NVPTXISD::TexUnified1DU32FloatGrad";
00541   case NVPTXISD::TexUnified1DArrayFloatS32:
00542     return "NVPTXISD::TexUnified1DArrayFloatS32";
00543   case NVPTXISD::TexUnified1DArrayFloatFloat:
00544     return "NVPTXISD::TexUnified1DArrayFloatFloat";
00545   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
00546     return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
00547   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
00548     return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
00549   case NVPTXISD::TexUnified1DArrayS32S32:
00550     return "NVPTXISD::TexUnified1DArrayS32S32";
00551   case NVPTXISD::TexUnified1DArrayS32Float:
00552     return "NVPTXISD::TexUnified1DArrayS32Float";
00553   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
00554     return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
00555   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
00556     return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
00557   case NVPTXISD::TexUnified1DArrayU32S32:
00558     return "NVPTXISD::TexUnified1DArrayU32S32";
00559   case NVPTXISD::TexUnified1DArrayU32Float:
00560     return "NVPTXISD::TexUnified1DArrayU32Float";
00561   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
00562     return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
00563   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
00564     return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
00565   case NVPTXISD::TexUnified2DFloatS32:
00566     return "NVPTXISD::TexUnified2DFloatS32";
00567   case NVPTXISD::TexUnified2DFloatFloat:
00568     return "NVPTXISD::TexUnified2DFloatFloat";
00569   case NVPTXISD::TexUnified2DFloatFloatLevel:
00570     return "NVPTXISD::TexUnified2DFloatFloatLevel";
00571   case NVPTXISD::TexUnified2DFloatFloatGrad:
00572     return "NVPTXISD::TexUnified2DFloatFloatGrad";
00573   case NVPTXISD::TexUnified2DS32S32:
00574     return "NVPTXISD::TexUnified2DS32S32";
00575   case NVPTXISD::TexUnified2DS32Float:
00576     return "NVPTXISD::TexUnified2DS32Float";
00577   case NVPTXISD::TexUnified2DS32FloatLevel:
00578     return "NVPTXISD::TexUnified2DS32FloatLevel";
00579   case NVPTXISD::TexUnified2DS32FloatGrad:
00580     return "NVPTXISD::TexUnified2DS32FloatGrad";
00581   case NVPTXISD::TexUnified2DU32S32:
00582     return "NVPTXISD::TexUnified2DU32S32";
00583   case NVPTXISD::TexUnified2DU32Float:
00584     return "NVPTXISD::TexUnified2DU32Float";
00585   case NVPTXISD::TexUnified2DU32FloatLevel:
00586     return "NVPTXISD::TexUnified2DU32FloatLevel";
00587   case NVPTXISD::TexUnified2DU32FloatGrad:
00588     return "NVPTXISD::TexUnified2DU32FloatGrad";
00589   case NVPTXISD::TexUnified2DArrayFloatS32:
00590     return "NVPTXISD::TexUnified2DArrayFloatS32";
00591   case NVPTXISD::TexUnified2DArrayFloatFloat:
00592     return "NVPTXISD::TexUnified2DArrayFloatFloat";
00593   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
00594     return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
00595   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
00596     return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
00597   case NVPTXISD::TexUnified2DArrayS32S32:
00598     return "NVPTXISD::TexUnified2DArrayS32S32";
00599   case NVPTXISD::TexUnified2DArrayS32Float:
00600     return "NVPTXISD::TexUnified2DArrayS32Float";
00601   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
00602     return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
00603   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
00604     return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
00605   case NVPTXISD::TexUnified2DArrayU32S32:
00606     return "NVPTXISD::TexUnified2DArrayU32S32";
00607   case NVPTXISD::TexUnified2DArrayU32Float:
00608     return "NVPTXISD::TexUnified2DArrayU32Float";
00609   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
00610     return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
00611   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
00612     return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
00613   case NVPTXISD::TexUnified3DFloatS32:
00614     return "NVPTXISD::TexUnified3DFloatS32";
00615   case NVPTXISD::TexUnified3DFloatFloat:
00616     return "NVPTXISD::TexUnified3DFloatFloat";
00617   case NVPTXISD::TexUnified3DFloatFloatLevel:
00618     return "NVPTXISD::TexUnified3DFloatFloatLevel";
00619   case NVPTXISD::TexUnified3DFloatFloatGrad:
00620     return "NVPTXISD::TexUnified3DFloatFloatGrad";
00621   case NVPTXISD::TexUnified3DS32S32:
00622     return "NVPTXISD::TexUnified3DS32S32";
00623   case NVPTXISD::TexUnified3DS32Float:
00624     return "NVPTXISD::TexUnified3DS32Float";
00625   case NVPTXISD::TexUnified3DS32FloatLevel:
00626     return "NVPTXISD::TexUnified3DS32FloatLevel";
00627   case NVPTXISD::TexUnified3DS32FloatGrad:
00628     return "NVPTXISD::TexUnified3DS32FloatGrad";
00629   case NVPTXISD::TexUnified3DU32S32:
00630     return "NVPTXISD::TexUnified3DU32S32";
00631   case NVPTXISD::TexUnified3DU32Float:
00632     return "NVPTXISD::TexUnified3DU32Float";
00633   case NVPTXISD::TexUnified3DU32FloatLevel:
00634     return "NVPTXISD::TexUnified3DU32FloatLevel";
00635   case NVPTXISD::TexUnified3DU32FloatGrad:
00636     return "NVPTXISD::TexUnified3DU32FloatGrad";
00637   case NVPTXISD::TexUnifiedCubeFloatFloat:
00638     return "NVPTXISD::TexUnifiedCubeFloatFloat";
00639   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
00640     return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
00641   case NVPTXISD::TexUnifiedCubeS32Float:
00642     return "NVPTXISD::TexUnifiedCubeS32Float";
00643   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
00644     return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
00645   case NVPTXISD::TexUnifiedCubeU32Float:
00646     return "NVPTXISD::TexUnifiedCubeU32Float";
00647   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
00648     return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
00649   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
00650     return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
00651   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
00652     return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
00653   case NVPTXISD::TexUnifiedCubeArrayS32Float:
00654     return "NVPTXISD::TexUnifiedCubeArrayS32Float";
00655   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
00656     return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
00657   case NVPTXISD::TexUnifiedCubeArrayU32Float:
00658     return "NVPTXISD::TexUnifiedCubeArrayU32Float";
00659   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
00660     return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
00661   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
00662     return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
00663   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
00664     return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
00665   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
00666     return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
00667   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
00668     return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
00669   case NVPTXISD::Tld4UnifiedR2DS64Float:
00670     return "NVPTXISD::Tld4UnifiedR2DS64Float";
00671   case NVPTXISD::Tld4UnifiedG2DS64Float:
00672     return "NVPTXISD::Tld4UnifiedG2DS64Float";
00673   case NVPTXISD::Tld4UnifiedB2DS64Float:
00674     return "NVPTXISD::Tld4UnifiedB2DS64Float";
00675   case NVPTXISD::Tld4UnifiedA2DS64Float:
00676     return "NVPTXISD::Tld4UnifiedA2DS64Float";
00677   case NVPTXISD::Tld4UnifiedR2DU64Float:
00678     return "NVPTXISD::Tld4UnifiedR2DU64Float";
00679   case NVPTXISD::Tld4UnifiedG2DU64Float:
00680     return "NVPTXISD::Tld4UnifiedG2DU64Float";
00681   case NVPTXISD::Tld4UnifiedB2DU64Float:
00682     return "NVPTXISD::Tld4UnifiedB2DU64Float";
00683   case NVPTXISD::Tld4UnifiedA2DU64Float:
00684     return "NVPTXISD::Tld4UnifiedA2DU64Float";
00685 
00686   case NVPTXISD::Suld1DI8Clamp:          return "NVPTXISD::Suld1DI8Clamp";
00687   case NVPTXISD::Suld1DI16Clamp:         return "NVPTXISD::Suld1DI16Clamp";
00688   case NVPTXISD::Suld1DI32Clamp:         return "NVPTXISD::Suld1DI32Clamp";
00689   case NVPTXISD::Suld1DI64Clamp:         return "NVPTXISD::Suld1DI64Clamp";
00690   case NVPTXISD::Suld1DV2I8Clamp:        return "NVPTXISD::Suld1DV2I8Clamp";
00691   case NVPTXISD::Suld1DV2I16Clamp:       return "NVPTXISD::Suld1DV2I16Clamp";
00692   case NVPTXISD::Suld1DV2I32Clamp:       return "NVPTXISD::Suld1DV2I32Clamp";
00693   case NVPTXISD::Suld1DV2I64Clamp:       return "NVPTXISD::Suld1DV2I64Clamp";
00694   case NVPTXISD::Suld1DV4I8Clamp:        return "NVPTXISD::Suld1DV4I8Clamp";
00695   case NVPTXISD::Suld1DV4I16Clamp:       return "NVPTXISD::Suld1DV4I16Clamp";
00696   case NVPTXISD::Suld1DV4I32Clamp:       return "NVPTXISD::Suld1DV4I32Clamp";
00697 
00698   case NVPTXISD::Suld1DArrayI8Clamp:   return "NVPTXISD::Suld1DArrayI8Clamp";
00699   case NVPTXISD::Suld1DArrayI16Clamp:  return "NVPTXISD::Suld1DArrayI16Clamp";
00700   case NVPTXISD::Suld1DArrayI32Clamp:  return "NVPTXISD::Suld1DArrayI32Clamp";
00701   case NVPTXISD::Suld1DArrayI64Clamp:  return "NVPTXISD::Suld1DArrayI64Clamp";
00702   case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
00703   case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
00704   case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
00705   case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
00706   case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
00707   case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
00708   case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";
00709 
00710   case NVPTXISD::Suld2DI8Clamp:          return "NVPTXISD::Suld2DI8Clamp";
00711   case NVPTXISD::Suld2DI16Clamp:         return "NVPTXISD::Suld2DI16Clamp";
00712   case NVPTXISD::Suld2DI32Clamp:         return "NVPTXISD::Suld2DI32Clamp";
00713   case NVPTXISD::Suld2DI64Clamp:         return "NVPTXISD::Suld2DI64Clamp";
00714   case NVPTXISD::Suld2DV2I8Clamp:        return "NVPTXISD::Suld2DV2I8Clamp";
00715   case NVPTXISD::Suld2DV2I16Clamp:       return "NVPTXISD::Suld2DV2I16Clamp";
00716   case NVPTXISD::Suld2DV2I32Clamp:       return "NVPTXISD::Suld2DV2I32Clamp";
00717   case NVPTXISD::Suld2DV2I64Clamp:       return "NVPTXISD::Suld2DV2I64Clamp";
00718   case NVPTXISD::Suld2DV4I8Clamp:        return "NVPTXISD::Suld2DV4I8Clamp";
00719   case NVPTXISD::Suld2DV4I16Clamp:       return "NVPTXISD::Suld2DV4I16Clamp";
00720   case NVPTXISD::Suld2DV4I32Clamp:       return "NVPTXISD::Suld2DV4I32Clamp";
00721 
00722   case NVPTXISD::Suld2DArrayI8Clamp:   return "NVPTXISD::Suld2DArrayI8Clamp";
00723   case NVPTXISD::Suld2DArrayI16Clamp:  return "NVPTXISD::Suld2DArrayI16Clamp";
00724   case NVPTXISD::Suld2DArrayI32Clamp:  return "NVPTXISD::Suld2DArrayI32Clamp";
00725   case NVPTXISD::Suld2DArrayI64Clamp:  return "NVPTXISD::Suld2DArrayI64Clamp";
00726   case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
00727   case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
00728   case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
00729   case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
00730   case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
00731   case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
00732   case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";
00733 
00734   case NVPTXISD::Suld3DI8Clamp:          return "NVPTXISD::Suld3DI8Clamp";
00735   case NVPTXISD::Suld3DI16Clamp:         return "NVPTXISD::Suld3DI16Clamp";
00736   case NVPTXISD::Suld3DI32Clamp:         return "NVPTXISD::Suld3DI32Clamp";
00737   case NVPTXISD::Suld3DI64Clamp:         return "NVPTXISD::Suld3DI64Clamp";
00738   case NVPTXISD::Suld3DV2I8Clamp:        return "NVPTXISD::Suld3DV2I8Clamp";
00739   case NVPTXISD::Suld3DV2I16Clamp:       return "NVPTXISD::Suld3DV2I16Clamp";
00740   case NVPTXISD::Suld3DV2I32Clamp:       return "NVPTXISD::Suld3DV2I32Clamp";
00741   case NVPTXISD::Suld3DV2I64Clamp:       return "NVPTXISD::Suld3DV2I64Clamp";
00742   case NVPTXISD::Suld3DV4I8Clamp:        return "NVPTXISD::Suld3DV4I8Clamp";
00743   case NVPTXISD::Suld3DV4I16Clamp:       return "NVPTXISD::Suld3DV4I16Clamp";
00744   case NVPTXISD::Suld3DV4I32Clamp:       return "NVPTXISD::Suld3DV4I32Clamp";
00745 
00746   case NVPTXISD::Suld1DI8Trap:          return "NVPTXISD::Suld1DI8Trap";
00747   case NVPTXISD::Suld1DI16Trap:         return "NVPTXISD::Suld1DI16Trap";
00748   case NVPTXISD::Suld1DI32Trap:         return "NVPTXISD::Suld1DI32Trap";
00749   case NVPTXISD::Suld1DI64Trap:         return "NVPTXISD::Suld1DI64Trap";
00750   case NVPTXISD::Suld1DV2I8Trap:        return "NVPTXISD::Suld1DV2I8Trap";
00751   case NVPTXISD::Suld1DV2I16Trap:       return "NVPTXISD::Suld1DV2I16Trap";
00752   case NVPTXISD::Suld1DV2I32Trap:       return "NVPTXISD::Suld1DV2I32Trap";
00753   case NVPTXISD::Suld1DV2I64Trap:       return "NVPTXISD::Suld1DV2I64Trap";
00754   case NVPTXISD::Suld1DV4I8Trap:        return "NVPTXISD::Suld1DV4I8Trap";
00755   case NVPTXISD::Suld1DV4I16Trap:       return "NVPTXISD::Suld1DV4I16Trap";
00756   case NVPTXISD::Suld1DV4I32Trap:       return "NVPTXISD::Suld1DV4I32Trap";
00757 
00758   case NVPTXISD::Suld1DArrayI8Trap:     return "NVPTXISD::Suld1DArrayI8Trap";
00759   case NVPTXISD::Suld1DArrayI16Trap:    return "NVPTXISD::Suld1DArrayI16Trap";
00760   case NVPTXISD::Suld1DArrayI32Trap:    return "NVPTXISD::Suld1DArrayI32Trap";
00761   case NVPTXISD::Suld1DArrayI64Trap:    return "NVPTXISD::Suld1DArrayI64Trap";
00762   case NVPTXISD::Suld1DArrayV2I8Trap:   return "NVPTXISD::Suld1DArrayV2I8Trap";
00763   case NVPTXISD::Suld1DArrayV2I16Trap:  return "NVPTXISD::Suld1DArrayV2I16Trap";
00764   case NVPTXISD::Suld1DArrayV2I32Trap:  return "NVPTXISD::Suld1DArrayV2I32Trap";
00765   case NVPTXISD::Suld1DArrayV2I64Trap:  return "NVPTXISD::Suld1DArrayV2I64Trap";
00766   case NVPTXISD::Suld1DArrayV4I8Trap:   return "NVPTXISD::Suld1DArrayV4I8Trap";
00767   case NVPTXISD::Suld1DArrayV4I16Trap:  return "NVPTXISD::Suld1DArrayV4I16Trap";
00768   case NVPTXISD::Suld1DArrayV4I32Trap:  return "NVPTXISD::Suld1DArrayV4I32Trap";
00769 
00770   case NVPTXISD::Suld2DI8Trap:          return "NVPTXISD::Suld2DI8Trap";
00771   case NVPTXISD::Suld2DI16Trap:         return "NVPTXISD::Suld2DI16Trap";
00772   case NVPTXISD::Suld2DI32Trap:         return "NVPTXISD::Suld2DI32Trap";
00773   case NVPTXISD::Suld2DI64Trap:         return "NVPTXISD::Suld2DI64Trap";
00774   case NVPTXISD::Suld2DV2I8Trap:        return "NVPTXISD::Suld2DV2I8Trap";
00775   case NVPTXISD::Suld2DV2I16Trap:       return "NVPTXISD::Suld2DV2I16Trap";
00776   case NVPTXISD::Suld2DV2I32Trap:       return "NVPTXISD::Suld2DV2I32Trap";
00777   case NVPTXISD::Suld2DV2I64Trap:       return "NVPTXISD::Suld2DV2I64Trap";
00778   case NVPTXISD::Suld2DV4I8Trap:        return "NVPTXISD::Suld2DV4I8Trap";
00779   case NVPTXISD::Suld2DV4I16Trap:       return "NVPTXISD::Suld2DV4I16Trap";
00780   case NVPTXISD::Suld2DV4I32Trap:       return "NVPTXISD::Suld2DV4I32Trap";
00781 
00782   case NVPTXISD::Suld2DArrayI8Trap:     return "NVPTXISD::Suld2DArrayI8Trap";
00783   case NVPTXISD::Suld2DArrayI16Trap:    return "NVPTXISD::Suld2DArrayI16Trap";
00784   case NVPTXISD::Suld2DArrayI32Trap:    return "NVPTXISD::Suld2DArrayI32Trap";
00785   case NVPTXISD::Suld2DArrayI64Trap:    return "NVPTXISD::Suld2DArrayI64Trap";
00786   case NVPTXISD::Suld2DArrayV2I8Trap:   return "NVPTXISD::Suld2DArrayV2I8Trap";
00787   case NVPTXISD::Suld2DArrayV2I16Trap:  return "NVPTXISD::Suld2DArrayV2I16Trap";
00788   case NVPTXISD::Suld2DArrayV2I32Trap:  return "NVPTXISD::Suld2DArrayV2I32Trap";
00789   case NVPTXISD::Suld2DArrayV2I64Trap:  return "NVPTXISD::Suld2DArrayV2I64Trap";
00790   case NVPTXISD::Suld2DArrayV4I8Trap:   return "NVPTXISD::Suld2DArrayV4I8Trap";
00791   case NVPTXISD::Suld2DArrayV4I16Trap:  return "NVPTXISD::Suld2DArrayV4I16Trap";
00792   case NVPTXISD::Suld2DArrayV4I32Trap:  return "NVPTXISD::Suld2DArrayV4I32Trap";
00793 
00794   case NVPTXISD::Suld3DI8Trap:          return "NVPTXISD::Suld3DI8Trap";
00795   case NVPTXISD::Suld3DI16Trap:         return "NVPTXISD::Suld3DI16Trap";
00796   case NVPTXISD::Suld3DI32Trap:         return "NVPTXISD::Suld3DI32Trap";
00797   case NVPTXISD::Suld3DI64Trap:         return "NVPTXISD::Suld3DI64Trap";
00798   case NVPTXISD::Suld3DV2I8Trap:        return "NVPTXISD::Suld3DV2I8Trap";
00799   case NVPTXISD::Suld3DV2I16Trap:       return "NVPTXISD::Suld3DV2I16Trap";
00800   case NVPTXISD::Suld3DV2I32Trap:       return "NVPTXISD::Suld3DV2I32Trap";
00801   case NVPTXISD::Suld3DV2I64Trap:       return "NVPTXISD::Suld3DV2I64Trap";
00802   case NVPTXISD::Suld3DV4I8Trap:        return "NVPTXISD::Suld3DV4I8Trap";
00803   case NVPTXISD::Suld3DV4I16Trap:       return "NVPTXISD::Suld3DV4I16Trap";
00804   case NVPTXISD::Suld3DV4I32Trap:       return "NVPTXISD::Suld3DV4I32Trap";
00805 
00806   case NVPTXISD::Suld1DI8Zero:          return "NVPTXISD::Suld1DI8Zero";
00807   case NVPTXISD::Suld1DI16Zero:         return "NVPTXISD::Suld1DI16Zero";
00808   case NVPTXISD::Suld1DI32Zero:         return "NVPTXISD::Suld1DI32Zero";
00809   case NVPTXISD::Suld1DI64Zero:         return "NVPTXISD::Suld1DI64Zero";
00810   case NVPTXISD::Suld1DV2I8Zero:        return "NVPTXISD::Suld1DV2I8Zero";
00811   case NVPTXISD::Suld1DV2I16Zero:       return "NVPTXISD::Suld1DV2I16Zero";
00812   case NVPTXISD::Suld1DV2I32Zero:       return "NVPTXISD::Suld1DV2I32Zero";
00813   case NVPTXISD::Suld1DV2I64Zero:       return "NVPTXISD::Suld1DV2I64Zero";
00814   case NVPTXISD::Suld1DV4I8Zero:        return "NVPTXISD::Suld1DV4I8Zero";
00815   case NVPTXISD::Suld1DV4I16Zero:       return "NVPTXISD::Suld1DV4I16Zero";
00816   case NVPTXISD::Suld1DV4I32Zero:       return "NVPTXISD::Suld1DV4I32Zero";
00817 
00818   case NVPTXISD::Suld1DArrayI8Zero:     return "NVPTXISD::Suld1DArrayI8Zero";
00819   case NVPTXISD::Suld1DArrayI16Zero:    return "NVPTXISD::Suld1DArrayI16Zero";
00820   case NVPTXISD::Suld1DArrayI32Zero:    return "NVPTXISD::Suld1DArrayI32Zero";
00821   case NVPTXISD::Suld1DArrayI64Zero:    return "NVPTXISD::Suld1DArrayI64Zero";
00822   case NVPTXISD::Suld1DArrayV2I8Zero:   return "NVPTXISD::Suld1DArrayV2I8Zero";
00823   case NVPTXISD::Suld1DArrayV2I16Zero:  return "NVPTXISD::Suld1DArrayV2I16Zero";
00824   case NVPTXISD::Suld1DArrayV2I32Zero:  return "NVPTXISD::Suld1DArrayV2I32Zero";
00825   case NVPTXISD::Suld1DArrayV2I64Zero:  return "NVPTXISD::Suld1DArrayV2I64Zero";
00826   case NVPTXISD::Suld1DArrayV4I8Zero:   return "NVPTXISD::Suld1DArrayV4I8Zero";
00827   case NVPTXISD::Suld1DArrayV4I16Zero:  return "NVPTXISD::Suld1DArrayV4I16Zero";
00828   case NVPTXISD::Suld1DArrayV4I32Zero:  return "NVPTXISD::Suld1DArrayV4I32Zero";
00829 
00830   case NVPTXISD::Suld2DI8Zero:          return "NVPTXISD::Suld2DI8Zero";
00831   case NVPTXISD::Suld2DI16Zero:         return "NVPTXISD::Suld2DI16Zero";
00832   case NVPTXISD::Suld2DI32Zero:         return "NVPTXISD::Suld2DI32Zero";
00833   case NVPTXISD::Suld2DI64Zero:         return "NVPTXISD::Suld2DI64Zero";
00834   case NVPTXISD::Suld2DV2I8Zero:        return "NVPTXISD::Suld2DV2I8Zero";
00835   case NVPTXISD::Suld2DV2I16Zero:       return "NVPTXISD::Suld2DV2I16Zero";
00836   case NVPTXISD::Suld2DV2I32Zero:       return "NVPTXISD::Suld2DV2I32Zero";
00837   case NVPTXISD::Suld2DV2I64Zero:       return "NVPTXISD::Suld2DV2I64Zero";
00838   case NVPTXISD::Suld2DV4I8Zero:        return "NVPTXISD::Suld2DV4I8Zero";
00839   case NVPTXISD::Suld2DV4I16Zero:       return "NVPTXISD::Suld2DV4I16Zero";
00840   case NVPTXISD::Suld2DV4I32Zero:       return "NVPTXISD::Suld2DV4I32Zero";
00841 
00842   case NVPTXISD::Suld2DArrayI8Zero:     return "NVPTXISD::Suld2DArrayI8Zero";
00843   case NVPTXISD::Suld2DArrayI16Zero:    return "NVPTXISD::Suld2DArrayI16Zero";
00844   case NVPTXISD::Suld2DArrayI32Zero:    return "NVPTXISD::Suld2DArrayI32Zero";
00845   case NVPTXISD::Suld2DArrayI64Zero:    return "NVPTXISD::Suld2DArrayI64Zero";
00846   case NVPTXISD::Suld2DArrayV2I8Zero:   return "NVPTXISD::Suld2DArrayV2I8Zero";
00847   case NVPTXISD::Suld2DArrayV2I16Zero:  return "NVPTXISD::Suld2DArrayV2I16Zero";
00848   case NVPTXISD::Suld2DArrayV2I32Zero:  return "NVPTXISD::Suld2DArrayV2I32Zero";
00849   case NVPTXISD::Suld2DArrayV2I64Zero:  return "NVPTXISD::Suld2DArrayV2I64Zero";
00850   case NVPTXISD::Suld2DArrayV4I8Zero:   return "NVPTXISD::Suld2DArrayV4I8Zero";
00851   case NVPTXISD::Suld2DArrayV4I16Zero:  return "NVPTXISD::Suld2DArrayV4I16Zero";
00852   case NVPTXISD::Suld2DArrayV4I32Zero:  return "NVPTXISD::Suld2DArrayV4I32Zero";
00853 
00854   case NVPTXISD::Suld3DI8Zero:          return "NVPTXISD::Suld3DI8Zero";
00855   case NVPTXISD::Suld3DI16Zero:         return "NVPTXISD::Suld3DI16Zero";
00856   case NVPTXISD::Suld3DI32Zero:         return "NVPTXISD::Suld3DI32Zero";
00857   case NVPTXISD::Suld3DI64Zero:         return "NVPTXISD::Suld3DI64Zero";
00858   case NVPTXISD::Suld3DV2I8Zero:        return "NVPTXISD::Suld3DV2I8Zero";
00859   case NVPTXISD::Suld3DV2I16Zero:       return "NVPTXISD::Suld3DV2I16Zero";
00860   case NVPTXISD::Suld3DV2I32Zero:       return "NVPTXISD::Suld3DV2I32Zero";
00861   case NVPTXISD::Suld3DV2I64Zero:       return "NVPTXISD::Suld3DV2I64Zero";
00862   case NVPTXISD::Suld3DV4I8Zero:        return "NVPTXISD::Suld3DV4I8Zero";
00863   case NVPTXISD::Suld3DV4I16Zero:       return "NVPTXISD::Suld3DV4I16Zero";
00864   case NVPTXISD::Suld3DV4I32Zero:       return "NVPTXISD::Suld3DV4I32Zero";
00865   }
00866   return nullptr;
00867 }
00868 
00869 TargetLoweringBase::LegalizeTypeAction
00870 NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
00871   if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
00872     return TypeSplitVector;
00873 
00874   return TargetLoweringBase::getPreferredVectorAction(VT);
00875 }
00876 
00877 SDValue
00878 NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
00879   SDLoc dl(Op);
00880   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
00881   Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
00882   return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
00883 }
00884 
// Build the PTX ".callprototype" declaration string used when lowering an
// indirect call.  The string names the prototype "prototype_<uniqueCallSite>"
// and spells out the return slot followed by one ".param" entry per argument,
// matching the layout LowerCall emits for the actual call.  The text must be
// byte-exact PTX syntax; do not reformat the emitted pieces.
//
// \param retTy        IR return type of the callee.
// \param Args         high-level argument list from the call lowering info.
// \param Outs         flattened SDAG output args; may have more entries than
//                     Args (aggregates/vectors expand to multiple entries).
// \param retAlignment byte alignment to declare for aggregate/vector returns.
// \param CS           the call site; used to read per-argument "align"
//                     metadata.
std::string
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  unsigned retAlignment,
                                  const ImmutableCallSite *CS) const {

  // Only the >= sm_20 ABI is supported; the release-build early return keeps
  // the function well-defined if the assert is compiled out.
  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  // Return slot: "()" for void, otherwise one ".param" describing the value.
  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
      unsigned size = 0;
      // Integer returns are widened to at least .b32 per the PTX ABI.
      if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
        if (size < 32)
          size = 32;
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      // Pointers are returned as an integer of pointer width.
      O << ".param .b" << getPointerTy().getSizeInBits() << " _";
    } else if ((retTy->getTypeID() == Type::StructTyID) ||
               isa<VectorType>(retTy)) {
      // Aggregates/vectors come back as an aligned byte array sized by the
      // data layout.
      O << ".param .align "
        << retAlignment
        << " .b8 _["
        << getDataLayout()->getTypeAllocSize(retTy) << "]";
    } else {
      llvm_unreachable("Unknown return type");
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;
  MVT thePointerTy = getPointerTy();

  // OIdx walks Outs in parallel with Args; it is advanced extra steps when an
  // aggregate/vector argument expands into several Outs entries.
  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        // Aggregates/vectors are passed as an aligned byte array; prefer the
        // call-site "align" metadata, falling back to ABI type alignment.
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
        const DataLayout *TD = getDataLayout();
        // +1 because index 0 is reserved for return type alignment
        if (!llvm::getAlign(*CallI, i + 1, align))
          align = TD->getABITypeAlignment(Ty);
        unsigned sz = TD->getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // update the index for Outs
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
       // i8 types in IR will be i16 types in SDAG
      assert((getValueType(Ty) == Outs[OIdx].VT ||
             (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // scalar type
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        // Widen sub-32-bit integers to .b32, mirroring the return-slot rule.
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    // byval pointer argument: declare a byte array covering the pointee,
    // using the alignment recorded in the byval flags.
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}
00992 
00993 unsigned
00994 NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
00995                                           const ImmutableCallSite *CS,
00996                                           Type *Ty,
00997                                           unsigned Idx) const {
00998   const DataLayout *TD = getDataLayout();
00999   unsigned Align = 0;
01000   const Value *DirectCallee = CS->getCalledFunction();
01001 
01002   if (!DirectCallee) {
01003     // We don't have a direct function symbol, but that may be because of
01004     // constant cast instructions in the call.
01005     const Instruction *CalleeI = CS->getInstruction();
01006     assert(CalleeI && "Call target is not a function or derived value?");
01007 
01008     // With bitcast'd call targets, the instruction will be the call
01009     if (isa<CallInst>(CalleeI)) {
01010       // Check if we have call alignment metadata
01011       if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
01012         return Align;
01013 
01014       const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
01015       // Ignore any bitcast instructions
01016       while(isa<ConstantExpr>(CalleeV)) {
01017         const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
01018         if (!CE->isCast())
01019           break;
01020         // Look through the bitcast
01021         CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
01022       }
01023 
01024       // We have now looked past all of the bitcasts.  Do we finally have a
01025       // Function?
01026       if (isa<Function>(CalleeV))
01027         DirectCallee = CalleeV;
01028     }
01029   }
01030 
01031   // Check for function alignment information if we found that the
01032   // ultimate target is a Function
01033   if (DirectCallee)
01034     if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
01035       return Align;
01036 
01037   // Call is indirect or alignment information is not available, fall back to
01038   // the ABI type alignment
01039   return TD->getABITypeAlignment(Ty);
01040 }
01041 
01042 SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
01043                                        SmallVectorImpl<SDValue> &InVals) const {
01044   SelectionDAG &DAG = CLI.DAG;
01045   SDLoc dl = CLI.DL;
01046   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
01047   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
01048   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
01049   SDValue Chain = CLI.Chain;
01050   SDValue Callee = CLI.Callee;
01051   bool &isTailCall = CLI.IsTailCall;
01052   ArgListTy &Args = CLI.getArgs();
01053   Type *retTy = CLI.RetTy;
01054   ImmutableCallSite *CS = CLI.CS;
01055 
01056   bool isABI = (STI.getSmVersion() >= 20);
01057   assert(isABI && "Non-ABI compilation is not supported");
01058   if (!isABI)
01059     return Chain;
01060   const DataLayout *TD = getDataLayout();
01061   MachineFunction &MF = DAG.getMachineFunction();
01062   const Function *F = MF.getFunction();
01063 
01064   SDValue tempChain = Chain;
01065   Chain = DAG.getCALLSEQ_START(Chain,
01066                                DAG.getIntPtrConstant(uniqueCallSite, dl, true),
01067                                dl);
01068   SDValue InFlag = Chain.getValue(1);
01069 
01070   unsigned paramCount = 0;
01071   // Args.size() and Outs.size() need not match.
01072   // Outs.size() will be larger
01073   //   * if there is an aggregate argument with multiple fields (each field
01074   //     showing up separately in Outs)
01075   //   * if there is a vector argument with more than typical vector-length
01076   //     elements (generally if more than 4) where each vector element is
01077   //     individually present in Outs.
01078   // So a different index should be used for indexing into Outs/OutVals.
01079   // See similar issue in LowerFormalArguments.
01080   unsigned OIdx = 0;
01081   // Declare the .params or .reg need to pass values
01082   // to the function
01083   for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
01084     EVT VT = Outs[OIdx].VT;
01085     Type *Ty = Args[i].Ty;
01086 
01087     if (!Outs[OIdx].Flags.isByVal()) {
01088       if (Ty->isAggregateType()) {
01089         // aggregate
01090         SmallVector<EVT, 16> vtparts;
01091         SmallVector<uint64_t, 16> Offsets;
01092         ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0);
01093 
01094         unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
01095         // declare .param .align <align> .b8 .param<n>[<size>];
01096         unsigned sz = TD->getTypeAllocSize(Ty);
01097         SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01098         SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
01099                                                              MVT::i32),
01100                                       DAG.getConstant(paramCount, dl, MVT::i32),
01101                                       DAG.getConstant(sz, dl, MVT::i32),
01102                                       InFlag };
01103         Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
01104                             DeclareParamOps);
01105         InFlag = Chain.getValue(1);
01106         for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
01107           EVT elemtype = vtparts[j];
01108           unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
01109           if (elemtype.isInteger() && (sz < 8))
01110             sz = 8;
01111           SDValue StVal = OutVals[OIdx];
01112           if (elemtype.getSizeInBits() < 16) {
01113             StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
01114           }
01115           SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01116           SDValue CopyParamOps[] = { Chain,
01117                                      DAG.getConstant(paramCount, dl, MVT::i32),
01118                                      DAG.getConstant(Offsets[j], dl, MVT::i32),
01119                                      StVal, InFlag };
01120           Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
01121                                           CopyParamVTs, CopyParamOps,
01122                                           elemtype, MachinePointerInfo(),
01123                                           ArgAlign);
01124           InFlag = Chain.getValue(1);
01125           ++OIdx;
01126         }
01127         if (vtparts.size() > 0)
01128           --OIdx;
01129         ++paramCount;
01130         continue;
01131       }
01132       if (Ty->isVectorTy()) {
01133         EVT ObjectVT = getValueType(Ty);
01134         unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
01135         // declare .param .align <align> .b8 .param<n>[<size>];
01136         unsigned sz = TD->getTypeAllocSize(Ty);
01137         SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01138         SDValue DeclareParamOps[] = { Chain,
01139                                       DAG.getConstant(align, dl, MVT::i32),
01140                                       DAG.getConstant(paramCount, dl, MVT::i32),
01141                                       DAG.getConstant(sz, dl, MVT::i32),
01142                                       InFlag };
01143         Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
01144                             DeclareParamOps);
01145         InFlag = Chain.getValue(1);
01146         unsigned NumElts = ObjectVT.getVectorNumElements();
01147         EVT EltVT = ObjectVT.getVectorElementType();
01148         EVT MemVT = EltVT;
01149         bool NeedExtend = false;
01150         if (EltVT.getSizeInBits() < 16) {
01151           NeedExtend = true;
01152           EltVT = MVT::i16;
01153         }
01154 
01155         // V1 store
01156         if (NumElts == 1) {
01157           SDValue Elt = OutVals[OIdx++];
01158           if (NeedExtend)
01159             Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);
01160 
01161           SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01162           SDValue CopyParamOps[] = { Chain,
01163                                      DAG.getConstant(paramCount, dl, MVT::i32),
01164                                      DAG.getConstant(0, dl, MVT::i32), Elt,
01165                                      InFlag };
01166           Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
01167                                           CopyParamVTs, CopyParamOps,
01168                                           MemVT, MachinePointerInfo());
01169           InFlag = Chain.getValue(1);
01170         } else if (NumElts == 2) {
01171           SDValue Elt0 = OutVals[OIdx++];
01172           SDValue Elt1 = OutVals[OIdx++];
01173           if (NeedExtend) {
01174             Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
01175             Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
01176           }
01177 
01178           SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01179           SDValue CopyParamOps[] = { Chain,
01180                                      DAG.getConstant(paramCount, dl, MVT::i32),
01181                                      DAG.getConstant(0, dl, MVT::i32), Elt0,
01182                                      Elt1, InFlag };
01183           Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
01184                                           CopyParamVTs, CopyParamOps,
01185                                           MemVT, MachinePointerInfo());
01186           InFlag = Chain.getValue(1);
01187         } else {
01188           unsigned curOffset = 0;
01189           // V4 stores
01190           // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
01191           // the
01192           // vector will be expanded to a power of 2 elements, so we know we can
01193           // always round up to the next multiple of 4 when creating the vector
01194           // stores.
01195           // e.g.  4 elem => 1 st.v4
01196           //       6 elem => 2 st.v4
01197           //       8 elem => 2 st.v4
01198           //      11 elem => 3 st.v4
01199           unsigned VecSize = 4;
01200           if (EltVT.getSizeInBits() == 64)
01201             VecSize = 2;
01202 
01203           // This is potentially only part of a vector, so assume all elements
01204           // are packed together.
01205           unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;
01206 
01207           for (unsigned i = 0; i < NumElts; i += VecSize) {
01208             // Get values
01209             SDValue StoreVal;
01210             SmallVector<SDValue, 8> Ops;
01211             Ops.push_back(Chain);
01212             Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
01213             Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32));
01214 
01215             unsigned Opc = NVPTXISD::StoreParamV2;
01216 
01217             StoreVal = OutVals[OIdx++];
01218             if (NeedExtend)
01219               StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
01220             Ops.push_back(StoreVal);
01221 
01222             if (i + 1 < NumElts) {
01223               StoreVal = OutVals[OIdx++];
01224               if (NeedExtend)
01225                 StoreVal =
01226                     DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
01227             } else {
01228               StoreVal = DAG.getUNDEF(EltVT);
01229             }
01230             Ops.push_back(StoreVal);
01231 
01232             if (VecSize == 4) {
01233               Opc = NVPTXISD::StoreParamV4;
01234               if (i + 2 < NumElts) {
01235                 StoreVal = OutVals[OIdx++];
01236                 if (NeedExtend)
01237                   StoreVal =
01238                       DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
01239               } else {
01240                 StoreVal = DAG.getUNDEF(EltVT);
01241               }
01242               Ops.push_back(StoreVal);
01243 
01244               if (i + 3 < NumElts) {
01245                 StoreVal = OutVals[OIdx++];
01246                 if (NeedExtend)
01247                   StoreVal =
01248                       DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
01249               } else {
01250                 StoreVal = DAG.getUNDEF(EltVT);
01251               }
01252               Ops.push_back(StoreVal);
01253             }
01254 
01255             Ops.push_back(InFlag);
01256 
01257             SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01258             Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
01259                                             MemVT, MachinePointerInfo());
01260             InFlag = Chain.getValue(1);
01261             curOffset += PerStoreOffset;
01262           }
01263         }
01264         ++paramCount;
01265         --OIdx;
01266         continue;
01267       }
01268       // Plain scalar
01269       // for ABI,    declare .param .b<size> .param<n>;
01270       unsigned sz = VT.getSizeInBits();
01271       bool needExtend = false;
01272       if (VT.isInteger()) {
01273         if (sz < 16)
01274           needExtend = true;
01275         if (sz < 32)
01276           sz = 32;
01277       }
01278       SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01279       SDValue DeclareParamOps[] = { Chain,
01280                                     DAG.getConstant(paramCount, dl, MVT::i32),
01281                                     DAG.getConstant(sz, dl, MVT::i32),
01282                                     DAG.getConstant(0, dl, MVT::i32), InFlag };
01283       Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
01284                           DeclareParamOps);
01285       InFlag = Chain.getValue(1);
01286       SDValue OutV = OutVals[OIdx];
01287       if (needExtend) {
01288         // zext/sext i1 to i16
01289         unsigned opc = ISD::ZERO_EXTEND;
01290         if (Outs[OIdx].Flags.isSExt())
01291           opc = ISD::SIGN_EXTEND;
01292         OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
01293       }
01294       SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01295       SDValue CopyParamOps[] = { Chain,
01296                                  DAG.getConstant(paramCount, dl, MVT::i32),
01297                                  DAG.getConstant(0, dl, MVT::i32), OutV,
01298                                  InFlag };
01299 
01300       unsigned opcode = NVPTXISD::StoreParam;
01301       if (Outs[OIdx].Flags.isZExt())
01302         opcode = NVPTXISD::StoreParamU32;
01303       else if (Outs[OIdx].Flags.isSExt())
01304         opcode = NVPTXISD::StoreParamS32;
01305       Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
01306                                       VT, MachinePointerInfo());
01307 
01308       InFlag = Chain.getValue(1);
01309       ++paramCount;
01310       continue;
01311     }
01312     // struct or vector
01313     SmallVector<EVT, 16> vtparts;
01314     SmallVector<uint64_t, 16> Offsets;
01315     const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
01316     assert(PTy && "Type of a byval parameter should be pointer");
01317     ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);
01318 
01319     // declare .param .align <align> .b8 .param<n>[<size>];
01320     unsigned sz = Outs[OIdx].Flags.getByValSize();
01321     SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01322     unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
01323     // The ByValAlign in the Outs[OIdx].Flags is alway set at this point,
01324     // so we don't need to worry about natural alignment or not.
01325     // See TargetLowering::LowerCallTo().
01326     SDValue DeclareParamOps[] = {
01327       Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32),
01328       DAG.getConstant(paramCount, dl, MVT::i32),
01329       DAG.getConstant(sz, dl, MVT::i32), InFlag
01330     };
01331     Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
01332                         DeclareParamOps);
01333     InFlag = Chain.getValue(1);
01334     for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
01335       EVT elemtype = vtparts[j];
01336       int curOffset = Offsets[j];
01337       unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
01338       SDValue srcAddr =
01339           DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
01340                       DAG.getConstant(curOffset, dl, getPointerTy()));
01341       SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
01342                                    MachinePointerInfo(), false, false, false,
01343                                    PartAlign);
01344       if (elemtype.getSizeInBits() < 16) {
01345         theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
01346       }
01347       SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01348       SDValue CopyParamOps[] = { Chain,
01349                                  DAG.getConstant(paramCount, dl, MVT::i32),
01350                                  DAG.getConstant(curOffset, dl, MVT::i32),
01351                                  theVal, InFlag };
01352       Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
01353                                       CopyParamOps, elemtype,
01354                                       MachinePointerInfo());
01355 
01356       InFlag = Chain.getValue(1);
01357     }
01358     ++paramCount;
01359   }
01360 
01361   GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
01362   unsigned retAlignment = 0;
01363 
01364   // Handle Result
01365   if (Ins.size() > 0) {
01366     SmallVector<EVT, 16> resvtparts;
01367     ComputeValueVTs(*this, retTy, resvtparts);
01368 
01369     // Declare
01370     //  .param .align 16 .b8 retval0[<size-in-bytes>], or
01371     //  .param .b<size-in-bits> retval0
01372     unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
01373     // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
01374     // these three types to match the logic in
01375     // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
01376     // Plus, this behavior is consistent with nvcc's.
01377     if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
01378         retTy->isPointerTy()) {
01379       // Scalar needs to be at least 32bit wide
01380       if (resultsz < 32)
01381         resultsz = 32;
01382       SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01383       SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
01384                                   DAG.getConstant(resultsz, dl, MVT::i32),
01385                                   DAG.getConstant(0, dl, MVT::i32), InFlag };
01386       Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
01387                           DeclareRetOps);
01388       InFlag = Chain.getValue(1);
01389     } else {
01390       retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
01391       SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01392       SDValue DeclareRetOps[] = { Chain,
01393                                   DAG.getConstant(retAlignment, dl, MVT::i32),
01394                                   DAG.getConstant(resultsz / 8, dl, MVT::i32),
01395                                   DAG.getConstant(0, dl, MVT::i32), InFlag };
01396       Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
01397                           DeclareRetOps);
01398       InFlag = Chain.getValue(1);
01399     }
01400   }
01401 
01402   if (!Func) {
01403     // This is indirect function call case : PTX requires a prototype of the
01404     // form
01405     // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
01406     // to be emitted, and the label has to used as the last arg of call
01407     // instruction.
01408     // The prototype is embedded in a string and put as the operand for a
01409     // CallPrototype SDNode which will print out to the value of the string.
01410     SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01411     std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
01412     const char *ProtoStr =
01413       nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
01414     SDValue ProtoOps[] = {
01415       Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
01416     };
01417     Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
01418     InFlag = Chain.getValue(1);
01419   }
01420   // Op to just print "call"
01421   SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01422   SDValue PrintCallOps[] = {
01423     Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
01424   };
01425   Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
01426                       dl, PrintCallVTs, PrintCallOps);
01427   InFlag = Chain.getValue(1);
01428 
01429   // Ops to print out the function name
01430   SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01431   SDValue CallVoidOps[] = { Chain, Callee, InFlag };
01432   Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
01433   InFlag = Chain.getValue(1);
01434 
01435   // Ops to print out the param list
01436   SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01437   SDValue CallArgBeginOps[] = { Chain, InFlag };
01438   Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
01439                       CallArgBeginOps);
01440   InFlag = Chain.getValue(1);
01441 
01442   for (unsigned i = 0, e = paramCount; i != e; ++i) {
01443     unsigned opcode;
01444     if (i == (e - 1))
01445       opcode = NVPTXISD::LastCallArg;
01446     else
01447       opcode = NVPTXISD::CallArg;
01448     SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01449     SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
01450                              DAG.getConstant(i, dl, MVT::i32), InFlag };
01451     Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
01452     InFlag = Chain.getValue(1);
01453   }
01454   SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01455   SDValue CallArgEndOps[] = { Chain,
01456                               DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
01457                               InFlag };
01458   Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
01459   InFlag = Chain.getValue(1);
01460 
01461   if (!Func) {
01462     SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
01463     SDValue PrototypeOps[] = { Chain,
01464                                DAG.getConstant(uniqueCallSite, dl, MVT::i32),
01465                                InFlag };
01466     Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
01467     InFlag = Chain.getValue(1);
01468   }
01469 
01470   // Generate loads from param memory/moves from registers for result
01471   if (Ins.size() > 0) {
01472     if (retTy && retTy->isVectorTy()) {
01473       EVT ObjectVT = getValueType(retTy);
01474       unsigned NumElts = ObjectVT.getVectorNumElements();
01475       EVT EltVT = ObjectVT.getVectorElementType();
01476       assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
01477                                                       ObjectVT) == NumElts &&
01478              "Vector was not scalarized");
01479       unsigned sz = EltVT.getSizeInBits();
01480       bool needTruncate = sz < 8;
01481 
01482       if (NumElts == 1) {
01483         // Just a simple load
01484         SmallVector<EVT, 4> LoadRetVTs;
01485         if (EltVT == MVT::i1 || EltVT == MVT::i8) {
01486           // If loading i1/i8 result, generate
01487           //   load.b8 i16
01488           //   if i1
01489           //   trunc i16 to i1
01490           LoadRetVTs.push_back(MVT::i16);
01491         } else
01492           LoadRetVTs.push_back(EltVT);
01493         LoadRetVTs.push_back(MVT::Other);
01494         LoadRetVTs.push_back(MVT::Glue);
01495         SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
01496                                 DAG.getConstant(0, dl, MVT::i32), InFlag};
01497         SDValue retval = DAG.getMemIntrinsicNode(
01498             NVPTXISD::LoadParam, dl,
01499             DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
01500         Chain = retval.getValue(1);
01501         InFlag = retval.getValue(2);
01502         SDValue Ret0 = retval;
01503         if (needTruncate)
01504           Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
01505         InVals.push_back(Ret0);
01506       } else if (NumElts == 2) {
01507         // LoadV2
01508         SmallVector<EVT, 4> LoadRetVTs;
01509         if (EltVT == MVT::i1 || EltVT == MVT::i8) {
01510           // If loading i1/i8 result, generate
01511           //   load.b8 i16
01512           //   if i1
01513           //   trunc i16 to i1
01514           LoadRetVTs.push_back(MVT::i16);
01515           LoadRetVTs.push_back(MVT::i16);
01516         } else {
01517           LoadRetVTs.push_back(EltVT);
01518           LoadRetVTs.push_back(EltVT);
01519         }
01520         LoadRetVTs.push_back(MVT::Other);
01521         LoadRetVTs.push_back(MVT::Glue);
01522         SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
01523                                 DAG.getConstant(0, dl, MVT::i32), InFlag};
01524         SDValue retval = DAG.getMemIntrinsicNode(
01525             NVPTXISD::LoadParamV2, dl,
01526             DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
01527         Chain = retval.getValue(2);
01528         InFlag = retval.getValue(3);
01529         SDValue Ret0 = retval.getValue(0);
01530         SDValue Ret1 = retval.getValue(1);
01531         if (needTruncate) {
01532           Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
01533           InVals.push_back(Ret0);
01534           Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
01535           InVals.push_back(Ret1);
01536         } else {
01537           InVals.push_back(Ret0);
01538           InVals.push_back(Ret1);
01539         }
01540       } else {
01541         // Split into N LoadV4
01542         unsigned Ofst = 0;
01543         unsigned VecSize = 4;
01544         unsigned Opc = NVPTXISD::LoadParamV4;
01545         if (EltVT.getSizeInBits() == 64) {
01546           VecSize = 2;
01547           Opc = NVPTXISD::LoadParamV2;
01548         }
01549         EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
01550         for (unsigned i = 0; i < NumElts; i += VecSize) {
01551           SmallVector<EVT, 8> LoadRetVTs;
01552           if (EltVT == MVT::i1 || EltVT == MVT::i8) {
01553             // If loading i1/i8 result, generate
01554             //   load.b8 i16
01555             //   if i1
01556             //   trunc i16 to i1
01557             for (unsigned j = 0; j < VecSize; ++j)
01558               LoadRetVTs.push_back(MVT::i16);
01559           } else {
01560             for (unsigned j = 0; j < VecSize; ++j)
01561               LoadRetVTs.push_back(EltVT);
01562           }
01563           LoadRetVTs.push_back(MVT::Other);
01564           LoadRetVTs.push_back(MVT::Glue);
01565           SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
01566                                   DAG.getConstant(Ofst, dl, MVT::i32), InFlag};
01567           SDValue retval = DAG.getMemIntrinsicNode(
01568               Opc, dl, DAG.getVTList(LoadRetVTs),
01569               LoadRetOps, EltVT, MachinePointerInfo());
01570           if (VecSize == 2) {
01571             Chain = retval.getValue(2);
01572             InFlag = retval.getValue(3);
01573           } else {
01574             Chain = retval.getValue(4);
01575             InFlag = retval.getValue(5);
01576           }
01577 
01578           for (unsigned j = 0; j < VecSize; ++j) {
01579             if (i + j >= NumElts)
01580               break;
01581             SDValue Elt = retval.getValue(j);
01582             if (needTruncate)
01583               Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
01584             InVals.push_back(Elt);
01585           }
01586           Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
01587         }
01588       }
01589     } else {
01590       SmallVector<EVT, 16> VTs;
01591       SmallVector<uint64_t, 16> Offsets;
01592       ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0);
01593       assert(VTs.size() == Ins.size() && "Bad value decomposition");
01594       unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
01595       for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
01596         unsigned sz = VTs[i].getSizeInBits();
01597         unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
01598         bool needTruncate = sz < 8;
01599         if (VTs[i].isInteger() && (sz < 8))
01600           sz = 8;
01601 
01602         SmallVector<EVT, 4> LoadRetVTs;
01603         EVT TheLoadType = VTs[i];
01604         if (retTy->isIntegerTy() &&
01605             TD->getTypeAllocSizeInBits(retTy) < 32) {
01606           // This is for integer types only, and specifically not for
01607           // aggregates.
01608           LoadRetVTs.push_back(MVT::i32);
01609           TheLoadType = MVT::i32;
01610         } else if (sz < 16) {
01611           // If loading i1/i8 result, generate
01612           //   load i8 (-> i16)
01613           //   trunc i16 to i1/i8
01614           LoadRetVTs.push_back(MVT::i16);
01615         } else
01616           LoadRetVTs.push_back(Ins[i].VT);
01617         LoadRetVTs.push_back(MVT::Other);
01618         LoadRetVTs.push_back(MVT::Glue);
01619 
01620         SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
01621                                 DAG.getConstant(Offsets[i], dl, MVT::i32),
01622                                 InFlag};
01623         SDValue retval = DAG.getMemIntrinsicNode(
01624             NVPTXISD::LoadParam, dl,
01625             DAG.getVTList(LoadRetVTs), LoadRetOps,
01626             TheLoadType, MachinePointerInfo(), AlignI);
01627         Chain = retval.getValue(1);
01628         InFlag = retval.getValue(2);
01629         SDValue Ret0 = retval.getValue(0);
01630         if (needTruncate)
01631           Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
01632         InVals.push_back(Ret0);
01633       }
01634     }
01635   }
01636 
01637   Chain = DAG.getCALLSEQ_END(Chain,
01638                              DAG.getIntPtrConstant(uniqueCallSite, dl, true),
01639                              DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
01640                                                    true),
01641                              InFlag, dl);
01642   uniqueCallSite++;
01643 
01644   // set isTailCall to false for now, until we figure out how to express
01645   // tail call optimization in PTX
01646   isTailCall = false;
01647   return Chain;
01648 }
01649 
01650 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
01651 // (see LegalizeDAG.cpp). This is slow and uses local memory.
01652 // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
01653 SDValue
01654 NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
01655   SDNode *Node = Op.getNode();
01656   SDLoc dl(Node);
01657   SmallVector<SDValue, 8> Ops;
01658   unsigned NumOperands = Node->getNumOperands();
01659   for (unsigned i = 0; i < NumOperands; ++i) {
01660     SDValue SubOp = Node->getOperand(i);
01661     EVT VVT = SubOp.getNode()->getValueType(0);
01662     EVT EltVT = VVT.getVectorElementType();
01663     unsigned NumSubElem = VVT.getVectorNumElements();
01664     for (unsigned j = 0; j < NumSubElem; ++j) {
01665       Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
01666                                 DAG.getIntPtrConstant(j, dl)));
01667     }
01668   }
01669   return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
01670 }
01671 
01672 /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
01673 /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
01674 ///    amount, or
01675 /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
01676 ///    amount.
01677 SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
01678                                                   SelectionDAG &DAG) const {
01679   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
01680   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
01681 
01682   EVT VT = Op.getValueType();
01683   unsigned VTBits = VT.getSizeInBits();
01684   SDLoc dl(Op);
01685   SDValue ShOpLo = Op.getOperand(0);
01686   SDValue ShOpHi = Op.getOperand(1);
01687   SDValue ShAmt  = Op.getOperand(2);
01688   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
01689 
01690   if (VTBits == 32 && STI.getSmVersion() >= 35) {
01691 
01692     // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
01693     // {dHi, dLo} = {aHi, aLo} >> Amt
01694     //   dHi = aHi >> Amt
01695     //   dLo = shf.r.clamp aLo, aHi, Amt
01696 
01697     SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
01698     SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
01699                              ShAmt);
01700 
01701     SDValue Ops[2] = { Lo, Hi };
01702     return DAG.getMergeValues(Ops, dl);
01703   }
01704   else {
01705 
01706     // {dHi, dLo} = {aHi, aLo} >> Amt
01707     // - if (Amt>=size) then
01708     //      dLo = aHi >> (Amt-size)
01709     //      dHi = aHi >> Amt (this is either all 0 or all 1)
01710     //   else
01711     //      dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
01712     //      dHi = aHi >> Amt
01713 
01714     SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
01715                                    DAG.getConstant(VTBits, dl, MVT::i32),
01716                                    ShAmt);
01717     SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
01718     SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
01719                                      DAG.getConstant(VTBits, dl, MVT::i32));
01720     SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
01721     SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
01722     SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
01723 
01724     SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
01725                                DAG.getConstant(VTBits, dl, MVT::i32),
01726                                ISD::SETGE);
01727     SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
01728     SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
01729 
01730     SDValue Ops[2] = { Lo, Hi };
01731     return DAG.getMergeValues(Ops, dl);
01732   }
01733 }
01734 
01735 /// LowerShiftLeftParts - Lower SHL_PARTS, which
01736 /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
01737 ///    amount, or
01738 /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
01739 ///    amount.
01740 SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
01741                                                  SelectionDAG &DAG) const {
01742   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
01743   assert(Op.getOpcode() == ISD::SHL_PARTS);
01744 
01745   EVT VT = Op.getValueType();
01746   unsigned VTBits = VT.getSizeInBits();
01747   SDLoc dl(Op);
01748   SDValue ShOpLo = Op.getOperand(0);
01749   SDValue ShOpHi = Op.getOperand(1);
01750   SDValue ShAmt  = Op.getOperand(2);
01751 
01752   if (VTBits == 32 && STI.getSmVersion() >= 35) {
01753 
01754     // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
01755     // {dHi, dLo} = {aHi, aLo} << Amt
01756     //   dHi = shf.l.clamp aLo, aHi, Amt
01757     //   dLo = aLo << Amt
01758 
01759     SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
01760                              ShAmt);
01761     SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
01762 
01763     SDValue Ops[2] = { Lo, Hi };
01764     return DAG.getMergeValues(Ops, dl);
01765   }
01766   else {
01767 
01768     // {dHi, dLo} = {aHi, aLo} << Amt
01769     // - if (Amt>=size) then
01770     //      dLo = aLo << Amt (all 0)
01771     //      dLo = aLo << (Amt-size)
01772     //   else
01773     //      dLo = aLo << Amt
01774     //      dHi = (aHi << Amt) | (aLo >> (size-Amt))
01775 
01776     SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
01777                                    DAG.getConstant(VTBits, dl, MVT::i32),
01778                                    ShAmt);
01779     SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
01780     SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
01781                                      DAG.getConstant(VTBits, dl, MVT::i32));
01782     SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
01783     SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
01784     SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
01785 
01786     SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
01787                                DAG.getConstant(VTBits, dl, MVT::i32),
01788                                ISD::SETGE);
01789     SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
01790     SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
01791 
01792     SDValue Ops[2] = { Lo, Hi };
01793     return DAG.getMergeValues(Ops, dl);
01794   }
01795 }
01796 
01797 SDValue
01798 NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
01799   switch (Op.getOpcode()) {
01800   case ISD::RETURNADDR:
01801     return SDValue();
01802   case ISD::FRAMEADDR:
01803     return SDValue();
01804   case ISD::GlobalAddress:
01805     return LowerGlobalAddress(Op, DAG);
01806   case ISD::INTRINSIC_W_CHAIN:
01807     return Op;
01808   case ISD::BUILD_VECTOR:
01809   case ISD::EXTRACT_SUBVECTOR:
01810     return Op;
01811   case ISD::CONCAT_VECTORS:
01812     return LowerCONCAT_VECTORS(Op, DAG);
01813   case ISD::STORE:
01814     return LowerSTORE(Op, DAG);
01815   case ISD::LOAD:
01816     return LowerLOAD(Op, DAG);
01817   case ISD::SHL_PARTS:
01818     return LowerShiftLeftParts(Op, DAG);
01819   case ISD::SRA_PARTS:
01820   case ISD::SRL_PARTS:
01821     return LowerShiftRightParts(Op, DAG);
01822   case ISD::SELECT:
01823     return LowerSelect(Op, DAG);
01824   default:
01825     llvm_unreachable("Custom lowering not defined for operation");
01826   }
01827 }
01828 
01829 SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
01830   SDValue Op0 = Op->getOperand(0);
01831   SDValue Op1 = Op->getOperand(1);
01832   SDValue Op2 = Op->getOperand(2);
01833   SDLoc DL(Op.getNode());
01834 
01835   assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
01836 
01837   Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
01838   Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
01839   SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
01840   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
01841 
01842   return Trunc;
01843 }
01844 
01845 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
01846   if (Op.getValueType() == MVT::i1)
01847     return LowerLOADi1(Op, DAG);
01848   else
01849     return SDValue();
01850 }
01851 
01852 // v = ld i1* addr
01853 //   =>
01854 // v1 = ld i8* addr (-> i16)
01855 // v = trunc i16 to i1
01856 SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
01857   SDNode *Node = Op.getNode();
01858   LoadSDNode *LD = cast<LoadSDNode>(Node);
01859   SDLoc dl(Node);
01860   assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
01861   assert(Node->getValueType(0) == MVT::i1 &&
01862          "Custom lowering for i1 load only");
01863   SDValue newLD =
01864       DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
01865                   LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
01866                   LD->isInvariant(), LD->getAlignment());
01867   SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
01868   // The legalizer (the caller) is expecting two values from the legalized
01869   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
01870   // in LegalizeDAG.cpp which also uses MergeValues.
01871   SDValue Ops[] = { result, LD->getChain() };
01872   return DAG.getMergeValues(Ops, dl);
01873 }
01874 
01875 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
01876   EVT ValVT = Op.getOperand(1).getValueType();
01877   if (ValVT == MVT::i1)
01878     return LowerSTOREi1(Op, DAG);
01879   else if (ValVT.isVector())
01880     return LowerSTOREVector(Op, DAG);
01881   else
01882     return SDValue();
01883 }
01884 
// Lower a vector store into a single NVPTX StoreV2/StoreV4 target node when
// the vector type and alignment permit; otherwise return SDValue() so the
// generic legalizer scalarizes (or re-splits) the store.
SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue Val = N->getOperand(1);
  SDLoc DL(N);
  EVT ValVT = Val.getValueType();

  if (ValVT.isVector()) {
    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
    // legal.  We can (and should) split that into 2 stores of <2 x double> here
    // but I'm leaving that as a TODO for now.
    if (!ValVT.isSimple())
      return SDValue();
    switch (ValVT.getSimpleVT().SimpleTy) {
    default:
      // Any type not in the list below is left to the default lowering.
      return SDValue();
    case MVT::v2i8:
    case MVT::v2i16:
    case MVT::v2i32:
    case MVT::v2i64:
    case MVT::v2f32:
    case MVT::v2f64:
    case MVT::v4i8:
    case MVT::v4i16:
    case MVT::v4i32:
    case MVT::v4f32:
      // This is a "native" vector type
      break;
    }

    MemSDNode *MemSD = cast<MemSDNode>(N);
    const DataLayout *TD = getDataLayout();

    unsigned Align = MemSD->getAlignment();
    unsigned PrefAlign =
      TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
    if (Align < PrefAlign) {
      // This store is not sufficiently aligned, so bail out and let this vector
      // store be scalarized.  Note that we may still be able to emit smaller
      // vector stores.  For example, if we are storing a <4 x float> with an
      // alignment of 8, this check will fail but the legalizer will try again
      // with 2 x <2 x float>, which will succeed with an alignment of 8.
      return SDValue();
    }

    unsigned Opcode = 0;
    EVT EltVT = ValVT.getVectorElementType();
    unsigned NumElts = ValVT.getVectorNumElements();

    // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
    // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
    // stored type to i16 and propagate the "real" type as the memory type.
    bool NeedExt = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExt = true;

    // Pick the target store opcode from the element count (2 or 4; the
    // earlier type switch guarantees no other counts reach here).
    switch (NumElts) {
    default:
      return SDValue();
    case 2:
      Opcode = NVPTXISD::StoreV2;
      break;
    case 4: {
      Opcode = NVPTXISD::StoreV4;
      break;
    }
    }

    // Operand layout for the target node: chain, then the NumElts scalar
    // values, then the original store's remaining operands (address etc.).
    SmallVector<SDValue, 8> Ops;

    // First is the chain
    Ops.push_back(N->getOperand(0));

    // Then the split values
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
                                   DAG.getIntPtrConstant(i, DL));
      if (NeedExt)
        // Widen sub-i16 elements; the memory VT below still records the
        // original element type, so only the low bits are stored.
        ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
      Ops.push_back(ExtVal);
    }

    // Then any remaining arguments
    Ops.append(N->op_begin() + 2, N->op_end());

    SDValue NewSt = DAG.getMemIntrinsicNode(
        Opcode, DL, DAG.getVTList(MVT::Other), Ops,
        MemSD->getMemoryVT(), MemSD->getMemOperand());

    //return DCI.CombineTo(N, NewSt, true);
    return NewSt;
  }

  return SDValue();
}
01980 
01981 // st i1 v, addr
01982 //    =>
01983 // v1 = zxt v to i16
01984 // st.u8 i16, addr
01985 SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
01986   SDNode *Node = Op.getNode();
01987   SDLoc dl(Node);
01988   StoreSDNode *ST = cast<StoreSDNode>(Node);
01989   SDValue Tmp1 = ST->getChain();
01990   SDValue Tmp2 = ST->getBasePtr();
01991   SDValue Tmp3 = ST->getValue();
01992   assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
01993   unsigned Alignment = ST->getAlignment();
01994   bool isVolatile = ST->isVolatile();
01995   bool isNonTemporal = ST->isNonTemporal();
01996   Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
01997   SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
01998                                      ST->getPointerInfo(), MVT::i8, isNonTemporal,
01999                                      isVolatile, Alignment);
02000   return Result;
02001 }
02002 
02003 SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
02004                                         int idx, EVT v) const {
02005   std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
02006   std::stringstream suffix;
02007   suffix << idx;
02008   *name += suffix.str();
02009   return DAG.getTargetExternalSymbol(name->c_str(), v);
02010 }
02011 
02012 SDValue
02013 NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
02014   std::string ParamSym;
02015   raw_string_ostream ParamStr(ParamSym);
02016 
02017   ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx;
02018   ParamStr.flush();
02019 
02020   std::string *SavedStr =
02021     nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str());
02022   return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
02023 }
02024 
// Return the ".HLPPARAM<idx>" helper symbol used during parameter lowering.
// The EVT argument of getExtSymb is omitted here, so its default (declared in
// the header) is used.
SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
  return getExtSymb(DAG, ".HLPPARAM", idx);
}
02028 
02029 // Check to see if the kernel argument is image*_t or sampler_t
02030 
02031 bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
02032   static const char *const specialTypes[] = { "struct._image2d_t",
02033                                               "struct._image3d_t",
02034                                               "struct._sampler_t" };
02035 
02036   const Type *Ty = arg->getType();
02037   const PointerType *PTy = dyn_cast<PointerType>(Ty);
02038 
02039   if (!PTy)
02040     return false;
02041 
02042   if (!context)
02043     return false;
02044 
02045   const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
02046   const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
02047 
02048   for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
02049     if (TypeName == specialTypes[i])
02050       return true;
02051 
02052   return false;
02053 }
02054 
// LowerFormalArguments - Materialize this function's incoming arguments as
// SDValues appended to InVals (one per entry in Ins).  Depending on the
// argument kind, an argument becomes:
//   * an i32 constant holding its 1-based position (image/sampler kernel args),
//   * UNDEF nodes (unused arguments),
//   * loads from the argument's ".param" symbol (aggregates, vectors, scalars),
//   * or a MoveParam of the param symbol (byval arguments).
SDValue NVPTXTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout *TD = getDataLayout();

  const Function *F = MF.getFunction();
  const AttributeSet &PAL = F->getAttributes();
  const TargetLowering *TLI = STI.getTargetLowering();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  // Only the ABI calling convention (sm_20+) is supported; the assert fires in
  // +Asserts builds, the early return keeps release builds from miscompiling.
  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  // Snapshot the IR arguments and their types for indexed access below.
  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
  // Ins.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Ins)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Ins.
  // So a different index should be used for indexing into Ins.
  // See similar issue in LowerCall.
  unsigned InsIdx = 0;

  int idx = 0;
  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
    Type *Ty = argTypes[i];

    // If the kernel argument is image*_t or sampler_t, convert it to
    // a i32 constant holding the parameter position. This can later
    // matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(
            theArgs[i],
            (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
                                     : nullptr))) {
      assert(isKernel && "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
      continue;
    }

    if (theArgs[i]->use_empty()) {
      // argument is dead: still emit one UNDEF per Ins entry so InVals stays
      // in sync with Ins.
      if (Ty->isAggregateType()) {
        SmallVector<EVT, 16> vtparts;

        ComputePTXValueVTs(*this, Ty, vtparts);
        assert(vtparts.size() > 0 && "empty aggregate type not expected");
        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
             ++parti) {
          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
          ++InsIdx;
        }
        // Back off by one: the outer loop's ++InsIdx accounts for the last
        // part consumed.
        if (vtparts.size() > 0)
          --InsIdx;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(Ty);
        unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
        for (unsigned parti = 0; parti < NumRegs; ++parti) {
          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
          ++InsIdx;
        }
        if (NumRegs > 0)
          --InsIdx;
        continue;
      }
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
      continue;
    }

    // In the following cases, assign a node order of "idx+1"
    // to newly created nodes. The SDNodes for params have to
    // appear in the same order as their order of appearance
    // in the original function. "idx+1" holds that order.
    if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
      if (Ty->isAggregateType()) {
        // Aggregate argument: load each decomposed part from the param symbol
        // at its computed byte offset.
        SmallVector<EVT, 16> vtparts;
        SmallVector<uint64_t, 16> offsets;

        // NOTE: Here, we lose the ability to issue vector loads for vectors
        // that are a part of a struct.  This should be investigated in the
        // future.
        ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0);
        assert(vtparts.size() > 0 && "empty aggregate type not expected");
        bool aggregateIsPacked = false;
        if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
          aggregateIsPacked = STy->isPacked();

        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
             ++parti) {
          EVT partVT = vtparts[parti];
          Value *srcValue = Constant::getNullValue(
              PointerType::get(partVT.getTypeForEVT(F->getContext()),
                               llvm::ADDRESS_SPACE_PARAM));
          SDValue srcAddr =
              DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
                          DAG.getConstant(offsets[parti], dl, getPointerTy()));
          // Packed structs have no padding, so parts are only byte-aligned.
          unsigned partAlign =
              aggregateIsPacked ? 1
                                : TD->getABITypeAlignment(
                                      partVT.getTypeForEVT(F->getContext()));
          SDValue p;
          // If the Ins register type is wider than the in-memory part type,
          // use an extending load (sign- or zero-extending per the arg flags).
          if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
            ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 
                                     ISD::SEXTLOAD : ISD::ZEXTLOAD;
            p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
                               MachinePointerInfo(srcValue), partVT, false,
                               false, false, partAlign);
          } else {
            p = DAG.getLoad(partVT, dl, Root, srcAddr,
                            MachinePointerInfo(srcValue), false, false, false,
                            partAlign);
          }
          if (p.getNode())
            p.getNode()->setIROrder(idx + 1);
          InVals.push_back(p);
          ++InsIdx;
        }
        if (vtparts.size() > 0)
          --InsIdx;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(Ty);
        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
        unsigned NumElts = ObjectVT.getVectorNumElements();
        assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
               "Vector was not scalarized");
        EVT EltVT = ObjectVT.getVectorElementType();

        // V1 load
        // f32 = load ...
        if (NumElts == 1) {
          // We only have one element, so just directly load it
          Value *SrcValue = Constant::getNullValue(PointerType::get(
              EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
          SDValue P = DAG.getLoad(
              EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
              false, true,
              TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
          if (P.getNode())
            P.getNode()->setIROrder(idx + 1);

          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
            P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
          InVals.push_back(P);
          ++InsIdx;
        } else if (NumElts == 2) {
          // V2 load
          // f32,f32 = load ...
          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
          Value *SrcValue = Constant::getNullValue(PointerType::get(
              VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
          SDValue P = DAG.getLoad(
              VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
              false, true,
              TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
          if (P.getNode())
            P.getNode()->setIROrder(idx + 1);

          SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                     DAG.getIntPtrConstant(0, dl));
          SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                     DAG.getIntPtrConstant(1, dl));

          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
            Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
            Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
          }

          InVals.push_back(Elt0);
          InVals.push_back(Elt1);
          InsIdx += 2;
        } else {
          // V4 loads
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the
          // vector will be expanded to a power of 2 elements, so we know we can
          // always round up to the next multiple of 4 when creating the vector
          // loads.
          // e.g.  4 elem => 1 ld.v4
          //       6 elem => 2 ld.v4
          //       8 elem => 2 ld.v4
          //      11 elem => 3 ld.v4
          unsigned VecSize = 4;
          // 64-bit elements use v2 loads instead of v4.
          if (EltVT.getSizeInBits() == 64) {
            VecSize = 2;
          }
          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
          unsigned Ofst = 0;
          for (unsigned i = 0; i < NumElts; i += VecSize) {
            Value *SrcValue = Constant::getNullValue(
                PointerType::get(VecVT.getTypeForEVT(F->getContext()),
                                 llvm::ADDRESS_SPACE_PARAM));
            SDValue SrcAddr =
                DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
                            DAG.getConstant(Ofst, dl, getPointerTy()));
            SDValue P = DAG.getLoad(
                VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
                false, true,
                TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
            if (P.getNode())
              P.getNode()->setIROrder(idx + 1);

            // Extract only the elements that actually exist; the tail of the
            // last vector load past NumElts is ignored.
            for (unsigned j = 0; j < VecSize; ++j) {
              if (i + j >= NumElts)
                break;
              SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                        DAG.getIntPtrConstant(j, dl));
              if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
                Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
              InVals.push_back(Elt);
            }
            Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
          }
          InsIdx += NumElts;
        }

        if (NumElts > 0)
          --InsIdx;
        continue;
      }
      // A plain scalar.
      EVT ObjectVT = getValueType(Ty);
      // If ABI, load from the param symbol
      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
      Value *srcValue = Constant::getNullValue(PointerType::get(
          ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
      SDValue p;
       if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
        ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 
                                       ISD::SEXTLOAD : ISD::ZEXTLOAD;
        p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
                           MachinePointerInfo(srcValue), ObjectVT, false, false,
                           false,
        TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
      } else {
        p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
                        MachinePointerInfo(srcValue), false, false, false,
        TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
      }
      if (p.getNode())
        p.getNode()->setIROrder(idx + 1);
      InVals.push_back(p);
      continue;
    }

    // Param has ByVal attribute
    // Return MoveParam(param symbol).
    // Ideally, the param symbol can be returned directly,
    // but when SDNode builder decides to use it in a CopyToReg(),
    // machine instruction fails because TargetExternalSymbol
    // (not lowered) is target dependent, and CopyToReg assumes
    // the source is lowered.
    EVT ObjectVT = getValueType(Ty);
    assert(ObjectVT == Ins[InsIdx].VT &&
           "Ins type did not match function type");
    SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
    SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
    if (p.getNode())
      p.getNode()->setIROrder(idx + 1);
    if (isKernel)
      InVals.push_back(p);
    else {
      // Non-kernel byval params live in local space; convert the pointer to
      // the generic address space for the function body.
      SDValue p2 = DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
          DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
      InVals.push_back(p2);
    }
  }

  // Clang will check explicit VarArg and issue error if any. However, Clang
  // will let code with
  // implicit var arg like f() pass. See bug 617733.
  // We treat this case as if the arg list is empty.
  // if (F.isVarArg()) {
  // assert(0 && "VarArg not supported yet!");
  //}

  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));

  return Chain;
}
02355 
02356 
// LowerReturn - Lower the function's return value(s) into NVPTXISD::StoreRetval
// (or StoreRetvalV2/V4) nodes that write the scalarized components into the
// return-value param space, then terminate with a RET_FLAG node.
SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 SDLoc dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();
  Type *RetTy = F->getReturnType();
  const DataLayout *TD = getDataLayout();

  // Only the ABI calling convention (sm_20+) is supported.
  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
    // If we have a vector type, the OutVals array will be the scalarized
    // components and we have combine them into 1 or more vector stores.
    unsigned NumElts = VTy->getNumElements();
    assert(NumElts == Outs.size() && "Bad scalarization of return value");

    // const_cast can be removed in later LLVM versions
    EVT EltVT = getValueType(RetTy).getVectorElementType();
    // Sub-16-bit elements (i1/i8) are widened to i16 before storing, since
    // that is the narrowest register type PTX works with.
    bool NeedExtend = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExtend = true;

    // V1 store
    if (NumElts == 1) {
      SDValue StoreVal = OutVals[0];
      // We only have one element, so just directly store it
      if (NeedExtend)
        StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
      SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
                                      DAG.getVTList(MVT::Other), Ops,
                                      EltVT, MachinePointerInfo());

    } else if (NumElts == 2) {
      // V2 store
      SDValue StoreVal0 = OutVals[0];
      SDValue StoreVal1 = OutVals[1];

      if (NeedExtend) {
        StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
        StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
      }

      SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0,
                        StoreVal1 };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
                                      DAG.getVTList(MVT::Other), Ops,
                                      EltVT, MachinePointerInfo());
    } else {
      // V4 stores
      // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
      // vector will be expanded to a power of 2 elements, so we know we can
      // always round up to the next multiple of 4 when creating the vector
      // stores.
      // e.g.  4 elem => 1 st.v4
      //       6 elem => 2 st.v4
      //       8 elem => 2 st.v4
      //      11 elem => 3 st.v4

      unsigned VecSize = 4;
      // 64-bit elements are stored in pairs (st.v2) instead of quads.
      if (OutVals[0].getValueType().getSizeInBits() == 64)
        VecSize = 2;

      unsigned Offset = 0;

      EVT VecVT =
          EVT::getVectorVT(F->getContext(), EltVT, VecSize);
      unsigned PerStoreOffset =
          TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));

      for (unsigned i = 0; i < NumElts; i += VecSize) {
        // Get values
        SDValue StoreVal;
        SmallVector<SDValue, 8> Ops;
        Ops.push_back(Chain);
        Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32));
        unsigned Opc = NVPTXISD::StoreRetvalV2;
        EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();

        StoreVal = OutVals[i];
        if (NeedExtend)
          StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
        Ops.push_back(StoreVal);

        // Elements past NumElts are padded with UNDEF to fill the vector.
        if (i + 1 < NumElts) {
          StoreVal = OutVals[i + 1];
          if (NeedExtend)
            StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
        } else {
          StoreVal = DAG.getUNDEF(ExtendedVT);
        }
        Ops.push_back(StoreVal);

        if (VecSize == 4) {
          Opc = NVPTXISD::StoreRetvalV4;
          if (i + 2 < NumElts) {
            StoreVal = OutVals[i + 2];
            if (NeedExtend)
              StoreVal =
                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
          } else {
            StoreVal = DAG.getUNDEF(ExtendedVT);
          }
          Ops.push_back(StoreVal);

          if (i + 3 < NumElts) {
            StoreVal = OutVals[i + 3];
            if (NeedExtend)
              StoreVal =
                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
          } else {
            StoreVal = DAG.getUNDEF(ExtendedVT);
          }
          Ops.push_back(StoreVal);
        }

        // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
        Chain =
            DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
                                    EltVT, MachinePointerInfo());
        Offset += PerStoreOffset;
      }
    }
  } else {
    // Scalar or aggregate return: store each decomposed part at its byte
    // offset with a scalar StoreRetval.
    SmallVector<EVT, 16> ValVTs;
    SmallVector<uint64_t, 16> Offsets;
    ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
    assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");

    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
      SDValue theVal = OutVals[i];
      EVT TheValType = theVal.getValueType();
      unsigned numElems = 1;
      if (TheValType.isVector())
        numElems = TheValType.getVectorNumElements();
      for (unsigned j = 0, je = numElems; j != je; ++j) {
        SDValue TmpVal = theVal;
        if (TheValType.isVector())
          TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                               TheValType.getVectorElementType(), TmpVal,
                               DAG.getIntPtrConstant(j, dl));
        EVT TheStoreType = ValVTs[i];
        if (RetTy->isIntegerTy() &&
            TD->getTypeAllocSizeInBits(RetTy) < 32) {
          // The following zero-extension is for integer types only, and
          // specifically not for aggregates.
          TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
          TheStoreType = MVT::i32;
        }
        else if (TmpVal.getValueType().getSizeInBits() < 16)
          TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);

        SDValue Ops[] = {
          Chain,
          DAG.getConstant(Offsets[i], dl, MVT::i32),
          TmpVal };
        Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
                                        DAG.getVTList(MVT::Other), Ops,
                                        TheStoreType,
                                        MachinePointerInfo());
      }
    }
  }

  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}
02529 
02530 
02531 void NVPTXTargetLowering::LowerAsmOperandForConstraint(
02532     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
02533     SelectionDAG &DAG) const {
02534   if (Constraint.length() > 1)
02535     return;
02536   else
02537     TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
02538 }
02539 
02540 // NVPTX suuport vector of legal types of any length in Intrinsics because the
02541 // NVPTX specific type legalizer
02542 // will legalize them to the PTX supported length.
02543 bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
02544   if (isTypeLegal(VT))
02545     return true;
02546   if (VT.isVector()) {
02547     MVT eVT = VT.getVectorElementType();
02548     if (isTypeLegal(eVT))
02549       return true;
02550   }
02551   return false;
02552 }
02553 
02554 static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
02555   switch (Intrinsic) {
02556   default:
02557     return 0;
02558 
02559   case Intrinsic::nvvm_tex_1d_v4f32_s32:
02560     return NVPTXISD::Tex1DFloatS32;
02561   case Intrinsic::nvvm_tex_1d_v4f32_f32:
02562     return NVPTXISD::Tex1DFloatFloat;
02563   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
02564     return NVPTXISD::Tex1DFloatFloatLevel;
02565   case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
02566     return NVPTXISD::Tex1DFloatFloatGrad;
02567   case Intrinsic::nvvm_tex_1d_v4s32_s32:
02568     return NVPTXISD::Tex1DS32S32;
02569   case Intrinsic::nvvm_tex_1d_v4s32_f32:
02570     return NVPTXISD::Tex1DS32Float;
02571   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
02572     return NVPTXISD::Tex1DS32FloatLevel;
02573   case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
02574     return NVPTXISD::Tex1DS32FloatGrad;
02575   case Intrinsic::nvvm_tex_1d_v4u32_s32:
02576     return NVPTXISD::Tex1DU32S32;
02577   case Intrinsic::nvvm_tex_1d_v4u32_f32:
02578     return NVPTXISD::Tex1DU32Float;
02579   case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
02580     return NVPTXISD::Tex1DU32FloatLevel;
02581   case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
02582     return NVPTXISD::Tex1DU32FloatGrad;
02583 
02584   case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
02585     return NVPTXISD::Tex1DArrayFloatS32;
02586   case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
02587     return NVPTXISD::Tex1DArrayFloatFloat;
02588   case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
02589     return NVPTXISD::Tex1DArrayFloatFloatLevel;
02590   case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
02591     return NVPTXISD::Tex1DArrayFloatFloatGrad;
02592   case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
02593     return NVPTXISD::Tex1DArrayS32S32;
02594   case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
02595     return NVPTXISD::Tex1DArrayS32Float;
02596   case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
02597     return NVPTXISD::Tex1DArrayS32FloatLevel;
02598   case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
02599     return NVPTXISD::Tex1DArrayS32FloatGrad;
02600   case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
02601     return NVPTXISD::Tex1DArrayU32S32;
02602   case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
02603     return NVPTXISD::Tex1DArrayU32Float;
02604   case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
02605     return NVPTXISD::Tex1DArrayU32FloatLevel;
02606   case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
02607     return NVPTXISD::Tex1DArrayU32FloatGrad;
02608 
02609   case Intrinsic::nvvm_tex_2d_v4f32_s32:
02610     return NVPTXISD::Tex2DFloatS32;
02611   case Intrinsic::nvvm_tex_2d_v4f32_f32:
02612     return NVPTXISD::Tex2DFloatFloat;
02613   case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
02614     return NVPTXISD::Tex2DFloatFloatLevel;
02615   case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
02616     return NVPTXISD::Tex2DFloatFloatGrad;
02617   case Intrinsic::nvvm_tex_2d_v4s32_s32:
02618     return NVPTXISD::Tex2DS32S32;
02619   case Intrinsic::nvvm_tex_2d_v4s32_f32:
02620     return NVPTXISD::Tex2DS32Float;
02621   case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
02622     return NVPTXISD::Tex2DS32FloatLevel;
02623   case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
02624     return NVPTXISD::Tex2DS32FloatGrad;
02625   case Intrinsic::nvvm_tex_2d_v4u32_s32:
02626     return NVPTXISD::Tex2DU32S32;
02627   case Intrinsic::nvvm_tex_2d_v4u32_f32:
02628     return NVPTXISD::Tex2DU32Float;
02629   case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
02630     return NVPTXISD::Tex2DU32FloatLevel;
02631   case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
02632     return NVPTXISD::Tex2DU32FloatGrad;
02633 
02634   case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
02635     return NVPTXISD::Tex2DArrayFloatS32;
02636   case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
02637     return NVPTXISD::Tex2DArrayFloatFloat;
02638   case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
02639     return NVPTXISD::Tex2DArrayFloatFloatLevel;
02640   case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
02641     return NVPTXISD::Tex2DArrayFloatFloatGrad;
02642   case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
02643     return NVPTXISD::Tex2DArrayS32S32;
02644   case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
02645     return NVPTXISD::Tex2DArrayS32Float;
02646   case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
02647     return NVPTXISD::Tex2DArrayS32FloatLevel;
02648   case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
02649     return NVPTXISD::Tex2DArrayS32FloatGrad;
02650   case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
02651     return NVPTXISD::Tex2DArrayU32S32;
02652   case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
02653     return NVPTXISD::Tex2DArrayU32Float;
02654   case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
02655     return NVPTXISD::Tex2DArrayU32FloatLevel;
02656   case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
02657     return NVPTXISD::Tex2DArrayU32FloatGrad;
02658 
02659   case Intrinsic::nvvm_tex_3d_v4f32_s32:
02660     return NVPTXISD::Tex3DFloatS32;
02661   case Intrinsic::nvvm_tex_3d_v4f32_f32:
02662     return NVPTXISD::Tex3DFloatFloat;
02663   case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
02664     return NVPTXISD::Tex3DFloatFloatLevel;
02665   case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
02666     return NVPTXISD::Tex3DFloatFloatGrad;
02667   case Intrinsic::nvvm_tex_3d_v4s32_s32:
02668     return NVPTXISD::Tex3DS32S32;
02669   case Intrinsic::nvvm_tex_3d_v4s32_f32:
02670     return NVPTXISD::Tex3DS32Float;
02671   case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
02672     return NVPTXISD::Tex3DS32FloatLevel;
02673   case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
02674     return NVPTXISD::Tex3DS32FloatGrad;
02675   case Intrinsic::nvvm_tex_3d_v4u32_s32:
02676     return NVPTXISD::Tex3DU32S32;
02677   case Intrinsic::nvvm_tex_3d_v4u32_f32:
02678     return NVPTXISD::Tex3DU32Float;
02679   case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
02680     return NVPTXISD::Tex3DU32FloatLevel;
02681   case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
02682     return NVPTXISD::Tex3DU32FloatGrad;
02683 
02684   case Intrinsic::nvvm_tex_cube_v4f32_f32:
02685     return NVPTXISD::TexCubeFloatFloat;
02686   case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
02687     return NVPTXISD::TexCubeFloatFloatLevel;
02688   case Intrinsic::nvvm_tex_cube_v4s32_f32:
02689     return NVPTXISD::TexCubeS32Float;
02690   case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
02691     return NVPTXISD::TexCubeS32FloatLevel;
02692   case Intrinsic::nvvm_tex_cube_v4u32_f32:
02693     return NVPTXISD::TexCubeU32Float;
02694   case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
02695     return NVPTXISD::TexCubeU32FloatLevel;
02696 
02697   case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
02698     return NVPTXISD::TexCubeArrayFloatFloat;
02699   case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
02700     return NVPTXISD::TexCubeArrayFloatFloatLevel;
02701   case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
02702     return NVPTXISD::TexCubeArrayS32Float;
02703   case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
02704     return NVPTXISD::TexCubeArrayS32FloatLevel;
02705   case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
02706     return NVPTXISD::TexCubeArrayU32Float;
02707   case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
02708     return NVPTXISD::TexCubeArrayU32FloatLevel;
02709 
02710   case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
02711     return NVPTXISD::Tld4R2DFloatFloat;
02712   case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
02713     return NVPTXISD::Tld4G2DFloatFloat;
02714   case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
02715     return NVPTXISD::Tld4B2DFloatFloat;
02716   case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
02717     return NVPTXISD::Tld4A2DFloatFloat;
02718   case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
02719     return NVPTXISD::Tld4R2DS64Float;
02720   case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
02721     return NVPTXISD::Tld4G2DS64Float;
02722   case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
02723     return NVPTXISD::Tld4B2DS64Float;
02724   case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
02725     return NVPTXISD::Tld4A2DS64Float;
02726   case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
02727     return NVPTXISD::Tld4R2DU64Float;
02728   case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
02729     return NVPTXISD::Tld4G2DU64Float;
02730   case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
02731     return NVPTXISD::Tld4B2DU64Float;
02732   case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
02733     return NVPTXISD::Tld4A2DU64Float;
02734 
02735   case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
02736     return NVPTXISD::TexUnified1DFloatS32;
02737   case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
02738     return NVPTXISD::TexUnified1DFloatFloat;
02739   case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
02740     return NVPTXISD::TexUnified1DFloatFloatLevel;
02741   case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
02742     return NVPTXISD::TexUnified1DFloatFloatGrad;
02743   case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
02744     return NVPTXISD::TexUnified1DS32S32;
02745   case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
02746     return NVPTXISD::TexUnified1DS32Float;
02747   case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
02748     return NVPTXISD::TexUnified1DS32FloatLevel;
02749   case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
02750     return NVPTXISD::TexUnified1DS32FloatGrad;
02751   case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
02752     return NVPTXISD::TexUnified1DU32S32;
02753   case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
02754     return NVPTXISD::TexUnified1DU32Float;
02755   case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
02756     return NVPTXISD::TexUnified1DU32FloatLevel;
02757   case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
02758     return NVPTXISD::TexUnified1DU32FloatGrad;
02759 
02760   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
02761     return NVPTXISD::TexUnified1DArrayFloatS32;
02762   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
02763     return NVPTXISD::TexUnified1DArrayFloatFloat;
02764   case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
02765     return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
02766   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
02767     return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
02768   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
02769     return NVPTXISD::TexUnified1DArrayS32S32;
02770   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
02771     return NVPTXISD::TexUnified1DArrayS32Float;
02772   case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
02773     return NVPTXISD::TexUnified1DArrayS32FloatLevel;
02774   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
02775     return NVPTXISD::TexUnified1DArrayS32FloatGrad;
02776   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
02777     return NVPTXISD::TexUnified1DArrayU32S32;
02778   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
02779     return NVPTXISD::TexUnified1DArrayU32Float;
02780   case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
02781     return NVPTXISD::TexUnified1DArrayU32FloatLevel;
02782   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
02783     return NVPTXISD::TexUnified1DArrayU32FloatGrad;
02784 
02785   case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
02786     return NVPTXISD::TexUnified2DFloatS32;
02787   case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
02788     return NVPTXISD::TexUnified2DFloatFloat;
02789   case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
02790     return NVPTXISD::TexUnified2DFloatFloatLevel;
02791   case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
02792     return NVPTXISD::TexUnified2DFloatFloatGrad;
02793   case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
02794     return NVPTXISD::TexUnified2DS32S32;
02795   case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
02796     return NVPTXISD::TexUnified2DS32Float;
02797   case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
02798     return NVPTXISD::TexUnified2DS32FloatLevel;
02799   case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
02800     return NVPTXISD::TexUnified2DS32FloatGrad;
02801   case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
02802     return NVPTXISD::TexUnified2DU32S32;
02803   case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
02804     return NVPTXISD::TexUnified2DU32Float;
02805   case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
02806     return NVPTXISD::TexUnified2DU32FloatLevel;
02807   case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
02808     return NVPTXISD::TexUnified2DU32FloatGrad;
02809 
02810   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
02811     return NVPTXISD::TexUnified2DArrayFloatS32;
02812   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
02813     return NVPTXISD::TexUnified2DArrayFloatFloat;
02814   case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
02815     return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
02816   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
02817     return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
02818   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
02819     return NVPTXISD::TexUnified2DArrayS32S32;
02820   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
02821     return NVPTXISD::TexUnified2DArrayS32Float;
02822   case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
02823     return NVPTXISD::TexUnified2DArrayS32FloatLevel;
02824   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
02825     return NVPTXISD::TexUnified2DArrayS32FloatGrad;
02826   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
02827     return NVPTXISD::TexUnified2DArrayU32S32;
02828   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
02829     return NVPTXISD::TexUnified2DArrayU32Float;
02830   case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
02831     return NVPTXISD::TexUnified2DArrayU32FloatLevel;
02832   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
02833     return NVPTXISD::TexUnified2DArrayU32FloatGrad;
02834 
02835   case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
02836     return NVPTXISD::TexUnified3DFloatS32;
02837   case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
02838     return NVPTXISD::TexUnified3DFloatFloat;
02839   case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
02840     return NVPTXISD::TexUnified3DFloatFloatLevel;
02841   case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
02842     return NVPTXISD::TexUnified3DFloatFloatGrad;
02843   case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
02844     return NVPTXISD::TexUnified3DS32S32;
02845   case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
02846     return NVPTXISD::TexUnified3DS32Float;
02847   case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
02848     return NVPTXISD::TexUnified3DS32FloatLevel;
02849   case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
02850     return NVPTXISD::TexUnified3DS32FloatGrad;
02851   case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
02852     return NVPTXISD::TexUnified3DU32S32;
02853   case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
02854     return NVPTXISD::TexUnified3DU32Float;
02855   case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
02856     return NVPTXISD::TexUnified3DU32FloatLevel;
02857   case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
02858     return NVPTXISD::TexUnified3DU32FloatGrad;
02859 
02860   case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
02861     return NVPTXISD::TexUnifiedCubeFloatFloat;
02862   case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
02863     return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
02864   case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
02865     return NVPTXISD::TexUnifiedCubeS32Float;
02866   case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
02867     return NVPTXISD::TexUnifiedCubeS32FloatLevel;
02868   case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
02869     return NVPTXISD::TexUnifiedCubeU32Float;
02870   case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
02871     return NVPTXISD::TexUnifiedCubeU32FloatLevel;
02872 
02873   case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
02874     return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
02875   case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
02876     return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
02877   case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
02878     return NVPTXISD::TexUnifiedCubeArrayS32Float;
02879   case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
02880     return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
02881   case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
02882     return NVPTXISD::TexUnifiedCubeArrayU32Float;
02883   case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
02884     return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
02885 
02886   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
02887     return NVPTXISD::Tld4UnifiedR2DFloatFloat;
02888   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
02889     return NVPTXISD::Tld4UnifiedG2DFloatFloat;
02890   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
02891     return NVPTXISD::Tld4UnifiedB2DFloatFloat;
02892   case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
02893     return NVPTXISD::Tld4UnifiedA2DFloatFloat;
02894   case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
02895     return NVPTXISD::Tld4UnifiedR2DS64Float;
02896   case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
02897     return NVPTXISD::Tld4UnifiedG2DS64Float;
02898   case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
02899     return NVPTXISD::Tld4UnifiedB2DS64Float;
02900   case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
02901     return NVPTXISD::Tld4UnifiedA2DS64Float;
02902   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
02903     return NVPTXISD::Tld4UnifiedR2DU64Float;
02904   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
02905     return NVPTXISD::Tld4UnifiedG2DU64Float;
02906   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
02907     return NVPTXISD::Tld4UnifiedB2DU64Float;
02908   case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
02909     return NVPTXISD::Tld4UnifiedA2DU64Float;
02910   }
02911 }
02912 
// Map a surface-load intrinsic ID (llvm.nvvm.suld.*) to the matching
// NVPTXISD surface-load opcode, or 0 if the intrinsic is not a suld
// intrinsic. The cases are grouped by out-of-bounds handling mode
// (.clamp / .trap / .zero) and, within each mode, by surface geometry
// (1d, 1d array, 2d, 2d array, 3d) and element type (scalar/v2/v4 of
// i8/i16/i32/i64; note there is no v4i64 variant).
static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
  switch (Intrinsic) {
  default:
    // Not a surface-load intrinsic; callers treat 0 as "no opcode".
    return 0;
  // .clamp variants: out-of-range accesses are clamped to the surface bounds.
  // 1D surfaces.
  case Intrinsic::nvvm_suld_1d_i8_clamp:
    return NVPTXISD::Suld1DI8Clamp;
  case Intrinsic::nvvm_suld_1d_i16_clamp:
    return NVPTXISD::Suld1DI16Clamp;
  case Intrinsic::nvvm_suld_1d_i32_clamp:
    return NVPTXISD::Suld1DI32Clamp;
  case Intrinsic::nvvm_suld_1d_i64_clamp:
    return NVPTXISD::Suld1DI64Clamp;
  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
    return NVPTXISD::Suld1DV2I8Clamp;
  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
    return NVPTXISD::Suld1DV2I16Clamp;
  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
    return NVPTXISD::Suld1DV2I32Clamp;
  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
    return NVPTXISD::Suld1DV2I64Clamp;
  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
    return NVPTXISD::Suld1DV4I8Clamp;
  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
    return NVPTXISD::Suld1DV4I16Clamp;
  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
    return NVPTXISD::Suld1DV4I32Clamp;
  // 1D surface arrays.
  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
    return NVPTXISD::Suld1DArrayI8Clamp;
  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
    return NVPTXISD::Suld1DArrayI16Clamp;
  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
    return NVPTXISD::Suld1DArrayI32Clamp;
  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
    return NVPTXISD::Suld1DArrayI64Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
    return NVPTXISD::Suld1DArrayV2I8Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
    return NVPTXISD::Suld1DArrayV2I16Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
    return NVPTXISD::Suld1DArrayV2I32Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
    return NVPTXISD::Suld1DArrayV2I64Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
    return NVPTXISD::Suld1DArrayV4I8Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
    return NVPTXISD::Suld1DArrayV4I16Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
    return NVPTXISD::Suld1DArrayV4I32Clamp;
  // 2D surfaces.
  case Intrinsic::nvvm_suld_2d_i8_clamp:
    return NVPTXISD::Suld2DI8Clamp;
  case Intrinsic::nvvm_suld_2d_i16_clamp:
    return NVPTXISD::Suld2DI16Clamp;
  case Intrinsic::nvvm_suld_2d_i32_clamp:
    return NVPTXISD::Suld2DI32Clamp;
  case Intrinsic::nvvm_suld_2d_i64_clamp:
    return NVPTXISD::Suld2DI64Clamp;
  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
    return NVPTXISD::Suld2DV2I8Clamp;
  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
    return NVPTXISD::Suld2DV2I16Clamp;
  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
    return NVPTXISD::Suld2DV2I32Clamp;
  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
    return NVPTXISD::Suld2DV2I64Clamp;
  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
    return NVPTXISD::Suld2DV4I8Clamp;
  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
    return NVPTXISD::Suld2DV4I16Clamp;
  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
    return NVPTXISD::Suld2DV4I32Clamp;
  // 2D surface arrays.
  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
    return NVPTXISD::Suld2DArrayI8Clamp;
  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
    return NVPTXISD::Suld2DArrayI16Clamp;
  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
    return NVPTXISD::Suld2DArrayI32Clamp;
  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
    return NVPTXISD::Suld2DArrayI64Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
    return NVPTXISD::Suld2DArrayV2I8Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
    return NVPTXISD::Suld2DArrayV2I16Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
    return NVPTXISD::Suld2DArrayV2I32Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
    return NVPTXISD::Suld2DArrayV2I64Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
    return NVPTXISD::Suld2DArrayV4I8Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
    return NVPTXISD::Suld2DArrayV4I16Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
    return NVPTXISD::Suld2DArrayV4I32Clamp;
  // 3D surfaces.
  case Intrinsic::nvvm_suld_3d_i8_clamp:
    return NVPTXISD::Suld3DI8Clamp;
  case Intrinsic::nvvm_suld_3d_i16_clamp:
    return NVPTXISD::Suld3DI16Clamp;
  case Intrinsic::nvvm_suld_3d_i32_clamp:
    return NVPTXISD::Suld3DI32Clamp;
  case Intrinsic::nvvm_suld_3d_i64_clamp:
    return NVPTXISD::Suld3DI64Clamp;
  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
    return NVPTXISD::Suld3DV2I8Clamp;
  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
    return NVPTXISD::Suld3DV2I16Clamp;
  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
    return NVPTXISD::Suld3DV2I32Clamp;
  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
    return NVPTXISD::Suld3DV2I64Clamp;
  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
    return NVPTXISD::Suld3DV4I8Clamp;
  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
    return NVPTXISD::Suld3DV4I16Clamp;
  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
    return NVPTXISD::Suld3DV4I32Clamp;
  // .trap variants: out-of-range accesses raise a trap.
  // 1D surfaces.
  case Intrinsic::nvvm_suld_1d_i8_trap:
    return NVPTXISD::Suld1DI8Trap;
  case Intrinsic::nvvm_suld_1d_i16_trap:
    return NVPTXISD::Suld1DI16Trap;
  case Intrinsic::nvvm_suld_1d_i32_trap:
    return NVPTXISD::Suld1DI32Trap;
  case Intrinsic::nvvm_suld_1d_i64_trap:
    return NVPTXISD::Suld1DI64Trap;
  case Intrinsic::nvvm_suld_1d_v2i8_trap:
    return NVPTXISD::Suld1DV2I8Trap;
  case Intrinsic::nvvm_suld_1d_v2i16_trap:
    return NVPTXISD::Suld1DV2I16Trap;
  case Intrinsic::nvvm_suld_1d_v2i32_trap:
    return NVPTXISD::Suld1DV2I32Trap;
  case Intrinsic::nvvm_suld_1d_v2i64_trap:
    return NVPTXISD::Suld1DV2I64Trap;
  case Intrinsic::nvvm_suld_1d_v4i8_trap:
    return NVPTXISD::Suld1DV4I8Trap;
  case Intrinsic::nvvm_suld_1d_v4i16_trap:
    return NVPTXISD::Suld1DV4I16Trap;
  case Intrinsic::nvvm_suld_1d_v4i32_trap:
    return NVPTXISD::Suld1DV4I32Trap;
  // 1D surface arrays.
  case Intrinsic::nvvm_suld_1d_array_i8_trap:
    return NVPTXISD::Suld1DArrayI8Trap;
  case Intrinsic::nvvm_suld_1d_array_i16_trap:
    return NVPTXISD::Suld1DArrayI16Trap;
  case Intrinsic::nvvm_suld_1d_array_i32_trap:
    return NVPTXISD::Suld1DArrayI32Trap;
  case Intrinsic::nvvm_suld_1d_array_i64_trap:
    return NVPTXISD::Suld1DArrayI64Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
    return NVPTXISD::Suld1DArrayV2I8Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
    return NVPTXISD::Suld1DArrayV2I16Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
    return NVPTXISD::Suld1DArrayV2I32Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
    return NVPTXISD::Suld1DArrayV2I64Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
    return NVPTXISD::Suld1DArrayV4I8Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
    return NVPTXISD::Suld1DArrayV4I16Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
    return NVPTXISD::Suld1DArrayV4I32Trap;
  // 2D surfaces.
  case Intrinsic::nvvm_suld_2d_i8_trap:
    return NVPTXISD::Suld2DI8Trap;
  case Intrinsic::nvvm_suld_2d_i16_trap:
    return NVPTXISD::Suld2DI16Trap;
  case Intrinsic::nvvm_suld_2d_i32_trap:
    return NVPTXISD::Suld2DI32Trap;
  case Intrinsic::nvvm_suld_2d_i64_trap:
    return NVPTXISD::Suld2DI64Trap;
  case Intrinsic::nvvm_suld_2d_v2i8_trap:
    return NVPTXISD::Suld2DV2I8Trap;
  case Intrinsic::nvvm_suld_2d_v2i16_trap:
    return NVPTXISD::Suld2DV2I16Trap;
  case Intrinsic::nvvm_suld_2d_v2i32_trap:
    return NVPTXISD::Suld2DV2I32Trap;
  case Intrinsic::nvvm_suld_2d_v2i64_trap:
    return NVPTXISD::Suld2DV2I64Trap;
  case Intrinsic::nvvm_suld_2d_v4i8_trap:
    return NVPTXISD::Suld2DV4I8Trap;
  case Intrinsic::nvvm_suld_2d_v4i16_trap:
    return NVPTXISD::Suld2DV4I16Trap;
  case Intrinsic::nvvm_suld_2d_v4i32_trap:
    return NVPTXISD::Suld2DV4I32Trap;
  // 2D surface arrays.
  case Intrinsic::nvvm_suld_2d_array_i8_trap:
    return NVPTXISD::Suld2DArrayI8Trap;
  case Intrinsic::nvvm_suld_2d_array_i16_trap:
    return NVPTXISD::Suld2DArrayI16Trap;
  case Intrinsic::nvvm_suld_2d_array_i32_trap:
    return NVPTXISD::Suld2DArrayI32Trap;
  case Intrinsic::nvvm_suld_2d_array_i64_trap:
    return NVPTXISD::Suld2DArrayI64Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
    return NVPTXISD::Suld2DArrayV2I8Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
    return NVPTXISD::Suld2DArrayV2I16Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
    return NVPTXISD::Suld2DArrayV2I32Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
    return NVPTXISD::Suld2DArrayV2I64Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
    return NVPTXISD::Suld2DArrayV4I8Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
    return NVPTXISD::Suld2DArrayV4I16Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
    return NVPTXISD::Suld2DArrayV4I32Trap;
  // 3D surfaces.
  case Intrinsic::nvvm_suld_3d_i8_trap:
    return NVPTXISD::Suld3DI8Trap;
  case Intrinsic::nvvm_suld_3d_i16_trap:
    return NVPTXISD::Suld3DI16Trap;
  case Intrinsic::nvvm_suld_3d_i32_trap:
    return NVPTXISD::Suld3DI32Trap;
  case Intrinsic::nvvm_suld_3d_i64_trap:
    return NVPTXISD::Suld3DI64Trap;
  case Intrinsic::nvvm_suld_3d_v2i8_trap:
    return NVPTXISD::Suld3DV2I8Trap;
  case Intrinsic::nvvm_suld_3d_v2i16_trap:
    return NVPTXISD::Suld3DV2I16Trap;
  case Intrinsic::nvvm_suld_3d_v2i32_trap:
    return NVPTXISD::Suld3DV2I32Trap;
  case Intrinsic::nvvm_suld_3d_v2i64_trap:
    return NVPTXISD::Suld3DV2I64Trap;
  case Intrinsic::nvvm_suld_3d_v4i8_trap:
    return NVPTXISD::Suld3DV4I8Trap;
  case Intrinsic::nvvm_suld_3d_v4i16_trap:
    return NVPTXISD::Suld3DV4I16Trap;
  case Intrinsic::nvvm_suld_3d_v4i32_trap:
    return NVPTXISD::Suld3DV4I32Trap;
  // .zero variants: out-of-range reads return zero.
  // 1D surfaces.
  case Intrinsic::nvvm_suld_1d_i8_zero:
    return NVPTXISD::Suld1DI8Zero;
  case Intrinsic::nvvm_suld_1d_i16_zero:
    return NVPTXISD::Suld1DI16Zero;
  case Intrinsic::nvvm_suld_1d_i32_zero:
    return NVPTXISD::Suld1DI32Zero;
  case Intrinsic::nvvm_suld_1d_i64_zero:
    return NVPTXISD::Suld1DI64Zero;
  case Intrinsic::nvvm_suld_1d_v2i8_zero:
    return NVPTXISD::Suld1DV2I8Zero;
  case Intrinsic::nvvm_suld_1d_v2i16_zero:
    return NVPTXISD::Suld1DV2I16Zero;
  case Intrinsic::nvvm_suld_1d_v2i32_zero:
    return NVPTXISD::Suld1DV2I32Zero;
  case Intrinsic::nvvm_suld_1d_v2i64_zero:
    return NVPTXISD::Suld1DV2I64Zero;
  case Intrinsic::nvvm_suld_1d_v4i8_zero:
    return NVPTXISD::Suld1DV4I8Zero;
  case Intrinsic::nvvm_suld_1d_v4i16_zero:
    return NVPTXISD::Suld1DV4I16Zero;
  case Intrinsic::nvvm_suld_1d_v4i32_zero:
    return NVPTXISD::Suld1DV4I32Zero;
  // 1D surface arrays.
  case Intrinsic::nvvm_suld_1d_array_i8_zero:
    return NVPTXISD::Suld1DArrayI8Zero;
  case Intrinsic::nvvm_suld_1d_array_i16_zero:
    return NVPTXISD::Suld1DArrayI16Zero;
  case Intrinsic::nvvm_suld_1d_array_i32_zero:
    return NVPTXISD::Suld1DArrayI32Zero;
  case Intrinsic::nvvm_suld_1d_array_i64_zero:
    return NVPTXISD::Suld1DArrayI64Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
    return NVPTXISD::Suld1DArrayV2I8Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
    return NVPTXISD::Suld1DArrayV2I16Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
    return NVPTXISD::Suld1DArrayV2I32Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
    return NVPTXISD::Suld1DArrayV2I64Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
    return NVPTXISD::Suld1DArrayV4I8Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
    return NVPTXISD::Suld1DArrayV4I16Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
    return NVPTXISD::Suld1DArrayV4I32Zero;
  // 2D surfaces.
  case Intrinsic::nvvm_suld_2d_i8_zero:
    return NVPTXISD::Suld2DI8Zero;
  case Intrinsic::nvvm_suld_2d_i16_zero:
    return NVPTXISD::Suld2DI16Zero;
  case Intrinsic::nvvm_suld_2d_i32_zero:
    return NVPTXISD::Suld2DI32Zero;
  case Intrinsic::nvvm_suld_2d_i64_zero:
    return NVPTXISD::Suld2DI64Zero;
  case Intrinsic::nvvm_suld_2d_v2i8_zero:
    return NVPTXISD::Suld2DV2I8Zero;
  case Intrinsic::nvvm_suld_2d_v2i16_zero:
    return NVPTXISD::Suld2DV2I16Zero;
  case Intrinsic::nvvm_suld_2d_v2i32_zero:
    return NVPTXISD::Suld2DV2I32Zero;
  case Intrinsic::nvvm_suld_2d_v2i64_zero:
    return NVPTXISD::Suld2DV2I64Zero;
  case Intrinsic::nvvm_suld_2d_v4i8_zero:
    return NVPTXISD::Suld2DV4I8Zero;
  case Intrinsic::nvvm_suld_2d_v4i16_zero:
    return NVPTXISD::Suld2DV4I16Zero;
  case Intrinsic::nvvm_suld_2d_v4i32_zero:
    return NVPTXISD::Suld2DV4I32Zero;
  // 2D surface arrays.
  case Intrinsic::nvvm_suld_2d_array_i8_zero:
    return NVPTXISD::Suld2DArrayI8Zero;
  case Intrinsic::nvvm_suld_2d_array_i16_zero:
    return NVPTXISD::Suld2DArrayI16Zero;
  case Intrinsic::nvvm_suld_2d_array_i32_zero:
    return NVPTXISD::Suld2DArrayI32Zero;
  case Intrinsic::nvvm_suld_2d_array_i64_zero:
    return NVPTXISD::Suld2DArrayI64Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
    return NVPTXISD::Suld2DArrayV2I8Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
    return NVPTXISD::Suld2DArrayV2I16Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
    return NVPTXISD::Suld2DArrayV2I32Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
    return NVPTXISD::Suld2DArrayV2I64Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
    return NVPTXISD::Suld2DArrayV4I8Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
    return NVPTXISD::Suld2DArrayV4I16Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
    return NVPTXISD::Suld2DArrayV4I32Zero;
  // 3D surfaces.
  case Intrinsic::nvvm_suld_3d_i8_zero:
    return NVPTXISD::Suld3DI8Zero;
  case Intrinsic::nvvm_suld_3d_i16_zero:
    return NVPTXISD::Suld3DI16Zero;
  case Intrinsic::nvvm_suld_3d_i32_zero:
    return NVPTXISD::Suld3DI32Zero;
  case Intrinsic::nvvm_suld_3d_i64_zero:
    return NVPTXISD::Suld3DI64Zero;
  case Intrinsic::nvvm_suld_3d_v2i8_zero:
    return NVPTXISD::Suld3DV2I8Zero;
  case Intrinsic::nvvm_suld_3d_v2i16_zero:
    return NVPTXISD::Suld3DV2I16Zero;
  case Intrinsic::nvvm_suld_3d_v2i32_zero:
    return NVPTXISD::Suld3DV2I32Zero;
  case Intrinsic::nvvm_suld_3d_v2i64_zero:
    return NVPTXISD::Suld3DV2I64Zero;
  case Intrinsic::nvvm_suld_3d_v4i8_zero:
    return NVPTXISD::Suld3DV4I8Zero;
  case Intrinsic::nvvm_suld_3d_v4i16_zero:
    return NVPTXISD::Suld3DV4I16Zero;
  case Intrinsic::nvvm_suld_3d_v4i32_zero:
    return NVPTXISD::Suld3DV4I32Zero;
  }
}
03249 
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need information that is only available in the
// "Value" type of the destination pointer -- in particular, its address
// space.
03255 bool NVPTXTargetLowering::getTgtMemIntrinsic(
03256     IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
03257   switch (Intrinsic) {
03258   default:
03259     return false;
03260 
03261   case Intrinsic::nvvm_atomic_load_add_f32:
03262     Info.opc = ISD::INTRINSIC_W_CHAIN;
03263     Info.memVT = MVT::f32;
03264     Info.ptrVal = I.getArgOperand(0);
03265     Info.offset = 0;
03266     Info.vol = 0;
03267     Info.readMem = true;
03268     Info.writeMem = true;
03269     Info.align = 0;
03270     return true;
03271 
03272   case Intrinsic::nvvm_atomic_load_inc_32:
03273   case Intrinsic::nvvm_atomic_load_dec_32:
03274     Info.opc = ISD::INTRINSIC_W_CHAIN;
03275     Info.memVT = MVT::i32;
03276     Info.ptrVal = I.getArgOperand(0);
03277     Info.offset = 0;
03278     Info.vol = 0;
03279     Info.readMem = true;
03280     Info.writeMem = true;
03281     Info.align = 0;
03282     return true;
03283 
03284   case Intrinsic::nvvm_ldu_global_i:
03285   case Intrinsic::nvvm_ldu_global_f:
03286   case Intrinsic::nvvm_ldu_global_p: {
03287 
03288     Info.opc = ISD::INTRINSIC_W_CHAIN;
03289     if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
03290       Info.memVT = getValueType(I.getType());
03291     else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
03292       Info.memVT = getPointerTy();
03293     else
03294       Info.memVT = getValueType(I.getType());
03295     Info.ptrVal = I.getArgOperand(0);
03296     Info.offset = 0;
03297     Info.vol = 0;
03298     Info.readMem = true;
03299     Info.writeMem = false;
03300     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
03301 
03302     return true;
03303   }
03304   case Intrinsic::nvvm_ldg_global_i:
03305   case Intrinsic::nvvm_ldg_global_f:
03306   case Intrinsic::nvvm_ldg_global_p: {
03307 
03308     Info.opc = ISD::INTRINSIC_W_CHAIN;
03309     if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
03310       Info.memVT = getValueType(I.getType());
03311     else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
03312       Info.memVT = getPointerTy();
03313     else
03314       Info.memVT = getValueType(I.getType());
03315     Info.ptrVal = I.getArgOperand(0);
03316     Info.offset = 0;
03317     Info.vol = 0;
03318     Info.readMem = true;
03319     Info.writeMem = false;
03320     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
03321 
03322     return true;
03323   }
03324 
03325   case Intrinsic::nvvm_tex_1d_v4f32_s32:
03326   case Intrinsic::nvvm_tex_1d_v4f32_f32:
03327   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
03328   case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
03329   case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
03330   case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
03331   case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
03332   case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
03333   case Intrinsic::nvvm_tex_2d_v4f32_s32:
03334   case Intrinsic::nvvm_tex_2d_v4f32_f32:
03335   case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
03336   case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
03337   case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
03338   case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
03339   case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
03340   case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
03341   case Intrinsic::nvvm_tex_3d_v4f32_s32:
03342   case Intrinsic::nvvm_tex_3d_v4f32_f32:
03343   case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
03344   case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
03345   case Intrinsic::nvvm_tex_cube_v4f32_f32:
03346   case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
03347   case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
03348   case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
03349   case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
03350   case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
03351   case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
03352   case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
03353   case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
03354   case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
03355   case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
03356   case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
03357   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
03358   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
03359   case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
03360   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
03361   case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
03362   case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
03363   case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
03364   case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
03365   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
03366   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
03367   case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
03368   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
03369   case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
03370   case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
03371   case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
03372   case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
03373   case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
03374   case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
03375   case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
03376   case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
03377   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
03378   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
03379   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
03380   case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
03381     Info.opc = getOpcForTextureInstr(Intrinsic);
03382     Info.memVT = MVT::v4f32;
03383     Info.ptrVal = nullptr;
03384     Info.offset = 0;
03385     Info.vol = 0;
03386     Info.readMem = true;
03387     Info.writeMem = false;
03388     Info.align = 16;
03389     return true;
03390   }
03391   case Intrinsic::nvvm_tex_1d_v4s32_s32:
03392   case Intrinsic::nvvm_tex_1d_v4s32_f32:
03393   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
03394   case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
03395   case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
03396   case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
03397   case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
03398   case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
03399   case Intrinsic::nvvm_tex_2d_v4s32_s32:
03400   case Intrinsic::nvvm_tex_2d_v4s32_f32:
03401   case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
03402   case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
03403   case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
03404   case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
03405   case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
03406   case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
03407   case Intrinsic::nvvm_tex_3d_v4s32_s32:
03408   case Intrinsic::nvvm_tex_3d_v4s32_f32:
03409   case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
03410   case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
03411   case Intrinsic::nvvm_tex_cube_v4s32_f32:
03412   case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
03413   case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
03414   case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
03415   case Intrinsic::nvvm_tex_cube_v4u32_f32:
03416   case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
03417   case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
03418   case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
03419   case Intrinsic::nvvm_tex_1d_v4u32_s32:
03420   case Intrinsic::nvvm_tex_1d_v4u32_f32:
03421   case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
03422   case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
03423   case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
03424   case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
03425   case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
03426   case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
03427   case Intrinsic::nvvm_tex_2d_v4u32_s32:
03428   case Intrinsic::nvvm_tex_2d_v4u32_f32:
03429   case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
03430   case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
03431   case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
03432   case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
03433   case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
03434   case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
03435   case Intrinsic::nvvm_tex_3d_v4u32_s32:
03436   case Intrinsic::nvvm_tex_3d_v4u32_f32:
03437   case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
03438   case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
03439   case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
03440   case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
03441   case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
03442   case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
03443   case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
03444   case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
03445   case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
03446   case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
03447   case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
03448   case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
03449   case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
03450   case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
03451   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
03452   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
03453   case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
03454   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
03455   case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
03456   case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
03457   case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
03458   case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
03459   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
03460   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
03461   case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
03462   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
03463   case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
03464   case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
03465   case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
03466   case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
03467   case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
03468   case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
03469   case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
03470   case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
03471   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
03472   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
03473   case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
03474   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
03475   case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
03476   case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
03477   case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
03478   case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
03479   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
03480   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
03481   case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
03482   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
03483   case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
03484   case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
03485   case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
03486   case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
03487   case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
03488   case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
03489   case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
03490   case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
03491   case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
03492   case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
03493   case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
03494   case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
03495   case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
03496   case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
03497   case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
03498   case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
03499   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
03500   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
03501   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
03502   case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
03503     Info.opc = getOpcForTextureInstr(Intrinsic);
03504     Info.memVT = MVT::v4i32;
03505     Info.ptrVal = nullptr;
03506     Info.offset = 0;
03507     Info.vol = 0;
03508     Info.readMem = true;
03509     Info.writeMem = false;
03510     Info.align = 16;
03511     return true;
03512   }
03513   case Intrinsic::nvvm_suld_1d_i8_clamp:
03514   case Intrinsic::nvvm_suld_1d_v2i8_clamp:
03515   case Intrinsic::nvvm_suld_1d_v4i8_clamp:
03516   case Intrinsic::nvvm_suld_1d_array_i8_clamp:
03517   case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
03518   case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
03519   case Intrinsic::nvvm_suld_2d_i8_clamp:
03520   case Intrinsic::nvvm_suld_2d_v2i8_clamp:
03521   case Intrinsic::nvvm_suld_2d_v4i8_clamp:
03522   case Intrinsic::nvvm_suld_2d_array_i8_clamp:
03523   case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
03524   case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
03525   case Intrinsic::nvvm_suld_3d_i8_clamp:
03526   case Intrinsic::nvvm_suld_3d_v2i8_clamp:
03527   case Intrinsic::nvvm_suld_3d_v4i8_clamp:
03528   case Intrinsic::nvvm_suld_1d_i8_trap:
03529   case Intrinsic::nvvm_suld_1d_v2i8_trap:
03530   case Intrinsic::nvvm_suld_1d_v4i8_trap:
03531   case Intrinsic::nvvm_suld_1d_array_i8_trap:
03532   case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
03533   case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
03534   case Intrinsic::nvvm_suld_2d_i8_trap:
03535   case Intrinsic::nvvm_suld_2d_v2i8_trap:
03536   case Intrinsic::nvvm_suld_2d_v4i8_trap:
03537   case Intrinsic::nvvm_suld_2d_array_i8_trap:
03538   case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
03539   case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
03540   case Intrinsic::nvvm_suld_3d_i8_trap:
03541   case Intrinsic::nvvm_suld_3d_v2i8_trap:
03542   case Intrinsic::nvvm_suld_3d_v4i8_trap:
03543   case Intrinsic::nvvm_suld_1d_i8_zero:
03544   case Intrinsic::nvvm_suld_1d_v2i8_zero:
03545   case Intrinsic::nvvm_suld_1d_v4i8_zero:
03546   case Intrinsic::nvvm_suld_1d_array_i8_zero:
03547   case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
03548   case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
03549   case Intrinsic::nvvm_suld_2d_i8_zero:
03550   case Intrinsic::nvvm_suld_2d_v2i8_zero:
03551   case Intrinsic::nvvm_suld_2d_v4i8_zero:
03552   case Intrinsic::nvvm_suld_2d_array_i8_zero:
03553   case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
03554   case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
03555   case Intrinsic::nvvm_suld_3d_i8_zero:
03556   case Intrinsic::nvvm_suld_3d_v2i8_zero:
03557   case Intrinsic::nvvm_suld_3d_v4i8_zero: {
03558     Info.opc = getOpcForSurfaceInstr(Intrinsic);
03559     Info.memVT = MVT::i8;
03560     Info.ptrVal = nullptr;
03561     Info.offset = 0;
03562     Info.vol = 0;
03563     Info.readMem = true;
03564     Info.writeMem = false;
03565     Info.align = 16;
03566     return true;
03567   }
03568   case Intrinsic::nvvm_suld_1d_i16_clamp:
03569   case Intrinsic::nvvm_suld_1d_v2i16_clamp:
03570   case Intrinsic::nvvm_suld_1d_v4i16_clamp:
03571   case Intrinsic::nvvm_suld_1d_array_i16_clamp:
03572   case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
03573   case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
03574   case Intrinsic::nvvm_suld_2d_i16_clamp:
03575   case Intrinsic::nvvm_suld_2d_v2i16_clamp:
03576   case Intrinsic::nvvm_suld_2d_v4i16_clamp:
03577   case Intrinsic::nvvm_suld_2d_array_i16_clamp:
03578   case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
03579   case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
03580   case Intrinsic::nvvm_suld_3d_i16_clamp:
03581   case Intrinsic::nvvm_suld_3d_v2i16_clamp:
03582   case Intrinsic::nvvm_suld_3d_v4i16_clamp:
03583   case Intrinsic::nvvm_suld_1d_i16_trap:
03584   case Intrinsic::nvvm_suld_1d_v2i16_trap:
03585   case Intrinsic::nvvm_suld_1d_v4i16_trap:
03586   case Intrinsic::nvvm_suld_1d_array_i16_trap:
03587   case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
03588   case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
03589   case Intrinsic::nvvm_suld_2d_i16_trap:
03590   case Intrinsic::nvvm_suld_2d_v2i16_trap:
03591   case Intrinsic::nvvm_suld_2d_v4i16_trap:
03592   case Intrinsic::nvvm_suld_2d_array_i16_trap:
03593   case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
03594   case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
03595   case Intrinsic::nvvm_suld_3d_i16_trap:
03596   case Intrinsic::nvvm_suld_3d_v2i16_trap:
03597   case Intrinsic::nvvm_suld_3d_v4i16_trap:
03598   case Intrinsic::nvvm_suld_1d_i16_zero:
03599   case Intrinsic::nvvm_suld_1d_v2i16_zero:
03600   case Intrinsic::nvvm_suld_1d_v4i16_zero:
03601   case Intrinsic::nvvm_suld_1d_array_i16_zero:
03602   case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
03603   case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
03604   case Intrinsic::nvvm_suld_2d_i16_zero:
03605   case Intrinsic::nvvm_suld_2d_v2i16_zero:
03606   case Intrinsic::nvvm_suld_2d_v4i16_zero:
03607   case Intrinsic::nvvm_suld_2d_array_i16_zero:
03608   case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
03609   case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
03610   case Intrinsic::nvvm_suld_3d_i16_zero:
03611   case Intrinsic::nvvm_suld_3d_v2i16_zero:
03612   case Intrinsic::nvvm_suld_3d_v4i16_zero: {
03613     Info.opc = getOpcForSurfaceInstr(Intrinsic);
03614     Info.memVT = MVT::i16;
03615     Info.ptrVal = nullptr;
03616     Info.offset = 0;
03617     Info.vol = 0;
03618     Info.readMem = true;
03619     Info.writeMem = false;
03620     Info.align = 16;
03621     return true;
03622   }
03623   case Intrinsic::nvvm_suld_1d_i32_clamp:
03624   case Intrinsic::nvvm_suld_1d_v2i32_clamp:
03625   case Intrinsic::nvvm_suld_1d_v4i32_clamp:
03626   case Intrinsic::nvvm_suld_1d_array_i32_clamp:
03627   case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
03628   case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
03629   case Intrinsic::nvvm_suld_2d_i32_clamp:
03630   case Intrinsic::nvvm_suld_2d_v2i32_clamp:
03631   case Intrinsic::nvvm_suld_2d_v4i32_clamp:
03632   case Intrinsic::nvvm_suld_2d_array_i32_clamp:
03633   case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
03634   case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
03635   case Intrinsic::nvvm_suld_3d_i32_clamp:
03636   case Intrinsic::nvvm_suld_3d_v2i32_clamp:
03637   case Intrinsic::nvvm_suld_3d_v4i32_clamp:
03638   case Intrinsic::nvvm_suld_1d_i32_trap:
03639   case Intrinsic::nvvm_suld_1d_v2i32_trap:
03640   case Intrinsic::nvvm_suld_1d_v4i32_trap:
03641   case Intrinsic::nvvm_suld_1d_array_i32_trap:
03642   case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
03643   case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
03644   case Intrinsic::nvvm_suld_2d_i32_trap:
03645   case Intrinsic::nvvm_suld_2d_v2i32_trap:
03646   case Intrinsic::nvvm_suld_2d_v4i32_trap:
03647   case Intrinsic::nvvm_suld_2d_array_i32_trap:
03648   case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
03649   case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
03650   case Intrinsic::nvvm_suld_3d_i32_trap:
03651   case Intrinsic::nvvm_suld_3d_v2i32_trap:
03652   case Intrinsic::nvvm_suld_3d_v4i32_trap:
03653   case Intrinsic::nvvm_suld_1d_i32_zero:
03654   case Intrinsic::nvvm_suld_1d_v2i32_zero:
03655   case Intrinsic::nvvm_suld_1d_v4i32_zero:
03656   case Intrinsic::nvvm_suld_1d_array_i32_zero:
03657   case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
03658   case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
03659   case Intrinsic::nvvm_suld_2d_i32_zero:
03660   case Intrinsic::nvvm_suld_2d_v2i32_zero:
03661   case Intrinsic::nvvm_suld_2d_v4i32_zero:
03662   case Intrinsic::nvvm_suld_2d_array_i32_zero:
03663   case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
03664   case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
03665   case Intrinsic::nvvm_suld_3d_i32_zero:
03666   case Intrinsic::nvvm_suld_3d_v2i32_zero:
03667   case Intrinsic::nvvm_suld_3d_v4i32_zero: {
03668     Info.opc = getOpcForSurfaceInstr(Intrinsic);
03669     Info.memVT = MVT::i32;
03670     Info.ptrVal = nullptr;
03671     Info.offset = 0;
03672     Info.vol = 0;
03673     Info.readMem = true;
03674     Info.writeMem = false;
03675     Info.align = 16;
03676     return true;
03677   }
03678   case Intrinsic::nvvm_suld_1d_i64_clamp:
03679   case Intrinsic::nvvm_suld_1d_v2i64_clamp:
03680   case Intrinsic::nvvm_suld_1d_array_i64_clamp:
03681   case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
03682   case Intrinsic::nvvm_suld_2d_i64_clamp:
03683   case Intrinsic::nvvm_suld_2d_v2i64_clamp:
03684   case Intrinsic::nvvm_suld_2d_array_i64_clamp:
03685   case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
03686   case Intrinsic::nvvm_suld_3d_i64_clamp:
03687   case Intrinsic::nvvm_suld_3d_v2i64_clamp:
03688   case Intrinsic::nvvm_suld_1d_i64_trap:
03689   case Intrinsic::nvvm_suld_1d_v2i64_trap:
03690   case Intrinsic::nvvm_suld_1d_array_i64_trap:
03691   case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
03692   case Intrinsic::nvvm_suld_2d_i64_trap:
03693   case Intrinsic::nvvm_suld_2d_v2i64_trap:
03694   case Intrinsic::nvvm_suld_2d_array_i64_trap:
03695   case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
03696   case Intrinsic::nvvm_suld_3d_i64_trap:
03697   case Intrinsic::nvvm_suld_3d_v2i64_trap:
03698   case Intrinsic::nvvm_suld_1d_i64_zero:
03699   case Intrinsic::nvvm_suld_1d_v2i64_zero:
03700   case Intrinsic::nvvm_suld_1d_array_i64_zero:
03701   case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
03702   case Intrinsic::nvvm_suld_2d_i64_zero:
03703   case Intrinsic::nvvm_suld_2d_v2i64_zero:
03704   case Intrinsic::nvvm_suld_2d_array_i64_zero:
03705   case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
03706   case Intrinsic::nvvm_suld_3d_i64_zero:
03707   case Intrinsic::nvvm_suld_3d_v2i64_zero: {
03708     Info.opc = getOpcForSurfaceInstr(Intrinsic);
03709     Info.memVT = MVT::i64;
03710     Info.ptrVal = nullptr;
03711     Info.offset = 0;
03712     Info.vol = 0;
03713     Info.readMem = true;
03714     Info.writeMem = false;
03715     Info.align = 16;
03716     return true;
03717   }
03718   }
03719   return false;
03720 }
03721 
03722 /// isLegalAddressingMode - Return true if the addressing mode represented
03723 /// by AM is legal for this target, for a load/store of the specified type.
03724 /// Used to guide target specific optimizations, like loop strength reduction
03725 /// (LoopStrengthReduce.cpp) and memory optimization for address mode
03726 /// (CodeGenPrepare.cpp)
03727 bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
03728                                                 Type *Ty) const {
03729 
03730   // AddrMode - This represents an addressing mode of:
03731   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
03732   //
03733   // The legal address modes are
03734   // - [avar]
03735   // - [areg]
03736   // - [areg+immoff]
03737   // - [immAddr]
03738 
03739   if (AM.BaseGV) {
03740     if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
03741       return false;
03742     return true;
03743   }
03744 
03745   switch (AM.Scale) {
03746   case 0: // "r", "r+i" or "i" is allowed
03747     break;
03748   case 1:
03749     if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
03750       return false;
03751     // Otherwise we have r+i.
03752     break;
03753   default:
03754     // No scale > 1 is allowed
03755     return false;
03756   }
03757   return true;
03758 }
03759 
03760 //===----------------------------------------------------------------------===//
03761 //                         NVPTX Inline Assembly Support
03762 //===----------------------------------------------------------------------===//
03763 
03764 /// getConstraintType - Given a constraint letter, return the type of
03765 /// constraint it is for this target.
03766 NVPTXTargetLowering::ConstraintType
03767 NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
03768   if (Constraint.size() == 1) {
03769     switch (Constraint[0]) {
03770     default:
03771       break;
03772     case 'b':
03773     case 'r':
03774     case 'h':
03775     case 'c':
03776     case 'l':
03777     case 'f':
03778     case 'd':
03779     case '0':
03780     case 'N':
03781       return C_RegisterClass;
03782     }
03783   }
03784   return TargetLowering::getConstraintType(Constraint);
03785 }
03786 
03787 std::pair<unsigned, const TargetRegisterClass *>
03788 NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
03789                                                   const std::string &Constraint,
03790                                                   MVT VT) const {
03791   if (Constraint.size() == 1) {
03792     switch (Constraint[0]) {
03793     case 'b':
03794       return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
03795     case 'c':
03796       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
03797     case 'h':
03798       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
03799     case 'r':
03800       return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
03801     case 'l':
03802     case 'N':
03803       return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
03804     case 'f':
03805       return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
03806     case 'd':
03807       return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
03808     }
03809   }
03810   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
03811 }
03812 
03813 /// getFunctionAlignment - Return the Log2 alignment of this function.
03814 unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
03815   return 4;
03816 }
03817 
03818 //===----------------------------------------------------------------------===//
03819 //                         NVPTX DAG Combining
03820 //===----------------------------------------------------------------------===//
03821 
03822 bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
03823                                    CodeGenOpt::Level OptLevel) const {
03824   const Function *F = MF.getFunction();
03825   const TargetOptions &TO = MF.getTarget().Options;
03826 
03827   // Always honor command-line argument
03828   if (FMAContractLevelOpt.getNumOccurrences() > 0) {
03829     return FMAContractLevelOpt > 0;
03830   } else if (OptLevel == 0) {
03831     // Do not contract if we're not optimizing the code
03832     return false;
03833   } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
03834     // Honor TargetOptions flags that explicitly say fusion is okay
03835     return true;
03836   } else if (F->hasFnAttribute("unsafe-fp-math")) {
03837     // Check for unsafe-fp-math=true coming from Clang
03838     Attribute Attr = F->getFnAttribute("unsafe-fp-math");
03839     StringRef Val = Attr.getValueAsString();
03840     if (Val == "true")
03841       return true;
03842   }
03843 
03844   // We did not have a clear indication that fusion is allowed, so assume not
03845   return false;
03846 }
03847 
03848 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
03849 /// operands N0 and N1.  This is a helper for PerformADDCombine that is
03850 /// called with the default operands, and if that fails, with commuted
03851 /// operands.
03852 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
03853                                            TargetLowering::DAGCombinerInfo &DCI,
03854                                              const NVPTXSubtarget &Subtarget,
03855                                              CodeGenOpt::Level OptLevel) {
03856   SelectionDAG  &DAG = DCI.DAG;
03857   // Skip non-integer, non-scalar case
03858   EVT VT=N0.getValueType();
03859   if (VT.isVector())
03860     return SDValue();
03861 
03862   // fold (add (mul a, b), c) -> (mad a, b, c)
03863   //
03864   if (N0.getOpcode() == ISD::MUL) {
03865     assert (VT.isInteger());
03866     // For integer:
03867     // Since integer multiply-add costs the same as integer multiply
03868     // but is more costly than integer add, do the fusion only when
03869     // the mul is only used in the add.
03870     if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
03871         !N0.getNode()->hasOneUse())
03872       return SDValue();
03873 
03874     // Do the folding
03875     return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
03876                        N0.getOperand(0), N0.getOperand(1), N1);
03877   }
03878   else if (N0.getOpcode() == ISD::FMUL) {
03879     if (VT == MVT::f32 || VT == MVT::f64) {
03880       const auto *TLI = static_cast<const NVPTXTargetLowering *>(
03881           &DAG.getTargetLoweringInfo());
03882       if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
03883         return SDValue();
03884 
03885       // For floating point:
03886       // Do the fusion only when the mul has less than 5 uses and all
03887       // are add.
03888       // The heuristic is that if a use is not an add, then that use
03889       // cannot be fused into fma, therefore mul is still needed anyway.
03890       // If there are more than 4 uses, even if they are all add, fusing
03891       // them will increase register pressue.
03892       //
03893       int numUses = 0;
03894       int nonAddCount = 0;
03895       for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
03896            UE = N0.getNode()->use_end();
03897            UI != UE; ++UI) {
03898         numUses++;
03899         SDNode *User = *UI;
03900         if (User->getOpcode() != ISD::FADD)
03901           ++nonAddCount;
03902       }
03903       if (numUses >= 5)
03904         return SDValue();
03905       if (nonAddCount) {
03906         int orderNo = N->getIROrder();
03907         int orderNo2 = N0.getNode()->getIROrder();
03908         // simple heuristics here for considering potential register
03909         // pressure, the logics here is that the differnce are used
03910         // to measure the distance between def and use, the longer distance
03911         // more likely cause register pressure.
03912         if (orderNo - orderNo2 < 500)
03913           return SDValue();
03914 
03915         // Now, check if at least one of the FMUL's operands is live beyond the node N,
03916         // which guarantees that the FMA will not increase register pressure at node N.
03917         bool opIsLive = false;
03918         const SDNode *left = N0.getOperand(0).getNode();
03919         const SDNode *right = N0.getOperand(1).getNode();
03920 
03921         if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
03922           opIsLive = true;
03923 
03924         if (!opIsLive)
03925           for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) {
03926             SDNode *User = *UI;
03927             int orderNo3 = User->getIROrder();
03928             if (orderNo3 > orderNo) {
03929               opIsLive = true;
03930               break;
03931             }
03932           }
03933 
03934         if (!opIsLive)
03935           for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) {
03936             SDNode *User = *UI;
03937             int orderNo3 = User->getIROrder();
03938             if (orderNo3 > orderNo) {
03939               opIsLive = true;
03940               break;
03941             }
03942           }
03943 
03944         if (!opIsLive)
03945           return SDValue();
03946       }
03947 
03948       return DAG.getNode(ISD::FMA, SDLoc(N), VT,
03949                          N0.getOperand(0), N0.getOperand(1), N1);
03950     }
03951   }
03952 
03953   return SDValue();
03954 }
03955 
03956 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
03957 ///
03958 static SDValue PerformADDCombine(SDNode *N,
03959                                  TargetLowering::DAGCombinerInfo &DCI,
03960                                  const NVPTXSubtarget &Subtarget,
03961                                  CodeGenOpt::Level OptLevel) {
03962   SDValue N0 = N->getOperand(0);
03963   SDValue N1 = N->getOperand(1);
03964 
03965   // First try with the default operand order.
03966   SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget,
03967                                                  OptLevel);
03968   if (Result.getNode())
03969     return Result;
03970 
03971   // If that didn't work, try again with the operands commuted.
03972   return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
03973 }
03974 
03975 static SDValue PerformANDCombine(SDNode *N,
03976                                  TargetLowering::DAGCombinerInfo &DCI) {
03977   // The type legalizer turns a vector load of i8 values into a zextload to i16
03978   // registers, optionally ANY_EXTENDs it (if target type is integer),
03979   // and ANDs off the high 8 bits. Since we turn this load into a
03980   // target-specific DAG node, the DAG combiner fails to eliminate these AND
03981   // nodes. Do that here.
03982   SDValue Val = N->getOperand(0);
03983   SDValue Mask = N->getOperand(1);
03984 
03985   if (isa<ConstantSDNode>(Val)) {
03986     std::swap(Val, Mask);
03987   }
03988 
03989   SDValue AExt;
03990   // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
03991   if (Val.getOpcode() == ISD::ANY_EXTEND) {
03992     AExt = Val;
03993     Val = Val->getOperand(0);
03994   }
03995 
03996   if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
03997     Val = Val->getOperand(0);
03998   }
03999 
04000   if (Val->getOpcode() == NVPTXISD::LoadV2 ||
04001       Val->getOpcode() == NVPTXISD::LoadV4) {
04002     ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
04003     if (!MaskCnst) {
04004       // Not an AND with a constant
04005       return SDValue();
04006     }
04007 
04008     uint64_t MaskVal = MaskCnst->getZExtValue();
04009     if (MaskVal != 0xff) {
04010       // Not an AND that chops off top 8 bits
04011       return SDValue();
04012     }
04013 
04014     MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
04015     if (!Mem) {
04016       // Not a MemSDNode?!?
04017       return SDValue();
04018     }
04019 
04020     EVT MemVT = Mem->getMemoryVT();
04021     if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
04022       // We only handle the i8 case
04023       return SDValue();
04024     }
04025 
04026     unsigned ExtType =
04027       cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
04028         getZExtValue();
04029     if (ExtType == ISD::SEXTLOAD) {
04030       // If for some reason the load is a sextload, the and is needed to zero
04031       // out the high 8 bits
04032       return SDValue();
04033     }
04034 
04035     bool AddTo = false;
04036     if (AExt.getNode() != 0) {
04037       // Re-insert the ext as a zext.
04038       Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
04039                             AExt.getValueType(), Val);
04040       AddTo = true;
04041     }
04042 
04043     // If we get here, the AND is unnecessary.  Just replace it with the load
04044     DCI.CombineTo(N, Val, AddTo);
04045   }
04046 
04047   return SDValue();
04048 }
04049 
// Signedness classification of a mul-wide candidate operand, as determined
// from the extension node that produced it (see IsMulWideOperandDemotable).
enum OperandSignedness {
  Signed = 0, // operand comes from a sign-extension
  Unsigned,   // operand comes from a zero-extension
  Unknown     // signedness could not be determined
};
04055 
04056 /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
04057 /// that can be demoted to \p OptSize bits without loss of information. The
04058 /// signedness of the operand, if determinable, is placed in \p S.
04059 static bool IsMulWideOperandDemotable(SDValue Op,
04060                                       unsigned OptSize,
04061                                       OperandSignedness &S) {
04062   S = Unknown;
04063 
04064   if (Op.getOpcode() == ISD::SIGN_EXTEND ||
04065       Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
04066     EVT OrigVT = Op.getOperand(0).getValueType();
04067     if (OrigVT.getSizeInBits() <= OptSize) {
04068       S = Signed;
04069       return true;
04070     }
04071   } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
04072     EVT OrigVT = Op.getOperand(0).getValueType();
04073     if (OrigVT.getSizeInBits() <= OptSize) {
04074       S = Unsigned;
04075       return true;
04076     }
04077   }
04078 
04079   return false;
04080 }
04081 
04082 /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
04083 /// be demoted to \p OptSize bits without loss of information. If the operands
04084 /// contain a constant, it should appear as the RHS operand. The signedness of
04085 /// the operands is placed in \p IsSigned.
04086 static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
04087                                         unsigned OptSize,
04088                                         bool &IsSigned) {
04089 
04090   OperandSignedness LHSSign;
04091 
04092   // The LHS operand must be a demotable op
04093   if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
04094     return false;
04095 
04096   // We should have been able to determine the signedness from the LHS
04097   if (LHSSign == Unknown)
04098     return false;
04099 
04100   IsSigned = (LHSSign == Signed);
04101 
04102   // The RHS can be a demotable op or a constant
04103   if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
04104     APInt Val = CI->getAPIntValue();
04105     if (LHSSign == Unsigned) {
04106       if (Val.isIntN(OptSize)) {
04107         return true;
04108       }
04109       return false;
04110     } else {
04111       if (Val.isSignedIntN(OptSize)) {
04112         return true;
04113       }
04114       return false;
04115     }
04116   } else {
04117     OperandSignedness RHSSign;
04118     if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
04119       return false;
04120 
04121     if (LHSSign != RHSSign)
04122       return false;
04123 
04124     return true;
04125   }
04126 }
04127 
04128 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
04129 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
04130 /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
04131 /// amount.
04132 static SDValue TryMULWIDECombine(SDNode *N,
04133                                  TargetLowering::DAGCombinerInfo &DCI) {
04134   EVT MulType = N->getValueType(0);
04135   if (MulType != MVT::i32 && MulType != MVT::i64) {
04136     return SDValue();
04137   }
04138 
04139   SDLoc DL(N);
04140   unsigned OptSize = MulType.getSizeInBits() >> 1;
04141   SDValue LHS = N->getOperand(0);
04142   SDValue RHS = N->getOperand(1);
04143 
04144   // Canonicalize the multiply so the constant (if any) is on the right
04145   if (N->getOpcode() == ISD::MUL) {
04146     if (isa<ConstantSDNode>(LHS)) {
04147       std::swap(LHS, RHS);
04148     }
04149   }
04150 
04151   // If we have a SHL, determine the actual multiply amount
04152   if (N->getOpcode() == ISD::SHL) {
04153     ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
04154     if (!ShlRHS) {
04155       return SDValue();
04156     }
04157 
04158     APInt ShiftAmt = ShlRHS->getAPIntValue();
04159     unsigned BitWidth = MulType.getSizeInBits();
04160     if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
04161       APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
04162       RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
04163     } else {
04164       return SDValue();
04165     }
04166   }
04167 
04168   bool Signed;
04169   // Verify that our operands are demotable
04170   if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
04171     return SDValue();
04172   }
04173 
04174   EVT DemotedVT;
04175   if (MulType == MVT::i32) {
04176     DemotedVT = MVT::i16;
04177   } else {
04178     DemotedVT = MVT::i32;
04179   }
04180 
04181   // Truncate the operands to the correct size. Note that these are just for
04182   // type consistency and will (likely) be eliminated in later phases.
04183   SDValue TruncLHS =
04184     DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
04185   SDValue TruncRHS =
04186     DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
04187 
04188   unsigned Opc;
04189   if (Signed) {
04190     Opc = NVPTXISD::MUL_WIDE_SIGNED;
04191   } else {
04192     Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
04193   }
04194 
04195   return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
04196 }
04197 
04198 /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
04199 static SDValue PerformMULCombine(SDNode *N,
04200                                  TargetLowering::DAGCombinerInfo &DCI,
04201                                  CodeGenOpt::Level OptLevel) {
04202   if (OptLevel > 0) {
04203     // Try mul.wide combining at OptLevel > 0
04204     SDValue Ret = TryMULWIDECombine(N, DCI);
04205     if (Ret.getNode())
04206       return Ret;
04207   }
04208 
04209   return SDValue();
04210 }
04211 
04212 /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
04213 static SDValue PerformSHLCombine(SDNode *N,
04214                                  TargetLowering::DAGCombinerInfo &DCI,
04215                                  CodeGenOpt::Level OptLevel) {
04216   if (OptLevel > 0) {
04217     // Try mul.wide combining at OptLevel > 0
04218     SDValue Ret = TryMULWIDECombine(N, DCI);
04219     if (Ret.getNode())
04220       return Ret;
04221   }
04222 
04223   return SDValue();
04224 }
04225 
/// PerformDAGCombine - Dispatch NVPTX-specific DAG combines by node opcode.
/// The opt-level-sensitive combines (ADD/FADD/MUL/SHL) receive the current
/// CodeGenOpt level; the AND combine runs unconditionally.
SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
  switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::FADD:
      return PerformADDCombine(N, DCI, STI, OptLevel);
    case ISD::MUL:
      return PerformMULCombine(N, DCI, OptLevel);
    case ISD::SHL:
      return PerformSHLCombine(N, DCI, OptLevel);
    case ISD::AND:
      return PerformANDCombine(N, DCI);
  }
  // Unhandled opcode: no combine performed.
  return SDValue();
}
04243 
04244 /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
04245 static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
04246                               const DataLayout *TD,
04247                               SmallVectorImpl<SDValue> &Results) {
04248   EVT ResVT = N->getValueType(0);
04249   SDLoc DL(N);
04250 
04251   assert(ResVT.isVector() && "Vector load must have vector type");
04252 
04253   // We only handle "native" vector sizes for now, e.g. <4 x double> is not
04254   // legal.  We can (and should) split that into 2 loads of <2 x double> here
04255   // but I'm leaving that as a TODO for now.
04256   assert(ResVT.isSimple() && "Can only handle simple types");
04257   switch (ResVT.getSimpleVT().SimpleTy) {
04258   default:
04259     return;
04260   case MVT::v2i8:
04261   case MVT::v2i16:
04262   case MVT::v2i32:
04263   case MVT::v2i64:
04264   case MVT::v2f32:
04265   case MVT::v2f64:
04266   case MVT::v4i8:
04267   case MVT::v4i16:
04268   case MVT::v4i32:
04269   case MVT::v4f32:
04270     // This is a "native" vector type
04271     break;
04272   }
04273 
04274   LoadSDNode *LD = cast<LoadSDNode>(N);
04275 
04276   unsigned Align = LD->getAlignment();
04277   unsigned PrefAlign =
04278     TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
04279   if (Align < PrefAlign) {
04280     // This load is not sufficiently aligned, so bail out and let this vector
04281     // load be scalarized.  Note that we may still be able to emit smaller
04282     // vector loads.  For example, if we are loading a <4 x float> with an
04283     // alignment of 8, this check will fail but the legalizer will try again
04284     // with 2 x <2 x float>, which will succeed with an alignment of 8.
04285     return;
04286   }
04287 
04288   EVT EltVT = ResVT.getVectorElementType();
04289   unsigned NumElts = ResVT.getVectorNumElements();
04290 
04291   // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
04292   // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
04293   // loaded type to i16 and propagate the "real" type as the memory type.
04294   bool NeedTrunc = false;
04295   if (EltVT.getSizeInBits() < 16) {
04296     EltVT = MVT::i16;
04297     NeedTrunc = true;
04298   }
04299 
04300   unsigned Opcode = 0;
04301   SDVTList LdResVTs;
04302 
04303   switch (NumElts) {
04304   default:
04305     return;
04306   case 2:
04307     Opcode = NVPTXISD::LoadV2;
04308     LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
04309     break;
04310   case 4: {
04311     Opcode = NVPTXISD::LoadV4;
04312     EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
04313     LdResVTs = DAG.getVTList(ListVTs);
04314     break;
04315   }
04316   }
04317 
04318   // Copy regular operands
04319   SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
04320 
04321   // The select routine does not have access to the LoadSDNode instance, so
04322   // pass along the extension information
04323   OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
04324 
04325   SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
04326                                           LD->getMemoryVT(),
04327                                           LD->getMemOperand());
04328 
04329   SmallVector<SDValue, 4> ScalarRes;
04330 
04331   for (unsigned i = 0; i < NumElts; ++i) {
04332     SDValue Res = NewLD.getValue(i);
04333     if (NeedTrunc)
04334       Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
04335     ScalarRes.push_back(Res);
04336   }
04337 
04338   SDValue LoadChain = NewLD.getValue(NumElts);
04339 
04340   SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
04341 
04342   Results.push_back(BuildVec);
04343   Results.push_back(LoadChain);
04344 }
04345 
/// ReplaceINTRINSIC_W_CHAIN - Custom-legalize the results of ldg/ldu
/// intrinsics whose result type is not directly legal: vector results become
/// LDGV2/LDGV4/LDUV2/LDUV4 multi-result nodes, and scalar i8 results are
/// widened to i16 (truncated back afterwards).  The replacement value and
/// chain are pushed onto \p Results; unhandled intrinsics are left alone.
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Chain = N->getOperand(0);
  SDValue Intrin = N->getOperand(1);
  SDLoc DL(N);

  // Get the intrinsic ID
  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
  switch (IntrinNo) {
  default:
    return;
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    EVT ResVT = N->getValueType(0);

    if (ResVT.isVector()) {
      // Vector LDG/LDU

      unsigned NumElts = ResVT.getVectorNumElements();
      EVT EltVT = ResVT.getVectorElementType();

      // Since LDU/LDG are target nodes, we cannot rely on DAG type
      // legalization.
      // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
      // loaded type to i16 and propagate the "real" type as the memory type.
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      // Pick the target opcode from (element count, ldg-vs-ldu); only 2- and
      // 4-element vectors have a corresponding node.
      switch (NumElts) {
      default:
        return;
      case 2:
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs);
        break;
      }
      }

      SmallVector<SDValue, 8> OtherOps;

      // Copy regular operands

      OtherOps.push_back(Chain); // Chain
                                 // Skip operand 1 (intrinsic ID)
      // Others
      OtherOps.append(N->op_begin() + 2, N->op_end());

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // Build the replacement node, preserving the original memory VT and
      // memory operand.
      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                              MemSD->getMemoryVT(),
                                              MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      // Gather scalar results, truncating widened i16 elements back to the
      // original element type.
      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res =
              DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
        ScalarRes.push_back(Res);
      }

      SDValue LoadChain = NewLD.getValue(NumElts);

      SDValue BuildVec =
          DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());

      // Force output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
                                  MVT::i8, MemSD->getMemOperand());

      // Truncate the widened i16 result back to i8 for the replacement value.
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                                    NewLD.getValue(0)));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}
04483 
04484 void NVPTXTargetLowering::ReplaceNodeResults(
04485     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
04486   switch (N->getOpcode()) {
04487   default:
04488     report_fatal_error("Unhandled custom legalization");
04489   case ISD::LOAD:
04490     ReplaceLoadVector(N, DAG, getDataLayout(), Results);
04491     return;
04492   case ISD::INTRINSIC_W_CHAIN:
04493     ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
04494     return;
04495   }
04496 }
04497 
// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
// (Defining one out-of-line virtual member per class gives each vtable a
// single home TU instead of being emitted in every user.)
void NVPTXSection::anchor() {}
04500 
// NVPTXTargetObjectFile holds its MCSection objects as raw owning pointers;
// release every one here.  NOTE(review): assumes no section pointer aliases
// another — each is deleted exactly once.
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
  delete TextSection;
  delete DataSection;
  delete BSSSection;
  delete ReadOnlySection;

  delete StaticCtorSection;
  delete StaticDtorSection;
  delete LSDASection;
  delete EHFrameSection;
  delete DwarfAbbrevSection;
  delete DwarfInfoSection;
  delete DwarfLineSection;
  delete DwarfFrameSection;
  delete DwarfPubTypesSection;
  delete DwarfDebugInlineSection;
  delete DwarfStrSection;
  delete DwarfLocSection;
  delete DwarfARangesSection;
  delete DwarfRangesSection;
}
04522 
/// SelectSectionForGlobal - Every global is placed in the single data
/// section, regardless of the global's kind or name.
MCSection *
NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
                                              SectionKind Kind, Mangler &Mang,
                                              const TargetMachine &TM) const {
  return getDataSection();
}