LLVM  mainline
NVPTXISelDAGToDAG.cpp
Go to the documentation of this file.
00001 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines an instruction selector for the NVPTX target.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "NVPTXISelDAGToDAG.h"
00015 #include "NVPTXUtilities.h"
00016 #include "llvm/Analysis/ValueTracking.h"
00017 #include "llvm/IR/GlobalValue.h"
00018 #include "llvm/IR/Instructions.h"
00019 #include "llvm/Support/CommandLine.h"
00020 #include "llvm/Support/Debug.h"
00021 #include "llvm/Support/ErrorHandling.h"
00022 #include "llvm/Support/raw_ostream.h"
00023 #include "llvm/Target/TargetIntrinsicInfo.h"
00024 
00025 using namespace llvm;
00026 
00027 #define DEBUG_TYPE "nvptx-isel"
00028 
00029 static cl::opt<int> UsePrecDivF32(
00030     "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
00031     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
00032              " IEEE Compliant F32 div.rnd if available."),
00033     cl::init(2));
00034 
00035 static cl::opt<bool>
00036 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
00037           cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
00038           cl::init(true));
00039 
00040 static cl::opt<bool>
00041 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
00042            cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
00043            cl::init(false));
00044 
00045 
00046 /// createNVPTXISelDag - This pass converts a legalized DAG into a
00047 /// NVPTX-specific DAG, ready for instruction scheduling.
00048 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
00049                                        llvm::CodeGenOpt::Level OptLevel) {
00050   return new NVPTXDAGToDAGISel(TM, OptLevel);
00051 }
00052 
00053 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
00054                                      CodeGenOpt::Level OptLevel)
00055     : SelectionDAGISel(tm, OptLevel), TM(tm) {
00056   doMulWide = (OptLevel > 0);
00057 }
00058 
00059 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
00060     Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
00061     return SelectionDAGISel::runOnMachineFunction(MF);
00062 }
00063 
00064 int NVPTXDAGToDAGISel::getDivF32Level() const {
00065   if (UsePrecDivF32.getNumOccurrences() > 0) {
00066     // If nvptx-prec-div32=N is used on the command-line, always honor it
00067     return UsePrecDivF32;
00068   } else {
00069     // Otherwise, use div.approx if fast math is enabled
00070     if (TM.Options.UnsafeFPMath)
00071       return 0;
00072     else
00073       return 2;
00074   }
00075 }
00076 
00077 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
00078   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
00079     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
00080     return UsePrecSqrtF32;
00081   } else {
00082     // Otherwise, use sqrt.approx if fast math is enabled
00083     return !TM.Options.UnsafeFPMath;
00084   }
00085 }
00086 
00087 bool NVPTXDAGToDAGISel::useF32FTZ() const {
00088   if (FtzEnabled.getNumOccurrences() > 0) {
00089     // If nvptx-f32ftz is used on the command-line, always honor it
00090     return FtzEnabled;
00091   } else {
00092     const Function *F = MF->getFunction();
00093     // Otherwise, check for an nvptx-f32ftz attribute on the function
00094     if (F->hasFnAttribute("nvptx-f32ftz"))
00095       return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
00096     else
00097       return false;
00098   }
00099 }
00100 
00101 bool NVPTXDAGToDAGISel::allowFMA() const {
00102   const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
00103   return TL->allowFMA(*MF, OptLevel);
00104 }
00105 
00106 /// Select - Select instructions not customized! Used for
00107 /// expanded, promoted and normal instructions.
00108 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
00109 
00110   if (N->isMachineOpcode()) {
00111     N->setNodeId(-1);
00112     return nullptr; // Already selected.
00113   }
00114 
00115   SDNode *ResNode = nullptr;
00116   switch (N->getOpcode()) {
00117   case ISD::LOAD:
00118     ResNode = SelectLoad(N);
00119     break;
00120   case ISD::STORE:
00121     ResNode = SelectStore(N);
00122     break;
00123   case NVPTXISD::LoadV2:
00124   case NVPTXISD::LoadV4:
00125     ResNode = SelectLoadVector(N);
00126     break;
00127   case NVPTXISD::LDGV2:
00128   case NVPTXISD::LDGV4:
00129   case NVPTXISD::LDUV2:
00130   case NVPTXISD::LDUV4:
00131     ResNode = SelectLDGLDU(N);
00132     break;
00133   case NVPTXISD::StoreV2:
00134   case NVPTXISD::StoreV4:
00135     ResNode = SelectStoreVector(N);
00136     break;
00137   case NVPTXISD::LoadParam:
00138   case NVPTXISD::LoadParamV2:
00139   case NVPTXISD::LoadParamV4:
00140     ResNode = SelectLoadParam(N);
00141     break;
00142   case NVPTXISD::StoreRetval:
00143   case NVPTXISD::StoreRetvalV2:
00144   case NVPTXISD::StoreRetvalV4:
00145     ResNode = SelectStoreRetval(N);
00146     break;
00147   case NVPTXISD::StoreParam:
00148   case NVPTXISD::StoreParamV2:
00149   case NVPTXISD::StoreParamV4:
00150   case NVPTXISD::StoreParamS32:
00151   case NVPTXISD::StoreParamU32:
00152     ResNode = SelectStoreParam(N);
00153     break;
00154   case ISD::INTRINSIC_WO_CHAIN:
00155     ResNode = SelectIntrinsicNoChain(N);
00156     break;
00157   case ISD::INTRINSIC_W_CHAIN:
00158     ResNode = SelectIntrinsicChain(N);
00159     break;
00160   case NVPTXISD::Tex1DFloatS32:
00161   case NVPTXISD::Tex1DFloatFloat:
00162   case NVPTXISD::Tex1DFloatFloatLevel:
00163   case NVPTXISD::Tex1DFloatFloatGrad:
00164   case NVPTXISD::Tex1DS32S32:
00165   case NVPTXISD::Tex1DS32Float:
00166   case NVPTXISD::Tex1DS32FloatLevel:
00167   case NVPTXISD::Tex1DS32FloatGrad:
00168   case NVPTXISD::Tex1DU32S32:
00169   case NVPTXISD::Tex1DU32Float:
00170   case NVPTXISD::Tex1DU32FloatLevel:
00171   case NVPTXISD::Tex1DU32FloatGrad:
00172   case NVPTXISD::Tex1DArrayFloatS32:
00173   case NVPTXISD::Tex1DArrayFloatFloat:
00174   case NVPTXISD::Tex1DArrayFloatFloatLevel:
00175   case NVPTXISD::Tex1DArrayFloatFloatGrad:
00176   case NVPTXISD::Tex1DArrayS32S32:
00177   case NVPTXISD::Tex1DArrayS32Float:
00178   case NVPTXISD::Tex1DArrayS32FloatLevel:
00179   case NVPTXISD::Tex1DArrayS32FloatGrad:
00180   case NVPTXISD::Tex1DArrayU32S32:
00181   case NVPTXISD::Tex1DArrayU32Float:
00182   case NVPTXISD::Tex1DArrayU32FloatLevel:
00183   case NVPTXISD::Tex1DArrayU32FloatGrad:
00184   case NVPTXISD::Tex2DFloatS32:
00185   case NVPTXISD::Tex2DFloatFloat:
00186   case NVPTXISD::Tex2DFloatFloatLevel:
00187   case NVPTXISD::Tex2DFloatFloatGrad:
00188   case NVPTXISD::Tex2DS32S32:
00189   case NVPTXISD::Tex2DS32Float:
00190   case NVPTXISD::Tex2DS32FloatLevel:
00191   case NVPTXISD::Tex2DS32FloatGrad:
00192   case NVPTXISD::Tex2DU32S32:
00193   case NVPTXISD::Tex2DU32Float:
00194   case NVPTXISD::Tex2DU32FloatLevel:
00195   case NVPTXISD::Tex2DU32FloatGrad:
00196   case NVPTXISD::Tex2DArrayFloatS32:
00197   case NVPTXISD::Tex2DArrayFloatFloat:
00198   case NVPTXISD::Tex2DArrayFloatFloatLevel:
00199   case NVPTXISD::Tex2DArrayFloatFloatGrad:
00200   case NVPTXISD::Tex2DArrayS32S32:
00201   case NVPTXISD::Tex2DArrayS32Float:
00202   case NVPTXISD::Tex2DArrayS32FloatLevel:
00203   case NVPTXISD::Tex2DArrayS32FloatGrad:
00204   case NVPTXISD::Tex2DArrayU32S32:
00205   case NVPTXISD::Tex2DArrayU32Float:
00206   case NVPTXISD::Tex2DArrayU32FloatLevel:
00207   case NVPTXISD::Tex2DArrayU32FloatGrad:
00208   case NVPTXISD::Tex3DFloatS32:
00209   case NVPTXISD::Tex3DFloatFloat:
00210   case NVPTXISD::Tex3DFloatFloatLevel:
00211   case NVPTXISD::Tex3DFloatFloatGrad:
00212   case NVPTXISD::Tex3DS32S32:
00213   case NVPTXISD::Tex3DS32Float:
00214   case NVPTXISD::Tex3DS32FloatLevel:
00215   case NVPTXISD::Tex3DS32FloatGrad:
00216   case NVPTXISD::Tex3DU32S32:
00217   case NVPTXISD::Tex3DU32Float:
00218   case NVPTXISD::Tex3DU32FloatLevel:
00219   case NVPTXISD::Tex3DU32FloatGrad:
00220   case NVPTXISD::TexCubeFloatFloat:
00221   case NVPTXISD::TexCubeFloatFloatLevel:
00222   case NVPTXISD::TexCubeS32Float:
00223   case NVPTXISD::TexCubeS32FloatLevel:
00224   case NVPTXISD::TexCubeU32Float:
00225   case NVPTXISD::TexCubeU32FloatLevel:
00226   case NVPTXISD::TexCubeArrayFloatFloat:
00227   case NVPTXISD::TexCubeArrayFloatFloatLevel:
00228   case NVPTXISD::TexCubeArrayS32Float:
00229   case NVPTXISD::TexCubeArrayS32FloatLevel:
00230   case NVPTXISD::TexCubeArrayU32Float:
00231   case NVPTXISD::TexCubeArrayU32FloatLevel:
00232   case NVPTXISD::Tld4R2DFloatFloat:
00233   case NVPTXISD::Tld4G2DFloatFloat:
00234   case NVPTXISD::Tld4B2DFloatFloat:
00235   case NVPTXISD::Tld4A2DFloatFloat:
00236   case NVPTXISD::Tld4R2DS64Float:
00237   case NVPTXISD::Tld4G2DS64Float:
00238   case NVPTXISD::Tld4B2DS64Float:
00239   case NVPTXISD::Tld4A2DS64Float:
00240   case NVPTXISD::Tld4R2DU64Float:
00241   case NVPTXISD::Tld4G2DU64Float:
00242   case NVPTXISD::Tld4B2DU64Float:
00243   case NVPTXISD::Tld4A2DU64Float:
00244   case NVPTXISD::TexUnified1DFloatS32:
00245   case NVPTXISD::TexUnified1DFloatFloat:
00246   case NVPTXISD::TexUnified1DFloatFloatLevel:
00247   case NVPTXISD::TexUnified1DFloatFloatGrad:
00248   case NVPTXISD::TexUnified1DS32S32:
00249   case NVPTXISD::TexUnified1DS32Float:
00250   case NVPTXISD::TexUnified1DS32FloatLevel:
00251   case NVPTXISD::TexUnified1DS32FloatGrad:
00252   case NVPTXISD::TexUnified1DU32S32:
00253   case NVPTXISD::TexUnified1DU32Float:
00254   case NVPTXISD::TexUnified1DU32FloatLevel:
00255   case NVPTXISD::TexUnified1DU32FloatGrad:
00256   case NVPTXISD::TexUnified1DArrayFloatS32:
00257   case NVPTXISD::TexUnified1DArrayFloatFloat:
00258   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
00259   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
00260   case NVPTXISD::TexUnified1DArrayS32S32:
00261   case NVPTXISD::TexUnified1DArrayS32Float:
00262   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
00263   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
00264   case NVPTXISD::TexUnified1DArrayU32S32:
00265   case NVPTXISD::TexUnified1DArrayU32Float:
00266   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
00267   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
00268   case NVPTXISD::TexUnified2DFloatS32:
00269   case NVPTXISD::TexUnified2DFloatFloat:
00270   case NVPTXISD::TexUnified2DFloatFloatLevel:
00271   case NVPTXISD::TexUnified2DFloatFloatGrad:
00272   case NVPTXISD::TexUnified2DS32S32:
00273   case NVPTXISD::TexUnified2DS32Float:
00274   case NVPTXISD::TexUnified2DS32FloatLevel:
00275   case NVPTXISD::TexUnified2DS32FloatGrad:
00276   case NVPTXISD::TexUnified2DU32S32:
00277   case NVPTXISD::TexUnified2DU32Float:
00278   case NVPTXISD::TexUnified2DU32FloatLevel:
00279   case NVPTXISD::TexUnified2DU32FloatGrad:
00280   case NVPTXISD::TexUnified2DArrayFloatS32:
00281   case NVPTXISD::TexUnified2DArrayFloatFloat:
00282   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
00283   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
00284   case NVPTXISD::TexUnified2DArrayS32S32:
00285   case NVPTXISD::TexUnified2DArrayS32Float:
00286   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
00287   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
00288   case NVPTXISD::TexUnified2DArrayU32S32:
00289   case NVPTXISD::TexUnified2DArrayU32Float:
00290   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
00291   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
00292   case NVPTXISD::TexUnified3DFloatS32:
00293   case NVPTXISD::TexUnified3DFloatFloat:
00294   case NVPTXISD::TexUnified3DFloatFloatLevel:
00295   case NVPTXISD::TexUnified3DFloatFloatGrad:
00296   case NVPTXISD::TexUnified3DS32S32:
00297   case NVPTXISD::TexUnified3DS32Float:
00298   case NVPTXISD::TexUnified3DS32FloatLevel:
00299   case NVPTXISD::TexUnified3DS32FloatGrad:
00300   case NVPTXISD::TexUnified3DU32S32:
00301   case NVPTXISD::TexUnified3DU32Float:
00302   case NVPTXISD::TexUnified3DU32FloatLevel:
00303   case NVPTXISD::TexUnified3DU32FloatGrad:
00304   case NVPTXISD::TexUnifiedCubeFloatFloat:
00305   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
00306   case NVPTXISD::TexUnifiedCubeS32Float:
00307   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
00308   case NVPTXISD::TexUnifiedCubeU32Float:
00309   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
00310   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
00311   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
00312   case NVPTXISD::TexUnifiedCubeArrayS32Float:
00313   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
00314   case NVPTXISD::TexUnifiedCubeArrayU32Float:
00315   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
00316   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
00317   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
00318   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
00319   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
00320   case NVPTXISD::Tld4UnifiedR2DS64Float:
00321   case NVPTXISD::Tld4UnifiedG2DS64Float:
00322   case NVPTXISD::Tld4UnifiedB2DS64Float:
00323   case NVPTXISD::Tld4UnifiedA2DS64Float:
00324   case NVPTXISD::Tld4UnifiedR2DU64Float:
00325   case NVPTXISD::Tld4UnifiedG2DU64Float:
00326   case NVPTXISD::Tld4UnifiedB2DU64Float:
00327   case NVPTXISD::Tld4UnifiedA2DU64Float:
00328     ResNode = SelectTextureIntrinsic(N);
00329     break;
00330   case NVPTXISD::Suld1DI8Clamp:
00331   case NVPTXISD::Suld1DI16Clamp:
00332   case NVPTXISD::Suld1DI32Clamp:
00333   case NVPTXISD::Suld1DI64Clamp:
00334   case NVPTXISD::Suld1DV2I8Clamp:
00335   case NVPTXISD::Suld1DV2I16Clamp:
00336   case NVPTXISD::Suld1DV2I32Clamp:
00337   case NVPTXISD::Suld1DV2I64Clamp:
00338   case NVPTXISD::Suld1DV4I8Clamp:
00339   case NVPTXISD::Suld1DV4I16Clamp:
00340   case NVPTXISD::Suld1DV4I32Clamp:
00341   case NVPTXISD::Suld1DArrayI8Clamp:
00342   case NVPTXISD::Suld1DArrayI16Clamp:
00343   case NVPTXISD::Suld1DArrayI32Clamp:
00344   case NVPTXISD::Suld1DArrayI64Clamp:
00345   case NVPTXISD::Suld1DArrayV2I8Clamp:
00346   case NVPTXISD::Suld1DArrayV2I16Clamp:
00347   case NVPTXISD::Suld1DArrayV2I32Clamp:
00348   case NVPTXISD::Suld1DArrayV2I64Clamp:
00349   case NVPTXISD::Suld1DArrayV4I8Clamp:
00350   case NVPTXISD::Suld1DArrayV4I16Clamp:
00351   case NVPTXISD::Suld1DArrayV4I32Clamp:
00352   case NVPTXISD::Suld2DI8Clamp:
00353   case NVPTXISD::Suld2DI16Clamp:
00354   case NVPTXISD::Suld2DI32Clamp:
00355   case NVPTXISD::Suld2DI64Clamp:
00356   case NVPTXISD::Suld2DV2I8Clamp:
00357   case NVPTXISD::Suld2DV2I16Clamp:
00358   case NVPTXISD::Suld2DV2I32Clamp:
00359   case NVPTXISD::Suld2DV2I64Clamp:
00360   case NVPTXISD::Suld2DV4I8Clamp:
00361   case NVPTXISD::Suld2DV4I16Clamp:
00362   case NVPTXISD::Suld2DV4I32Clamp:
00363   case NVPTXISD::Suld2DArrayI8Clamp:
00364   case NVPTXISD::Suld2DArrayI16Clamp:
00365   case NVPTXISD::Suld2DArrayI32Clamp:
00366   case NVPTXISD::Suld2DArrayI64Clamp:
00367   case NVPTXISD::Suld2DArrayV2I8Clamp:
00368   case NVPTXISD::Suld2DArrayV2I16Clamp:
00369   case NVPTXISD::Suld2DArrayV2I32Clamp:
00370   case NVPTXISD::Suld2DArrayV2I64Clamp:
00371   case NVPTXISD::Suld2DArrayV4I8Clamp:
00372   case NVPTXISD::Suld2DArrayV4I16Clamp:
00373   case NVPTXISD::Suld2DArrayV4I32Clamp:
00374   case NVPTXISD::Suld3DI8Clamp:
00375   case NVPTXISD::Suld3DI16Clamp:
00376   case NVPTXISD::Suld3DI32Clamp:
00377   case NVPTXISD::Suld3DI64Clamp:
00378   case NVPTXISD::Suld3DV2I8Clamp:
00379   case NVPTXISD::Suld3DV2I16Clamp:
00380   case NVPTXISD::Suld3DV2I32Clamp:
00381   case NVPTXISD::Suld3DV2I64Clamp:
00382   case NVPTXISD::Suld3DV4I8Clamp:
00383   case NVPTXISD::Suld3DV4I16Clamp:
00384   case NVPTXISD::Suld3DV4I32Clamp:
00385   case NVPTXISD::Suld1DI8Trap:
00386   case NVPTXISD::Suld1DI16Trap:
00387   case NVPTXISD::Suld1DI32Trap:
00388   case NVPTXISD::Suld1DI64Trap:
00389   case NVPTXISD::Suld1DV2I8Trap:
00390   case NVPTXISD::Suld1DV2I16Trap:
00391   case NVPTXISD::Suld1DV2I32Trap:
00392   case NVPTXISD::Suld1DV2I64Trap:
00393   case NVPTXISD::Suld1DV4I8Trap:
00394   case NVPTXISD::Suld1DV4I16Trap:
00395   case NVPTXISD::Suld1DV4I32Trap:
00396   case NVPTXISD::Suld1DArrayI8Trap:
00397   case NVPTXISD::Suld1DArrayI16Trap:
00398   case NVPTXISD::Suld1DArrayI32Trap:
00399   case NVPTXISD::Suld1DArrayI64Trap:
00400   case NVPTXISD::Suld1DArrayV2I8Trap:
00401   case NVPTXISD::Suld1DArrayV2I16Trap:
00402   case NVPTXISD::Suld1DArrayV2I32Trap:
00403   case NVPTXISD::Suld1DArrayV2I64Trap:
00404   case NVPTXISD::Suld1DArrayV4I8Trap:
00405   case NVPTXISD::Suld1DArrayV4I16Trap:
00406   case NVPTXISD::Suld1DArrayV4I32Trap:
00407   case NVPTXISD::Suld2DI8Trap:
00408   case NVPTXISD::Suld2DI16Trap:
00409   case NVPTXISD::Suld2DI32Trap:
00410   case NVPTXISD::Suld2DI64Trap:
00411   case NVPTXISD::Suld2DV2I8Trap:
00412   case NVPTXISD::Suld2DV2I16Trap:
00413   case NVPTXISD::Suld2DV2I32Trap:
00414   case NVPTXISD::Suld2DV2I64Trap:
00415   case NVPTXISD::Suld2DV4I8Trap:
00416   case NVPTXISD::Suld2DV4I16Trap:
00417   case NVPTXISD::Suld2DV4I32Trap:
00418   case NVPTXISD::Suld2DArrayI8Trap:
00419   case NVPTXISD::Suld2DArrayI16Trap:
00420   case NVPTXISD::Suld2DArrayI32Trap:
00421   case NVPTXISD::Suld2DArrayI64Trap:
00422   case NVPTXISD::Suld2DArrayV2I8Trap:
00423   case NVPTXISD::Suld2DArrayV2I16Trap:
00424   case NVPTXISD::Suld2DArrayV2I32Trap:
00425   case NVPTXISD::Suld2DArrayV2I64Trap:
00426   case NVPTXISD::Suld2DArrayV4I8Trap:
00427   case NVPTXISD::Suld2DArrayV4I16Trap:
00428   case NVPTXISD::Suld2DArrayV4I32Trap:
00429   case NVPTXISD::Suld3DI8Trap:
00430   case NVPTXISD::Suld3DI16Trap:
00431   case NVPTXISD::Suld3DI32Trap:
00432   case NVPTXISD::Suld3DI64Trap:
00433   case NVPTXISD::Suld3DV2I8Trap:
00434   case NVPTXISD::Suld3DV2I16Trap:
00435   case NVPTXISD::Suld3DV2I32Trap:
00436   case NVPTXISD::Suld3DV2I64Trap:
00437   case NVPTXISD::Suld3DV4I8Trap:
00438   case NVPTXISD::Suld3DV4I16Trap:
00439   case NVPTXISD::Suld3DV4I32Trap:
00440   case NVPTXISD::Suld1DI8Zero:
00441   case NVPTXISD::Suld1DI16Zero:
00442   case NVPTXISD::Suld1DI32Zero:
00443   case NVPTXISD::Suld1DI64Zero:
00444   case NVPTXISD::Suld1DV2I8Zero:
00445   case NVPTXISD::Suld1DV2I16Zero:
00446   case NVPTXISD::Suld1DV2I32Zero:
00447   case NVPTXISD::Suld1DV2I64Zero:
00448   case NVPTXISD::Suld1DV4I8Zero:
00449   case NVPTXISD::Suld1DV4I16Zero:
00450   case NVPTXISD::Suld1DV4I32Zero:
00451   case NVPTXISD::Suld1DArrayI8Zero:
00452   case NVPTXISD::Suld1DArrayI16Zero:
00453   case NVPTXISD::Suld1DArrayI32Zero:
00454   case NVPTXISD::Suld1DArrayI64Zero:
00455   case NVPTXISD::Suld1DArrayV2I8Zero:
00456   case NVPTXISD::Suld1DArrayV2I16Zero:
00457   case NVPTXISD::Suld1DArrayV2I32Zero:
00458   case NVPTXISD::Suld1DArrayV2I64Zero:
00459   case NVPTXISD::Suld1DArrayV4I8Zero:
00460   case NVPTXISD::Suld1DArrayV4I16Zero:
00461   case NVPTXISD::Suld1DArrayV4I32Zero:
00462   case NVPTXISD::Suld2DI8Zero:
00463   case NVPTXISD::Suld2DI16Zero:
00464   case NVPTXISD::Suld2DI32Zero:
00465   case NVPTXISD::Suld2DI64Zero:
00466   case NVPTXISD::Suld2DV2I8Zero:
00467   case NVPTXISD::Suld2DV2I16Zero:
00468   case NVPTXISD::Suld2DV2I32Zero:
00469   case NVPTXISD::Suld2DV2I64Zero:
00470   case NVPTXISD::Suld2DV4I8Zero:
00471   case NVPTXISD::Suld2DV4I16Zero:
00472   case NVPTXISD::Suld2DV4I32Zero:
00473   case NVPTXISD::Suld2DArrayI8Zero:
00474   case NVPTXISD::Suld2DArrayI16Zero:
00475   case NVPTXISD::Suld2DArrayI32Zero:
00476   case NVPTXISD::Suld2DArrayI64Zero:
00477   case NVPTXISD::Suld2DArrayV2I8Zero:
00478   case NVPTXISD::Suld2DArrayV2I16Zero:
00479   case NVPTXISD::Suld2DArrayV2I32Zero:
00480   case NVPTXISD::Suld2DArrayV2I64Zero:
00481   case NVPTXISD::Suld2DArrayV4I8Zero:
00482   case NVPTXISD::Suld2DArrayV4I16Zero:
00483   case NVPTXISD::Suld2DArrayV4I32Zero:
00484   case NVPTXISD::Suld3DI8Zero:
00485   case NVPTXISD::Suld3DI16Zero:
00486   case NVPTXISD::Suld3DI32Zero:
00487   case NVPTXISD::Suld3DI64Zero:
00488   case NVPTXISD::Suld3DV2I8Zero:
00489   case NVPTXISD::Suld3DV2I16Zero:
00490   case NVPTXISD::Suld3DV2I32Zero:
00491   case NVPTXISD::Suld3DV2I64Zero:
00492   case NVPTXISD::Suld3DV4I8Zero:
00493   case NVPTXISD::Suld3DV4I16Zero:
00494   case NVPTXISD::Suld3DV4I32Zero:
00495     ResNode = SelectSurfaceIntrinsic(N);
00496     break;
00497   case ISD::AND:
00498   case ISD::SRA:
00499   case ISD::SRL:
00500     // Try to select BFE
00501     ResNode = SelectBFE(N);
00502     break;
00503   case ISD::ADDRSPACECAST:
00504     ResNode = SelectAddrSpaceCast(N);
00505     break;
00506   default:
00507     break;
00508   }
00509   if (ResNode)
00510     return ResNode;
00511   return SelectCode(N);
00512 }
00513 
00514 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
00515   unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
00516   switch (IID) {
00517   default:
00518     return NULL;
00519   case Intrinsic::nvvm_ldg_global_f:
00520   case Intrinsic::nvvm_ldg_global_i:
00521   case Intrinsic::nvvm_ldg_global_p:
00522   case Intrinsic::nvvm_ldu_global_f:
00523   case Intrinsic::nvvm_ldu_global_i:
00524   case Intrinsic::nvvm_ldu_global_p:
00525     return SelectLDGLDU(N);
00526   }
00527 }
00528 
00529 static unsigned int getCodeAddrSpace(MemSDNode *N) {
00530   const Value *Src = N->getMemOperand()->getValue();
00531 
00532   if (!Src)
00533     return NVPTX::PTXLdStInstCode::GENERIC;
00534 
00535   if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
00536     switch (PT->getAddressSpace()) {
00537     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
00538     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
00539     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
00540     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
00541     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
00542     case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
00543     default: break;
00544     }
00545   }
00546   return NVPTX::PTXLdStInstCode::GENERIC;
00547 }
00548 
00549 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
00550                           unsigned CodeAddrSpace, MachineFunction *F) {
00551   // To use non-coherent caching, the load has to be from global
00552   // memory and we have to prove that the memory area is not written
00553   // to anywhere for the duration of the kernel call, not even after
00554   // the load.
00555   //
00556   // To ensure that there are no writes to the memory, we require the
00557   // underlying pointer to be a noalias (__restrict) kernel parameter
00558   // that is never used for a write. We can only do this for kernel
00559   // functions since from within a device function, we cannot know if
00560   // there were or will be writes to the memory from the caller - or we
00561   // could, but then we would have to do inter-procedural analysis.
00562   if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
00563       !isKernelFunction(*F->getFunction())) {
00564     return false;
00565   }
00566 
00567   // We use GetUnderlyingObjects() here instead of
00568   // GetUnderlyingObject() mainly because the former looks through phi
00569   // nodes while the latter does not. We need to look through phi
00570   // nodes to handle pointer induction variables.
00571   SmallVector<Value *, 8> Objs;
00572   GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
00573                        Objs, F->getDataLayout());
00574   for (Value *Obj : Objs) {
00575     auto *A = dyn_cast<const Argument>(Obj);
00576     if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
00577   }
00578 
00579   return true;
00580 }
00581 
00582 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
00583   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
00584   switch (IID) {
00585   default:
00586     return nullptr;
00587   case Intrinsic::nvvm_texsurf_handle_internal:
00588     return SelectTexSurfHandle(N);
00589   }
00590 }
00591 
00592 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
00593   // Op 0 is the intrinsic ID
00594   SDValue Wrapper = N->getOperand(1);
00595   SDValue GlobalVal = Wrapper.getOperand(0);
00596   return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
00597                                 GlobalVal);
00598 }
00599 
00600 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
00601   SDValue Src = N->getOperand(0);
00602   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
00603   unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
00604   unsigned DstAddrSpace = CastN->getDestAddressSpace();
00605 
00606   assert(SrcAddrSpace != DstAddrSpace &&
00607          "addrspacecast must be between different address spaces");
00608 
00609   if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
00610     // Specific to generic
00611     unsigned Opc;
00612     switch (SrcAddrSpace) {
00613     default: report_fatal_error("Bad address space in addrspacecast");
00614     case ADDRESS_SPACE_GLOBAL:
00615       Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
00616       break;
00617     case ADDRESS_SPACE_SHARED:
00618       Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
00619       break;
00620     case ADDRESS_SPACE_CONST:
00621       Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
00622       break;
00623     case ADDRESS_SPACE_LOCAL:
00624       Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
00625       break;
00626     }
00627     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
00628   } else {
00629     // Generic to specific
00630     if (SrcAddrSpace != 0)
00631       report_fatal_error("Cannot cast between two non-generic address spaces");
00632     unsigned Opc;
00633     switch (DstAddrSpace) {
00634     default: report_fatal_error("Bad address space in addrspacecast");
00635     case ADDRESS_SPACE_GLOBAL:
00636       Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
00637                          : NVPTX::cvta_to_global_yes;
00638       break;
00639     case ADDRESS_SPACE_SHARED:
00640       Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
00641                          : NVPTX::cvta_to_shared_yes;
00642       break;
00643     case ADDRESS_SPACE_CONST:
00644       Opc =
00645           TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
00646       break;
00647     case ADDRESS_SPACE_LOCAL:
00648       Opc =
00649           TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
00650       break;
00651     case ADDRESS_SPACE_PARAM:
00652       Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
00653                          : NVPTX::nvvm_ptr_gen_to_param;
00654       break;
00655     }
00656     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
00657   }
00658 }
00659 
00660 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
00661   SDLoc dl(N);
00662   LoadSDNode *LD = cast<LoadSDNode>(N);
00663   EVT LoadedVT = LD->getMemoryVT();
00664   SDNode *NVPTXLD = nullptr;
00665 
00666   // do not support pre/post inc/dec
00667   if (LD->isIndexed())
00668     return nullptr;
00669 
00670   if (!LoadedVT.isSimple())
00671     return nullptr;
00672 
00673   // Address Space Setting
00674   unsigned int codeAddrSpace = getCodeAddrSpace(LD);
00675 
00676   if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
00677     return SelectLDGLDU(N);
00678   }
00679 
00680   // Volatile Setting
00681   // - .volatile is only available for .global and .shared
00682   bool isVolatile = LD->isVolatile();
00683   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
00684       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
00685       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
00686     isVolatile = false;
00687 
00688   // Vector Setting
00689   MVT SimpleVT = LoadedVT.getSimpleVT();
00690   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
00691   if (SimpleVT.isVector()) {
00692     unsigned num = SimpleVT.getVectorNumElements();
00693     if (num == 2)
00694       vecType = NVPTX::PTXLdStInstCode::V2;
00695     else if (num == 4)
00696       vecType = NVPTX::PTXLdStInstCode::V4;
00697     else
00698       return nullptr;
00699   }
00700 
00701   // Type Setting: fromType + fromTypeWidth
00702   //
00703   // Sign   : ISD::SEXTLOAD
00704   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
00705   //          type is integer
00706   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
00707   MVT ScalarVT = SimpleVT.getScalarType();
00708   // Read at least 8 bits (predicates are stored as 8-bit values)
00709   unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
00710   unsigned int fromType;
00711   if ((LD->getExtensionType() == ISD::SEXTLOAD))
00712     fromType = NVPTX::PTXLdStInstCode::Signed;
00713   else if (ScalarVT.isFloatingPoint())
00714     fromType = NVPTX::PTXLdStInstCode::Float;
00715   else
00716     fromType = NVPTX::PTXLdStInstCode::Unsigned;
00717 
00718   // Create the machine instruction DAG
00719   SDValue Chain = N->getOperand(0);
00720   SDValue N1 = N->getOperand(1);
00721   SDValue Addr;
00722   SDValue Offset, Base;
00723   unsigned Opcode;
00724   MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
00725 
00726   if (SelectDirectAddr(N1, Addr)) {
00727     switch (TargetVT) {
00728     case MVT::i8:
00729       Opcode = NVPTX::LD_i8_avar;
00730       break;
00731     case MVT::i16:
00732       Opcode = NVPTX::LD_i16_avar;
00733       break;
00734     case MVT::i32:
00735       Opcode = NVPTX::LD_i32_avar;
00736       break;
00737     case MVT::i64:
00738       Opcode = NVPTX::LD_i64_avar;
00739       break;
00740     case MVT::f32:
00741       Opcode = NVPTX::LD_f32_avar;
00742       break;
00743     case MVT::f64:
00744       Opcode = NVPTX::LD_f64_avar;
00745       break;
00746     default:
00747       return nullptr;
00748     }
00749     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
00750                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
00751                       getI32Imm(fromTypeWidth, dl), Addr, Chain };
00752     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
00753   } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
00754                           : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
00755     switch (TargetVT) {
00756     case MVT::i8:
00757       Opcode = NVPTX::LD_i8_asi;
00758       break;
00759     case MVT::i16:
00760       Opcode = NVPTX::LD_i16_asi;
00761       break;
00762     case MVT::i32:
00763       Opcode = NVPTX::LD_i32_asi;
00764       break;
00765     case MVT::i64:
00766       Opcode = NVPTX::LD_i64_asi;
00767       break;
00768     case MVT::f32:
00769       Opcode = NVPTX::LD_f32_asi;
00770       break;
00771     case MVT::f64:
00772       Opcode = NVPTX::LD_f64_asi;
00773       break;
00774     default:
00775       return nullptr;
00776     }
00777     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
00778                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
00779                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
00780     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
00781   } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
00782                           : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
00783     if (TM.is64Bit()) {
00784       switch (TargetVT) {
00785       case MVT::i8:
00786         Opcode = NVPTX::LD_i8_ari_64;
00787         break;
00788       case MVT::i16:
00789         Opcode = NVPTX::LD_i16_ari_64;
00790         break;
00791       case MVT::i32:
00792         Opcode = NVPTX::LD_i32_ari_64;
00793         break;
00794       case MVT::i64:
00795         Opcode = NVPTX::LD_i64_ari_64;
00796         break;
00797       case MVT::f32:
00798         Opcode = NVPTX::LD_f32_ari_64;
00799         break;
00800       case MVT::f64:
00801         Opcode = NVPTX::LD_f64_ari_64;
00802         break;
00803       default:
00804         return nullptr;
00805       }
00806     } else {
00807       switch (TargetVT) {
00808       case MVT::i8:
00809         Opcode = NVPTX::LD_i8_ari;
00810         break;
00811       case MVT::i16:
00812         Opcode = NVPTX::LD_i16_ari;
00813         break;
00814       case MVT::i32:
00815         Opcode = NVPTX::LD_i32_ari;
00816         break;
00817       case MVT::i64:
00818         Opcode = NVPTX::LD_i64_ari;
00819         break;
00820       case MVT::f32:
00821         Opcode = NVPTX::LD_f32_ari;
00822         break;
00823       case MVT::f64:
00824         Opcode = NVPTX::LD_f64_ari;
00825         break;
00826       default:
00827         return nullptr;
00828       }
00829     }
00830     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
00831                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
00832                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
00833     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
00834   } else {
00835     if (TM.is64Bit()) {
00836       switch (TargetVT) {
00837       case MVT::i8:
00838         Opcode = NVPTX::LD_i8_areg_64;
00839         break;
00840       case MVT::i16:
00841         Opcode = NVPTX::LD_i16_areg_64;
00842         break;
00843       case MVT::i32:
00844         Opcode = NVPTX::LD_i32_areg_64;
00845         break;
00846       case MVT::i64:
00847         Opcode = NVPTX::LD_i64_areg_64;
00848         break;
00849       case MVT::f32:
00850         Opcode = NVPTX::LD_f32_areg_64;
00851         break;
00852       case MVT::f64:
00853         Opcode = NVPTX::LD_f64_areg_64;
00854         break;
00855       default:
00856         return nullptr;
00857       }
00858     } else {
00859       switch (TargetVT) {
00860       case MVT::i8:
00861         Opcode = NVPTX::LD_i8_areg;
00862         break;
00863       case MVT::i16:
00864         Opcode = NVPTX::LD_i16_areg;
00865         break;
00866       case MVT::i32:
00867         Opcode = NVPTX::LD_i32_areg;
00868         break;
00869       case MVT::i64:
00870         Opcode = NVPTX::LD_i64_areg;
00871         break;
00872       case MVT::f32:
00873         Opcode = NVPTX::LD_f32_areg;
00874         break;
00875       case MVT::f64:
00876         Opcode = NVPTX::LD_f64_areg;
00877         break;
00878       default:
00879         return nullptr;
00880       }
00881     }
00882     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
00883                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
00884                       getI32Imm(fromTypeWidth, dl), N1, Chain };
00885     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
00886   }
00887 
00888   if (NVPTXLD) {
00889     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
00890     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
00891     cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
00892   }
00893 
00894   return NVPTXLD;
00895 }
00896 
00897 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
00898 
00899   SDValue Chain = N->getOperand(0);
00900   SDValue Op1 = N->getOperand(1);
00901   SDValue Addr, Offset, Base;
00902   unsigned Opcode;
00903   SDLoc DL(N);
00904   SDNode *LD;
00905   MemSDNode *MemSD = cast<MemSDNode>(N);
00906   EVT LoadedVT = MemSD->getMemoryVT();
00907 
00908   if (!LoadedVT.isSimple())
00909     return nullptr;
00910 
00911   // Address Space Setting
00912   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
00913 
00914   if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
00915     return SelectLDGLDU(N);
00916   }
00917 
00918   // Volatile Setting
00919   // - .volatile is only availalble for .global and .shared
00920   bool IsVolatile = MemSD->isVolatile();
00921   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
00922       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
00923       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
00924     IsVolatile = false;
00925 
00926   // Vector Setting
00927   MVT SimpleVT = LoadedVT.getSimpleVT();
00928 
00929   // Type Setting: fromType + fromTypeWidth
00930   //
00931   // Sign   : ISD::SEXTLOAD
00932   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
00933   //          type is integer
00934   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
00935   MVT ScalarVT = SimpleVT.getScalarType();
00936   // Read at least 8 bits (predicates are stored as 8-bit values)
00937   unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
00938   unsigned int FromType;
00939   // The last operand holds the original LoadSDNode::getExtensionType() value
00940   unsigned ExtensionType = cast<ConstantSDNode>(
00941       N->getOperand(N->getNumOperands() - 1))->getZExtValue();
00942   if (ExtensionType == ISD::SEXTLOAD)
00943     FromType = NVPTX::PTXLdStInstCode::Signed;
00944   else if (ScalarVT.isFloatingPoint())
00945     FromType = NVPTX::PTXLdStInstCode::Float;
00946   else
00947     FromType = NVPTX::PTXLdStInstCode::Unsigned;
00948 
00949   unsigned VecType;
00950 
00951   switch (N->getOpcode()) {
00952   case NVPTXISD::LoadV2:
00953     VecType = NVPTX::PTXLdStInstCode::V2;
00954     break;
00955   case NVPTXISD::LoadV4:
00956     VecType = NVPTX::PTXLdStInstCode::V4;
00957     break;
00958   default:
00959     return nullptr;
00960   }
00961 
00962   EVT EltVT = N->getValueType(0);
00963 
00964   if (SelectDirectAddr(Op1, Addr)) {
00965     switch (N->getOpcode()) {
00966     default:
00967       return nullptr;
00968     case NVPTXISD::LoadV2:
00969       switch (EltVT.getSimpleVT().SimpleTy) {
00970       default:
00971         return nullptr;
00972       case MVT::i8:
00973         Opcode = NVPTX::LDV_i8_v2_avar;
00974         break;
00975       case MVT::i16:
00976         Opcode = NVPTX::LDV_i16_v2_avar;
00977         break;
00978       case MVT::i32:
00979         Opcode = NVPTX::LDV_i32_v2_avar;
00980         break;
00981       case MVT::i64:
00982         Opcode = NVPTX::LDV_i64_v2_avar;
00983         break;
00984       case MVT::f32:
00985         Opcode = NVPTX::LDV_f32_v2_avar;
00986         break;
00987       case MVT::f64:
00988         Opcode = NVPTX::LDV_f64_v2_avar;
00989         break;
00990       }
00991       break;
00992     case NVPTXISD::LoadV4:
00993       switch (EltVT.getSimpleVT().SimpleTy) {
00994       default:
00995         return nullptr;
00996       case MVT::i8:
00997         Opcode = NVPTX::LDV_i8_v4_avar;
00998         break;
00999       case MVT::i16:
01000         Opcode = NVPTX::LDV_i16_v4_avar;
01001         break;
01002       case MVT::i32:
01003         Opcode = NVPTX::LDV_i32_v4_avar;
01004         break;
01005       case MVT::f32:
01006         Opcode = NVPTX::LDV_f32_v4_avar;
01007         break;
01008       }
01009       break;
01010     }
01011 
01012     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
01013                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
01014                       getI32Imm(FromTypeWidth, DL), Addr, Chain };
01015     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
01016   } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
01017                           : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
01018     switch (N->getOpcode()) {
01019     default:
01020       return nullptr;
01021     case NVPTXISD::LoadV2:
01022       switch (EltVT.getSimpleVT().SimpleTy) {
01023       default:
01024         return nullptr;
01025       case MVT::i8:
01026         Opcode = NVPTX::LDV_i8_v2_asi;
01027         break;
01028       case MVT::i16:
01029         Opcode = NVPTX::LDV_i16_v2_asi;
01030         break;
01031       case MVT::i32:
01032         Opcode = NVPTX::LDV_i32_v2_asi;
01033         break;
01034       case MVT::i64:
01035         Opcode = NVPTX::LDV_i64_v2_asi;
01036         break;
01037       case MVT::f32:
01038         Opcode = NVPTX::LDV_f32_v2_asi;
01039         break;
01040       case MVT::f64:
01041         Opcode = NVPTX::LDV_f64_v2_asi;
01042         break;
01043       }
01044       break;
01045     case NVPTXISD::LoadV4:
01046       switch (EltVT.getSimpleVT().SimpleTy) {
01047       default:
01048         return nullptr;
01049       case MVT::i8:
01050         Opcode = NVPTX::LDV_i8_v4_asi;
01051         break;
01052       case MVT::i16:
01053         Opcode = NVPTX::LDV_i16_v4_asi;
01054         break;
01055       case MVT::i32:
01056         Opcode = NVPTX::LDV_i32_v4_asi;
01057         break;
01058       case MVT::f32:
01059         Opcode = NVPTX::LDV_f32_v4_asi;
01060         break;
01061       }
01062       break;
01063     }
01064 
01065     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
01066                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
01067                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
01068     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
01069   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
01070                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
01071     if (TM.is64Bit()) {
01072       switch (N->getOpcode()) {
01073       default:
01074         return nullptr;
01075       case NVPTXISD::LoadV2:
01076         switch (EltVT.getSimpleVT().SimpleTy) {
01077         default:
01078           return nullptr;
01079         case MVT::i8:
01080           Opcode = NVPTX::LDV_i8_v2_ari_64;
01081           break;
01082         case MVT::i16:
01083           Opcode = NVPTX::LDV_i16_v2_ari_64;
01084           break;
01085         case MVT::i32:
01086           Opcode = NVPTX::LDV_i32_v2_ari_64;
01087           break;
01088         case MVT::i64:
01089           Opcode = NVPTX::LDV_i64_v2_ari_64;
01090           break;
01091         case MVT::f32:
01092           Opcode = NVPTX::LDV_f32_v2_ari_64;
01093           break;
01094         case MVT::f64:
01095           Opcode = NVPTX::LDV_f64_v2_ari_64;
01096           break;
01097         }
01098         break;
01099       case NVPTXISD::LoadV4:
01100         switch (EltVT.getSimpleVT().SimpleTy) {
01101         default:
01102           return nullptr;
01103         case MVT::i8:
01104           Opcode = NVPTX::LDV_i8_v4_ari_64;
01105           break;
01106         case MVT::i16:
01107           Opcode = NVPTX::LDV_i16_v4_ari_64;
01108           break;
01109         case MVT::i32:
01110           Opcode = NVPTX::LDV_i32_v4_ari_64;
01111           break;
01112         case MVT::f32:
01113           Opcode = NVPTX::LDV_f32_v4_ari_64;
01114           break;
01115         }
01116         break;
01117       }
01118     } else {
01119       switch (N->getOpcode()) {
01120       default:
01121         return nullptr;
01122       case NVPTXISD::LoadV2:
01123         switch (EltVT.getSimpleVT().SimpleTy) {
01124         default:
01125           return nullptr;
01126         case MVT::i8:
01127           Opcode = NVPTX::LDV_i8_v2_ari;
01128           break;
01129         case MVT::i16:
01130           Opcode = NVPTX::LDV_i16_v2_ari;
01131           break;
01132         case MVT::i32:
01133           Opcode = NVPTX::LDV_i32_v2_ari;
01134           break;
01135         case MVT::i64:
01136           Opcode = NVPTX::LDV_i64_v2_ari;
01137           break;
01138         case MVT::f32:
01139           Opcode = NVPTX::LDV_f32_v2_ari;
01140           break;
01141         case MVT::f64:
01142           Opcode = NVPTX::LDV_f64_v2_ari;
01143           break;
01144         }
01145         break;
01146       case NVPTXISD::LoadV4:
01147         switch (EltVT.getSimpleVT().SimpleTy) {
01148         default:
01149           return nullptr;
01150         case MVT::i8:
01151           Opcode = NVPTX::LDV_i8_v4_ari;
01152           break;
01153         case MVT::i16:
01154           Opcode = NVPTX::LDV_i16_v4_ari;
01155           break;
01156         case MVT::i32:
01157           Opcode = NVPTX::LDV_i32_v4_ari;
01158           break;
01159         case MVT::f32:
01160           Opcode = NVPTX::LDV_f32_v4_ari;
01161           break;
01162         }
01163         break;
01164       }
01165     }
01166 
01167     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
01168                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
01169                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
01170 
01171     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
01172   } else {
01173     if (TM.is64Bit()) {
01174       switch (N->getOpcode()) {
01175       default:
01176         return nullptr;
01177       case NVPTXISD::LoadV2:
01178         switch (EltVT.getSimpleVT().SimpleTy) {
01179         default:
01180           return nullptr;
01181         case MVT::i8:
01182           Opcode = NVPTX::LDV_i8_v2_areg_64;
01183           break;
01184         case MVT::i16:
01185           Opcode = NVPTX::LDV_i16_v2_areg_64;
01186           break;
01187         case MVT::i32:
01188           Opcode = NVPTX::LDV_i32_v2_areg_64;
01189           break;
01190         case MVT::i64:
01191           Opcode = NVPTX::LDV_i64_v2_areg_64;
01192           break;
01193         case MVT::f32:
01194           Opcode = NVPTX::LDV_f32_v2_areg_64;
01195           break;
01196         case MVT::f64:
01197           Opcode = NVPTX::LDV_f64_v2_areg_64;
01198           break;
01199         }
01200         break;
01201       case NVPTXISD::LoadV4:
01202         switch (EltVT.getSimpleVT().SimpleTy) {
01203         default:
01204           return nullptr;
01205         case MVT::i8:
01206           Opcode = NVPTX::LDV_i8_v4_areg_64;
01207           break;
01208         case MVT::i16:
01209           Opcode = NVPTX::LDV_i16_v4_areg_64;
01210           break;
01211         case MVT::i32:
01212           Opcode = NVPTX::LDV_i32_v4_areg_64;
01213           break;
01214         case MVT::f32:
01215           Opcode = NVPTX::LDV_f32_v4_areg_64;
01216           break;
01217         }
01218         break;
01219       }
01220     } else {
01221       switch (N->getOpcode()) {
01222       default:
01223         return nullptr;
01224       case NVPTXISD::LoadV2:
01225         switch (EltVT.getSimpleVT().SimpleTy) {
01226         default:
01227           return nullptr;
01228         case MVT::i8:
01229           Opcode = NVPTX::LDV_i8_v2_areg;
01230           break;
01231         case MVT::i16:
01232           Opcode = NVPTX::LDV_i16_v2_areg;
01233           break;
01234         case MVT::i32:
01235           Opcode = NVPTX::LDV_i32_v2_areg;
01236           break;
01237         case MVT::i64:
01238           Opcode = NVPTX::LDV_i64_v2_areg;
01239           break;
01240         case MVT::f32:
01241           Opcode = NVPTX::LDV_f32_v2_areg;
01242           break;
01243         case MVT::f64:
01244           Opcode = NVPTX::LDV_f64_v2_areg;
01245           break;
01246         }
01247         break;
01248       case NVPTXISD::LoadV4:
01249         switch (EltVT.getSimpleVT().SimpleTy) {
01250         default:
01251           return nullptr;
01252         case MVT::i8:
01253           Opcode = NVPTX::LDV_i8_v4_areg;
01254           break;
01255         case MVT::i16:
01256           Opcode = NVPTX::LDV_i16_v4_areg;
01257           break;
01258         case MVT::i32:
01259           Opcode = NVPTX::LDV_i32_v4_areg;
01260           break;
01261         case MVT::f32:
01262           Opcode = NVPTX::LDV_f32_v4_areg;
01263           break;
01264         }
01265         break;
01266       }
01267     }
01268 
01269     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
01270                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
01271                       getI32Imm(FromTypeWidth, DL), Op1, Chain };
01272     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
01273   }
01274 
01275   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
01276   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
01277   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
01278 
01279   return LD;
01280 }
01281 
01282 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
01283 
01284   SDValue Chain = N->getOperand(0);
01285   SDValue Op1;
01286   MemSDNode *Mem;
01287   bool IsLDG = true;
01288 
01289   // If this is an LDG intrinsic, the address is the third operand. If it's an
01290   // LDG/LDU SD node (from custom vector handling), then it's the second operand
01291   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
01292     Op1 = N->getOperand(2);
01293     Mem = cast<MemIntrinsicSDNode>(N);
01294     unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
01295     switch (IID) {
01296     default:
01297       return NULL;
01298     case Intrinsic::nvvm_ldg_global_f:
01299     case Intrinsic::nvvm_ldg_global_i:
01300     case Intrinsic::nvvm_ldg_global_p:
01301       IsLDG = true;
01302       break;
01303     case Intrinsic::nvvm_ldu_global_f:
01304     case Intrinsic::nvvm_ldu_global_i:
01305     case Intrinsic::nvvm_ldu_global_p:
01306       IsLDG = false;
01307       break;
01308     }
01309   } else {
01310     Op1 = N->getOperand(1);
01311     Mem = cast<MemSDNode>(N);
01312   }
01313 
01314   unsigned Opcode;
01315   SDLoc DL(N);
01316   SDNode *LD;
01317   SDValue Base, Offset, Addr;
01318 
01319   EVT EltVT = Mem->getMemoryVT();
01320   if (EltVT.isVector()) {
01321     EltVT = EltVT.getVectorElementType();
01322   }
01323 
01324   if (SelectDirectAddr(Op1, Addr)) {
01325     switch (N->getOpcode()) {
01326     default:
01327       return nullptr;
01328     case ISD::INTRINSIC_W_CHAIN:
01329       if (IsLDG) {
01330         switch (EltVT.getSimpleVT().SimpleTy) {
01331         default:
01332           return nullptr;
01333         case MVT::i8:
01334           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
01335           break;
01336         case MVT::i16:
01337           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
01338           break;
01339         case MVT::i32:
01340           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
01341           break;
01342         case MVT::i64:
01343           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
01344           break;
01345         case MVT::f32:
01346           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
01347           break;
01348         case MVT::f64:
01349           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
01350           break;
01351         }
01352       } else {
01353         switch (EltVT.getSimpleVT().SimpleTy) {
01354         default:
01355           return nullptr;
01356         case MVT::i8:
01357           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
01358           break;
01359         case MVT::i16:
01360           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
01361           break;
01362         case MVT::i32:
01363           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
01364           break;
01365         case MVT::i64:
01366           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
01367           break;
01368         case MVT::f32:
01369           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
01370           break;
01371         case MVT::f64:
01372           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
01373           break;
01374         }
01375       }
01376       break;
01377     case NVPTXISD::LDGV2:
01378       switch (EltVT.getSimpleVT().SimpleTy) {
01379       default:
01380         return nullptr;
01381       case MVT::i8:
01382         Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
01383         break;
01384       case MVT::i16:
01385         Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
01386         break;
01387       case MVT::i32:
01388         Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
01389         break;
01390       case MVT::i64:
01391         Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
01392         break;
01393       case MVT::f32:
01394         Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
01395         break;
01396       case MVT::f64:
01397         Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
01398         break;
01399       }
01400       break;
01401     case NVPTXISD::LDUV2:
01402       switch (EltVT.getSimpleVT().SimpleTy) {
01403       default:
01404         return nullptr;
01405       case MVT::i8:
01406         Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
01407         break;
01408       case MVT::i16:
01409         Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
01410         break;
01411       case MVT::i32:
01412         Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
01413         break;
01414       case MVT::i64:
01415         Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
01416         break;
01417       case MVT::f32:
01418         Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
01419         break;
01420       case MVT::f64:
01421         Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
01422         break;
01423       }
01424       break;
01425     case NVPTXISD::LDGV4:
01426       switch (EltVT.getSimpleVT().SimpleTy) {
01427       default:
01428         return nullptr;
01429       case MVT::i8:
01430         Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
01431         break;
01432       case MVT::i16:
01433         Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
01434         break;
01435       case MVT::i32:
01436         Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
01437         break;
01438       case MVT::f32:
01439         Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
01440         break;
01441       }
01442       break;
01443     case NVPTXISD::LDUV4:
01444       switch (EltVT.getSimpleVT().SimpleTy) {
01445       default:
01446         return nullptr;
01447       case MVT::i8:
01448         Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
01449         break;
01450       case MVT::i16:
01451         Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
01452         break;
01453       case MVT::i32:
01454         Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
01455         break;
01456       case MVT::f32:
01457         Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
01458         break;
01459       }
01460       break;
01461     }
01462 
01463     SDValue Ops[] = { Addr, Chain };
01464     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
01465   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
01466                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
01467     if (TM.is64Bit()) {
01468       switch (N->getOpcode()) {
01469       default:
01470         return nullptr;
01471       case ISD::LOAD:
01472       case ISD::INTRINSIC_W_CHAIN:
01473         if (IsLDG) {
01474           switch (EltVT.getSimpleVT().SimpleTy) {
01475           default:
01476             return nullptr;
01477           case MVT::i8:
01478             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
01479             break;
01480           case MVT::i16:
01481             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
01482             break;
01483           case MVT::i32:
01484             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
01485             break;
01486           case MVT::i64:
01487             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
01488             break;
01489           case MVT::f32:
01490             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
01491             break;
01492           case MVT::f64:
01493             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
01494             break;
01495           }
01496         } else {
01497           switch (EltVT.getSimpleVT().SimpleTy) {
01498           default:
01499             return nullptr;
01500           case MVT::i8:
01501             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
01502             break;
01503           case MVT::i16:
01504             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
01505             break;
01506           case MVT::i32:
01507             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
01508             break;
01509           case MVT::i64:
01510             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
01511             break;
01512           case MVT::f32:
01513             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
01514             break;
01515           case MVT::f64:
01516             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
01517             break;
01518           }
01519         }
01520         break;
01521       case NVPTXISD::LoadV2:
01522       case NVPTXISD::LDGV2:
01523         switch (EltVT.getSimpleVT().SimpleTy) {
01524         default:
01525           return nullptr;
01526         case MVT::i8:
01527           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
01528           break;
01529         case MVT::i16:
01530           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
01531           break;
01532         case MVT::i32:
01533           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
01534           break;
01535         case MVT::i64:
01536           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
01537           break;
01538         case MVT::f32:
01539           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
01540           break;
01541         case MVT::f64:
01542           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
01543           break;
01544         }
01545         break;
01546       case NVPTXISD::LDUV2:
01547         switch (EltVT.getSimpleVT().SimpleTy) {
01548         default:
01549           return nullptr;
01550         case MVT::i8:
01551           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
01552           break;
01553         case MVT::i16:
01554           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
01555           break;
01556         case MVT::i32:
01557           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
01558           break;
01559         case MVT::i64:
01560           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
01561           break;
01562         case MVT::f32:
01563           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
01564           break;
01565         case MVT::f64:
01566           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
01567           break;
01568         }
01569         break;
01570       case NVPTXISD::LoadV4:
01571       case NVPTXISD::LDGV4:
01572         switch (EltVT.getSimpleVT().SimpleTy) {
01573         default:
01574           return nullptr;
01575         case MVT::i8:
01576           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
01577           break;
01578         case MVT::i16:
01579           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
01580           break;
01581         case MVT::i32:
01582           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
01583           break;
01584         case MVT::f32:
01585           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
01586           break;
01587         }
01588         break;
01589       case NVPTXISD::LDUV4:
01590         switch (EltVT.getSimpleVT().SimpleTy) {
01591         default:
01592           return nullptr;
01593         case MVT::i8:
01594           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
01595           break;
01596         case MVT::i16:
01597           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
01598           break;
01599         case MVT::i32:
01600           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
01601           break;
01602         case MVT::f32:
01603           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
01604           break;
01605         }
01606         break;
01607       }
01608     } else {
01609       switch (N->getOpcode()) {
01610       default:
01611         return nullptr;
01612       case ISD::LOAD:
01613       case ISD::INTRINSIC_W_CHAIN:
01614         if (IsLDG) {
01615           switch (EltVT.getSimpleVT().SimpleTy) {
01616           default:
01617             return nullptr;
01618           case MVT::i8:
01619             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
01620             break;
01621           case MVT::i16:
01622             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
01623             break;
01624           case MVT::i32:
01625             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
01626             break;
01627           case MVT::i64:
01628             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
01629             break;
01630           case MVT::f32:
01631             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
01632             break;
01633           case MVT::f64:
01634             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
01635             break;
01636           }
01637         } else {
01638           switch (EltVT.getSimpleVT().SimpleTy) {
01639           default:
01640             return nullptr;
01641           case MVT::i8:
01642             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
01643             break;
01644           case MVT::i16:
01645             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
01646             break;
01647           case MVT::i32:
01648             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
01649             break;
01650           case MVT::i64:
01651             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
01652             break;
01653           case MVT::f32:
01654             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
01655             break;
01656           case MVT::f64:
01657             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
01658             break;
01659           }
01660         }
01661         break;
01662       case NVPTXISD::LoadV2:
01663       case NVPTXISD::LDGV2:
01664         switch (EltVT.getSimpleVT().SimpleTy) {
01665         default:
01666           return nullptr;
01667         case MVT::i8:
01668           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
01669           break;
01670         case MVT::i16:
01671           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
01672           break;
01673         case MVT::i32:
01674           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
01675           break;
01676         case MVT::i64:
01677           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
01678           break;
01679         case MVT::f32:
01680           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
01681           break;
01682         case MVT::f64:
01683           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
01684           break;
01685         }
01686         break;
01687       case NVPTXISD::LDUV2:
01688         switch (EltVT.getSimpleVT().SimpleTy) {
01689         default:
01690           return nullptr;
01691         case MVT::i8:
01692           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
01693           break;
01694         case MVT::i16:
01695           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
01696           break;
01697         case MVT::i32:
01698           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
01699           break;
01700         case MVT::i64:
01701           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
01702           break;
01703         case MVT::f32:
01704           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
01705           break;
01706         case MVT::f64:
01707           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
01708           break;
01709         }
01710         break;
01711       case NVPTXISD::LoadV4:
01712       case NVPTXISD::LDGV4:
01713         switch (EltVT.getSimpleVT().SimpleTy) {
01714         default:
01715           return nullptr;
01716         case MVT::i8:
01717           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
01718           break;
01719         case MVT::i16:
01720           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
01721           break;
01722         case MVT::i32:
01723           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
01724           break;
01725         case MVT::f32:
01726           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
01727           break;
01728         }
01729         break;
01730       case NVPTXISD::LDUV4:
01731         switch (EltVT.getSimpleVT().SimpleTy) {
01732         default:
01733           return nullptr;
01734         case MVT::i8:
01735           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
01736           break;
01737         case MVT::i16:
01738           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
01739           break;
01740         case MVT::i32:
01741           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
01742           break;
01743         case MVT::f32:
01744           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
01745           break;
01746         }
01747         break;
01748       }
01749     }
01750 
01751     SDValue Ops[] = { Base, Offset, Chain };
01752 
01753     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
01754   } else {
01755     if (TM.is64Bit()) {
01756       switch (N->getOpcode()) {
01757       default:
01758         return nullptr;
01759       case ISD::LOAD:
01760       case ISD::INTRINSIC_W_CHAIN:
01761         if (IsLDG) {
01762           switch (EltVT.getSimpleVT().SimpleTy) {
01763           default:
01764             return nullptr;
01765           case MVT::i8:
01766             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
01767             break;
01768           case MVT::i16:
01769             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
01770             break;
01771           case MVT::i32:
01772             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
01773             break;
01774           case MVT::i64:
01775             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
01776             break;
01777           case MVT::f32:
01778             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
01779             break;
01780           case MVT::f64:
01781             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
01782             break;
01783           }
01784         } else {
01785           switch (EltVT.getSimpleVT().SimpleTy) {
01786           default:
01787             return nullptr;
01788           case MVT::i8:
01789             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
01790             break;
01791           case MVT::i16:
01792             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
01793             break;
01794           case MVT::i32:
01795             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
01796             break;
01797           case MVT::i64:
01798             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
01799             break;
01800           case MVT::f32:
01801             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
01802             break;
01803           case MVT::f64:
01804             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
01805             break;
01806           }
01807         }
01808         break;
01809       case NVPTXISD::LoadV2:
01810       case NVPTXISD::LDGV2:
01811         switch (EltVT.getSimpleVT().SimpleTy) {
01812         default:
01813           return nullptr;
01814         case MVT::i8:
01815           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
01816           break;
01817         case MVT::i16:
01818           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
01819           break;
01820         case MVT::i32:
01821           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
01822           break;
01823         case MVT::i64:
01824           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
01825           break;
01826         case MVT::f32:
01827           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
01828           break;
01829         case MVT::f64:
01830           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
01831           break;
01832         }
01833         break;
01834       case NVPTXISD::LDUV2:
01835         switch (EltVT.getSimpleVT().SimpleTy) {
01836         default:
01837           return nullptr;
01838         case MVT::i8:
01839           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
01840           break;
01841         case MVT::i16:
01842           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
01843           break;
01844         case MVT::i32:
01845           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
01846           break;
01847         case MVT::i64:
01848           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
01849           break;
01850         case MVT::f32:
01851           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
01852           break;
01853         case MVT::f64:
01854           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
01855           break;
01856         }
01857         break;
01858       case NVPTXISD::LoadV4:
01859       case NVPTXISD::LDGV4:
01860         switch (EltVT.getSimpleVT().SimpleTy) {
01861         default:
01862           return nullptr;
01863         case MVT::i8:
01864           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
01865           break;
01866         case MVT::i16:
01867           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
01868           break;
01869         case MVT::i32:
01870           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
01871           break;
01872         case MVT::f32:
01873           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
01874           break;
01875         }
01876         break;
01877       case NVPTXISD::LDUV4:
01878         switch (EltVT.getSimpleVT().SimpleTy) {
01879         default:
01880           return nullptr;
01881         case MVT::i8:
01882           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
01883           break;
01884         case MVT::i16:
01885           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
01886           break;
01887         case MVT::i32:
01888           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
01889           break;
01890         case MVT::f32:
01891           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
01892           break;
01893         }
01894         break;
01895       }
01896     } else {
01897       switch (N->getOpcode()) {
01898       default:
01899         return nullptr;
01900       case ISD::LOAD:
01901       case ISD::INTRINSIC_W_CHAIN:
01902         if (IsLDG) {
01903           switch (EltVT.getSimpleVT().SimpleTy) {
01904           default:
01905             return nullptr;
01906           case MVT::i8:
01907             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
01908             break;
01909           case MVT::i16:
01910             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
01911             break;
01912           case MVT::i32:
01913             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
01914             break;
01915           case MVT::i64:
01916             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
01917             break;
01918           case MVT::f32:
01919             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
01920             break;
01921           case MVT::f64:
01922             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
01923             break;
01924           }
01925         } else {
01926           switch (EltVT.getSimpleVT().SimpleTy) {
01927           default:
01928             return nullptr;
01929           case MVT::i8:
01930             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
01931             break;
01932           case MVT::i16:
01933             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
01934             break;
01935           case MVT::i32:
01936             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
01937             break;
01938           case MVT::i64:
01939             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
01940             break;
01941           case MVT::f32:
01942             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
01943             break;
01944           case MVT::f64:
01945             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
01946             break;
01947           }
01948         }
01949         break;
01950       case NVPTXISD::LoadV2:
01951       case NVPTXISD::LDGV2:
01952         switch (EltVT.getSimpleVT().SimpleTy) {
01953         default:
01954           return nullptr;
01955         case MVT::i8:
01956           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
01957           break;
01958         case MVT::i16:
01959           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
01960           break;
01961         case MVT::i32:
01962           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
01963           break;
01964         case MVT::i64:
01965           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
01966           break;
01967         case MVT::f32:
01968           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
01969           break;
01970         case MVT::f64:
01971           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
01972           break;
01973         }
01974         break;
01975       case NVPTXISD::LDUV2:
01976         switch (EltVT.getSimpleVT().SimpleTy) {
01977         default:
01978           return nullptr;
01979         case MVT::i8:
01980           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
01981           break;
01982         case MVT::i16:
01983           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
01984           break;
01985         case MVT::i32:
01986           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
01987           break;
01988         case MVT::i64:
01989           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
01990           break;
01991         case MVT::f32:
01992           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
01993           break;
01994         case MVT::f64:
01995           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
01996           break;
01997         }
01998         break;
01999       case NVPTXISD::LoadV4:
02000       case NVPTXISD::LDGV4:
02001         switch (EltVT.getSimpleVT().SimpleTy) {
02002         default:
02003           return nullptr;
02004         case MVT::i8:
02005           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
02006           break;
02007         case MVT::i16:
02008           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
02009           break;
02010         case MVT::i32:
02011           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
02012           break;
02013         case MVT::f32:
02014           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
02015           break;
02016         }
02017         break;
02018       case NVPTXISD::LDUV4:
02019         switch (EltVT.getSimpleVT().SimpleTy) {
02020         default:
02021           return nullptr;
02022         case MVT::i8:
02023           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
02024           break;
02025         case MVT::i16:
02026           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
02027           break;
02028         case MVT::i32:
02029           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
02030           break;
02031         case MVT::f32:
02032           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
02033           break;
02034         }
02035         break;
02036       }
02037     }
02038 
02039     SDValue Ops[] = { Op1, Chain };
02040     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
02041   }
02042 
02043   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
02044   MemRefs0[0] = Mem->getMemOperand();
02045   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
02046 
02047   return LD;
02048 }
02049 
02050 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
02051   SDLoc dl(N);
02052   StoreSDNode *ST = cast<StoreSDNode>(N);
02053   EVT StoreVT = ST->getMemoryVT();
02054   SDNode *NVPTXST = nullptr;
02055 
02056   // do not support pre/post inc/dec
02057   if (ST->isIndexed())
02058     return nullptr;
02059 
02060   if (!StoreVT.isSimple())
02061     return nullptr;
02062 
02063   // Address Space Setting
02064   unsigned int codeAddrSpace = getCodeAddrSpace(ST);
02065 
02066   // Volatile Setting
02067   // - .volatile is only availalble for .global and .shared
02068   bool isVolatile = ST->isVolatile();
02069   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
02070       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
02071       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
02072     isVolatile = false;
02073 
02074   // Vector Setting
02075   MVT SimpleVT = StoreVT.getSimpleVT();
02076   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
02077   if (SimpleVT.isVector()) {
02078     unsigned num = SimpleVT.getVectorNumElements();
02079     if (num == 2)
02080       vecType = NVPTX::PTXLdStInstCode::V2;
02081     else if (num == 4)
02082       vecType = NVPTX::PTXLdStInstCode::V4;
02083     else
02084       return nullptr;
02085   }
02086 
02087   // Type Setting: toType + toTypeWidth
02088   // - for integer type, always use 'u'
02089   //
02090   MVT ScalarVT = SimpleVT.getScalarType();
02091   unsigned toTypeWidth = ScalarVT.getSizeInBits();
02092   unsigned int toType;
02093   if (ScalarVT.isFloatingPoint())
02094     toType = NVPTX::PTXLdStInstCode::Float;
02095   else
02096     toType = NVPTX::PTXLdStInstCode::Unsigned;
02097 
02098   // Create the machine instruction DAG
02099   SDValue Chain = N->getOperand(0);
02100   SDValue N1 = N->getOperand(1);
02101   SDValue N2 = N->getOperand(2);
02102   SDValue Addr;
02103   SDValue Offset, Base;
02104   unsigned Opcode;
02105   MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
02106 
02107   if (SelectDirectAddr(N2, Addr)) {
02108     switch (SourceVT) {
02109     case MVT::i8:
02110       Opcode = NVPTX::ST_i8_avar;
02111       break;
02112     case MVT::i16:
02113       Opcode = NVPTX::ST_i16_avar;
02114       break;
02115     case MVT::i32:
02116       Opcode = NVPTX::ST_i32_avar;
02117       break;
02118     case MVT::i64:
02119       Opcode = NVPTX::ST_i64_avar;
02120       break;
02121     case MVT::f32:
02122       Opcode = NVPTX::ST_f32_avar;
02123       break;
02124     case MVT::f64:
02125       Opcode = NVPTX::ST_f64_avar;
02126       break;
02127     default:
02128       return nullptr;
02129     }
02130     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
02131                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
02132                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
02133                       Chain };
02134     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
02135   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
02136                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
02137     switch (SourceVT) {
02138     case MVT::i8:
02139       Opcode = NVPTX::ST_i8_asi;
02140       break;
02141     case MVT::i16:
02142       Opcode = NVPTX::ST_i16_asi;
02143       break;
02144     case MVT::i32:
02145       Opcode = NVPTX::ST_i32_asi;
02146       break;
02147     case MVT::i64:
02148       Opcode = NVPTX::ST_i64_asi;
02149       break;
02150     case MVT::f32:
02151       Opcode = NVPTX::ST_f32_asi;
02152       break;
02153     case MVT::f64:
02154       Opcode = NVPTX::ST_f64_asi;
02155       break;
02156     default:
02157       return nullptr;
02158     }
02159     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
02160                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
02161                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
02162                       Offset, Chain };
02163     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
02164   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
02165                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
02166     if (TM.is64Bit()) {
02167       switch (SourceVT) {
02168       case MVT::i8:
02169         Opcode = NVPTX::ST_i8_ari_64;
02170         break;
02171       case MVT::i16:
02172         Opcode = NVPTX::ST_i16_ari_64;
02173         break;
02174       case MVT::i32:
02175         Opcode = NVPTX::ST_i32_ari_64;
02176         break;
02177       case MVT::i64:
02178         Opcode = NVPTX::ST_i64_ari_64;
02179         break;
02180       case MVT::f32:
02181         Opcode = NVPTX::ST_f32_ari_64;
02182         break;
02183       case MVT::f64:
02184         Opcode = NVPTX::ST_f64_ari_64;
02185         break;
02186       default:
02187         return nullptr;
02188       }
02189     } else {
02190       switch (SourceVT) {
02191       case MVT::i8:
02192         Opcode = NVPTX::ST_i8_ari;
02193         break;
02194       case MVT::i16:
02195         Opcode = NVPTX::ST_i16_ari;
02196         break;
02197       case MVT::i32:
02198         Opcode = NVPTX::ST_i32_ari;
02199         break;
02200       case MVT::i64:
02201         Opcode = NVPTX::ST_i64_ari;
02202         break;
02203       case MVT::f32:
02204         Opcode = NVPTX::ST_f32_ari;
02205         break;
02206       case MVT::f64:
02207         Opcode = NVPTX::ST_f64_ari;
02208         break;
02209       default:
02210         return nullptr;
02211       }
02212     }
02213     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
02214                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
02215                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
02216                       Offset, Chain };
02217     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
02218   } else {
02219     if (TM.is64Bit()) {
02220       switch (SourceVT) {
02221       case MVT::i8:
02222         Opcode = NVPTX::ST_i8_areg_64;
02223         break;
02224       case MVT::i16:
02225         Opcode = NVPTX::ST_i16_areg_64;
02226         break;
02227       case MVT::i32:
02228         Opcode = NVPTX::ST_i32_areg_64;
02229         break;
02230       case MVT::i64:
02231         Opcode = NVPTX::ST_i64_areg_64;
02232         break;
02233       case MVT::f32:
02234         Opcode = NVPTX::ST_f32_areg_64;
02235         break;
02236       case MVT::f64:
02237         Opcode = NVPTX::ST_f64_areg_64;
02238         break;
02239       default:
02240         return nullptr;
02241       }
02242     } else {
02243       switch (SourceVT) {
02244       case MVT::i8:
02245         Opcode = NVPTX::ST_i8_areg;
02246         break;
02247       case MVT::i16:
02248         Opcode = NVPTX::ST_i16_areg;
02249         break;
02250       case MVT::i32:
02251         Opcode = NVPTX::ST_i32_areg;
02252         break;
02253       case MVT::i64:
02254         Opcode = NVPTX::ST_i64_areg;
02255         break;
02256       case MVT::f32:
02257         Opcode = NVPTX::ST_f32_areg;
02258         break;
02259       case MVT::f64:
02260         Opcode = NVPTX::ST_f64_areg;
02261         break;
02262       default:
02263         return nullptr;
02264       }
02265     }
02266     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
02267                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
02268                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
02269                       Chain };
02270     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
02271   }
02272 
02273   if (NVPTXST) {
02274     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
02275     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
02276     cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
02277   }
02278 
02279   return NVPTXST;
02280 }
02281 
02282 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
02283   SDValue Chain = N->getOperand(0);
02284   SDValue Op1 = N->getOperand(1);
02285   SDValue Addr, Offset, Base;
02286   unsigned Opcode;
02287   SDLoc DL(N);
02288   SDNode *ST;
02289   EVT EltVT = Op1.getValueType();
02290   MemSDNode *MemSD = cast<MemSDNode>(N);
02291   EVT StoreVT = MemSD->getMemoryVT();
02292 
02293   // Address Space Setting
02294   unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
02295 
02296   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
02297     report_fatal_error("Cannot store to pointer that points to constant "
02298                        "memory space");
02299   }
02300 
02301   // Volatile Setting
02302   // - .volatile is only availalble for .global and .shared
02303   bool IsVolatile = MemSD->isVolatile();
02304   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
02305       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
02306       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
02307     IsVolatile = false;
02308 
02309   // Type Setting: toType + toTypeWidth
02310   // - for integer type, always use 'u'
02311   assert(StoreVT.isSimple() && "Store value is not simple");
02312   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
02313   unsigned ToTypeWidth = ScalarVT.getSizeInBits();
02314   unsigned ToType;
02315   if (ScalarVT.isFloatingPoint())
02316     ToType = NVPTX::PTXLdStInstCode::Float;
02317   else
02318     ToType = NVPTX::PTXLdStInstCode::Unsigned;
02319 
02320   SmallVector<SDValue, 12> StOps;
02321   SDValue N2;
02322   unsigned VecType;
02323 
02324   switch (N->getOpcode()) {
02325   case NVPTXISD::StoreV2:
02326     VecType = NVPTX::PTXLdStInstCode::V2;
02327     StOps.push_back(N->getOperand(1));
02328     StOps.push_back(N->getOperand(2));
02329     N2 = N->getOperand(3);
02330     break;
02331   case NVPTXISD::StoreV4:
02332     VecType = NVPTX::PTXLdStInstCode::V4;
02333     StOps.push_back(N->getOperand(1));
02334     StOps.push_back(N->getOperand(2));
02335     StOps.push_back(N->getOperand(3));
02336     StOps.push_back(N->getOperand(4));
02337     N2 = N->getOperand(5);
02338     break;
02339   default:
02340     return nullptr;
02341   }
02342 
02343   StOps.push_back(getI32Imm(IsVolatile, DL));
02344   StOps.push_back(getI32Imm(CodeAddrSpace, DL));
02345   StOps.push_back(getI32Imm(VecType, DL));
02346   StOps.push_back(getI32Imm(ToType, DL));
02347   StOps.push_back(getI32Imm(ToTypeWidth, DL));
02348 
02349   if (SelectDirectAddr(N2, Addr)) {
02350     switch (N->getOpcode()) {
02351     default:
02352       return nullptr;
02353     case NVPTXISD::StoreV2:
02354       switch (EltVT.getSimpleVT().SimpleTy) {
02355       default:
02356         return nullptr;
02357       case MVT::i8:
02358         Opcode = NVPTX::STV_i8_v2_avar;
02359         break;
02360       case MVT::i16:
02361         Opcode = NVPTX::STV_i16_v2_avar;
02362         break;
02363       case MVT::i32:
02364         Opcode = NVPTX::STV_i32_v2_avar;
02365         break;
02366       case MVT::i64:
02367         Opcode = NVPTX::STV_i64_v2_avar;
02368         break;
02369       case MVT::f32:
02370         Opcode = NVPTX::STV_f32_v2_avar;
02371         break;
02372       case MVT::f64:
02373         Opcode = NVPTX::STV_f64_v2_avar;
02374         break;
02375       }
02376       break;
02377     case NVPTXISD::StoreV4:
02378       switch (EltVT.getSimpleVT().SimpleTy) {
02379       default:
02380         return nullptr;
02381       case MVT::i8:
02382         Opcode = NVPTX::STV_i8_v4_avar;
02383         break;
02384       case MVT::i16:
02385         Opcode = NVPTX::STV_i16_v4_avar;
02386         break;
02387       case MVT::i32:
02388         Opcode = NVPTX::STV_i32_v4_avar;
02389         break;
02390       case MVT::f32:
02391         Opcode = NVPTX::STV_f32_v4_avar;
02392         break;
02393       }
02394       break;
02395     }
02396     StOps.push_back(Addr);
02397   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
02398                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
02399     switch (N->getOpcode()) {
02400     default:
02401       return nullptr;
02402     case NVPTXISD::StoreV2:
02403       switch (EltVT.getSimpleVT().SimpleTy) {
02404       default:
02405         return nullptr;
02406       case MVT::i8:
02407         Opcode = NVPTX::STV_i8_v2_asi;
02408         break;
02409       case MVT::i16:
02410         Opcode = NVPTX::STV_i16_v2_asi;
02411         break;
02412       case MVT::i32:
02413         Opcode = NVPTX::STV_i32_v2_asi;
02414         break;
02415       case MVT::i64:
02416         Opcode = NVPTX::STV_i64_v2_asi;
02417         break;
02418       case MVT::f32:
02419         Opcode = NVPTX::STV_f32_v2_asi;
02420         break;
02421       case MVT::f64:
02422         Opcode = NVPTX::STV_f64_v2_asi;
02423         break;
02424       }
02425       break;
02426     case NVPTXISD::StoreV4:
02427       switch (EltVT.getSimpleVT().SimpleTy) {
02428       default:
02429         return nullptr;
02430       case MVT::i8:
02431         Opcode = NVPTX::STV_i8_v4_asi;
02432         break;
02433       case MVT::i16:
02434         Opcode = NVPTX::STV_i16_v4_asi;
02435         break;
02436       case MVT::i32:
02437         Opcode = NVPTX::STV_i32_v4_asi;
02438         break;
02439       case MVT::f32:
02440         Opcode = NVPTX::STV_f32_v4_asi;
02441         break;
02442       }
02443       break;
02444     }
02445     StOps.push_back(Base);
02446     StOps.push_back(Offset);
02447   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
02448                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
02449     if (TM.is64Bit()) {
02450       switch (N->getOpcode()) {
02451       default:
02452         return nullptr;
02453       case NVPTXISD::StoreV2:
02454         switch (EltVT.getSimpleVT().SimpleTy) {
02455         default:
02456           return nullptr;
02457         case MVT::i8:
02458           Opcode = NVPTX::STV_i8_v2_ari_64;
02459           break;
02460         case MVT::i16:
02461           Opcode = NVPTX::STV_i16_v2_ari_64;
02462           break;
02463         case MVT::i32:
02464           Opcode = NVPTX::STV_i32_v2_ari_64;
02465           break;
02466         case MVT::i64:
02467           Opcode = NVPTX::STV_i64_v2_ari_64;
02468           break;
02469         case MVT::f32:
02470           Opcode = NVPTX::STV_f32_v2_ari_64;
02471           break;
02472         case MVT::f64:
02473           Opcode = NVPTX::STV_f64_v2_ari_64;
02474           break;
02475         }
02476         break;
02477       case NVPTXISD::StoreV4:
02478         switch (EltVT.getSimpleVT().SimpleTy) {
02479         default:
02480           return nullptr;
02481         case MVT::i8:
02482           Opcode = NVPTX::STV_i8_v4_ari_64;
02483           break;
02484         case MVT::i16:
02485           Opcode = NVPTX::STV_i16_v4_ari_64;
02486           break;
02487         case MVT::i32:
02488           Opcode = NVPTX::STV_i32_v4_ari_64;
02489           break;
02490         case MVT::f32:
02491           Opcode = NVPTX::STV_f32_v4_ari_64;
02492           break;
02493         }
02494         break;
02495       }
02496     } else {
02497       switch (N->getOpcode()) {
02498       default:
02499         return nullptr;
02500       case NVPTXISD::StoreV2:
02501         switch (EltVT.getSimpleVT().SimpleTy) {
02502         default:
02503           return nullptr;
02504         case MVT::i8:
02505           Opcode = NVPTX::STV_i8_v2_ari;
02506           break;
02507         case MVT::i16:
02508           Opcode = NVPTX::STV_i16_v2_ari;
02509           break;
02510         case MVT::i32:
02511           Opcode = NVPTX::STV_i32_v2_ari;
02512           break;
02513         case MVT::i64:
02514           Opcode = NVPTX::STV_i64_v2_ari;
02515           break;
02516         case MVT::f32:
02517           Opcode = NVPTX::STV_f32_v2_ari;
02518           break;
02519         case MVT::f64:
02520           Opcode = NVPTX::STV_f64_v2_ari;
02521           break;
02522         }
02523         break;
02524       case NVPTXISD::StoreV4:
02525         switch (EltVT.getSimpleVT().SimpleTy) {
02526         default:
02527           return nullptr;
02528         case MVT::i8:
02529           Opcode = NVPTX::STV_i8_v4_ari;
02530           break;
02531         case MVT::i16:
02532           Opcode = NVPTX::STV_i16_v4_ari;
02533           break;
02534         case MVT::i32:
02535           Opcode = NVPTX::STV_i32_v4_ari;
02536           break;
02537         case MVT::f32:
02538           Opcode = NVPTX::STV_f32_v4_ari;
02539           break;
02540         }
02541         break;
02542       }
02543     }
02544     StOps.push_back(Base);
02545     StOps.push_back(Offset);
02546   } else {
02547     if (TM.is64Bit()) {
02548       switch (N->getOpcode()) {
02549       default:
02550         return nullptr;
02551       case NVPTXISD::StoreV2:
02552         switch (EltVT.getSimpleVT().SimpleTy) {
02553         default:
02554           return nullptr;
02555         case MVT::i8:
02556           Opcode = NVPTX::STV_i8_v2_areg_64;
02557           break;
02558         case MVT::i16:
02559           Opcode = NVPTX::STV_i16_v2_areg_64;
02560           break;
02561         case MVT::i32:
02562           Opcode = NVPTX::STV_i32_v2_areg_64;
02563           break;
02564         case MVT::i64:
02565           Opcode = NVPTX::STV_i64_v2_areg_64;
02566           break;
02567         case MVT::f32:
02568           Opcode = NVPTX::STV_f32_v2_areg_64;
02569           break;
02570         case MVT::f64:
02571           Opcode = NVPTX::STV_f64_v2_areg_64;
02572           break;
02573         }
02574         break;
02575       case NVPTXISD::StoreV4:
02576         switch (EltVT.getSimpleVT().SimpleTy) {
02577         default:
02578           return nullptr;
02579         case MVT::i8:
02580           Opcode = NVPTX::STV_i8_v4_areg_64;
02581           break;
02582         case MVT::i16:
02583           Opcode = NVPTX::STV_i16_v4_areg_64;
02584           break;
02585         case MVT::i32:
02586           Opcode = NVPTX::STV_i32_v4_areg_64;
02587           break;
02588         case MVT::f32:
02589           Opcode = NVPTX::STV_f32_v4_areg_64;
02590           break;
02591         }
02592         break;
02593       }
02594     } else {
02595       switch (N->getOpcode()) {
02596       default:
02597         return nullptr;
02598       case NVPTXISD::StoreV2:
02599         switch (EltVT.getSimpleVT().SimpleTy) {
02600         default:
02601           return nullptr;
02602         case MVT::i8:
02603           Opcode = NVPTX::STV_i8_v2_areg;
02604           break;
02605         case MVT::i16:
02606           Opcode = NVPTX::STV_i16_v2_areg;
02607           break;
02608         case MVT::i32:
02609           Opcode = NVPTX::STV_i32_v2_areg;
02610           break;
02611         case MVT::i64:
02612           Opcode = NVPTX::STV_i64_v2_areg;
02613           break;
02614         case MVT::f32:
02615           Opcode = NVPTX::STV_f32_v2_areg;
02616           break;
02617         case MVT::f64:
02618           Opcode = NVPTX::STV_f64_v2_areg;
02619           break;
02620         }
02621         break;
02622       case NVPTXISD::StoreV4:
02623         switch (EltVT.getSimpleVT().SimpleTy) {
02624         default:
02625           return nullptr;
02626         case MVT::i8:
02627           Opcode = NVPTX::STV_i8_v4_areg;
02628           break;
02629         case MVT::i16:
02630           Opcode = NVPTX::STV_i16_v4_areg;
02631           break;
02632         case MVT::i32:
02633           Opcode = NVPTX::STV_i32_v4_areg;
02634           break;
02635         case MVT::f32:
02636           Opcode = NVPTX::STV_f32_v4_areg;
02637           break;
02638         }
02639         break;
02640       }
02641     }
02642     StOps.push_back(N2);
02643   }
02644 
02645   StOps.push_back(Chain);
02646 
02647   ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
02648 
02649   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
02650   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
02651   cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
02652 
02653   return ST;
02654 }
02655 
02656 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
02657   SDValue Chain = Node->getOperand(0);
02658   SDValue Offset = Node->getOperand(2);
02659   SDValue Flag = Node->getOperand(3);
02660   SDLoc DL(Node);
02661   MemSDNode *Mem = cast<MemSDNode>(Node);
02662 
02663   unsigned VecSize;
02664   switch (Node->getOpcode()) {
02665   default:
02666     return nullptr;
02667   case NVPTXISD::LoadParam:
02668     VecSize = 1;
02669     break;
02670   case NVPTXISD::LoadParamV2:
02671     VecSize = 2;
02672     break;
02673   case NVPTXISD::LoadParamV4:
02674     VecSize = 4;
02675     break;
02676   }
02677 
02678   EVT EltVT = Node->getValueType(0);
02679   EVT MemVT = Mem->getMemoryVT();
02680 
02681   unsigned Opc = 0;
02682 
02683   switch (VecSize) {
02684   default:
02685     return nullptr;
02686   case 1:
02687     switch (MemVT.getSimpleVT().SimpleTy) {
02688     default:
02689       return nullptr;
02690     case MVT::i1:
02691       Opc = NVPTX::LoadParamMemI8;
02692       break;
02693     case MVT::i8:
02694       Opc = NVPTX::LoadParamMemI8;
02695       break;
02696     case MVT::i16:
02697       Opc = NVPTX::LoadParamMemI16;
02698       break;
02699     case MVT::i32:
02700       Opc = NVPTX::LoadParamMemI32;
02701       break;
02702     case MVT::i64:
02703       Opc = NVPTX::LoadParamMemI64;
02704       break;
02705     case MVT::f32:
02706       Opc = NVPTX::LoadParamMemF32;
02707       break;
02708     case MVT::f64:
02709       Opc = NVPTX::LoadParamMemF64;
02710       break;
02711     }
02712     break;
02713   case 2:
02714     switch (MemVT.getSimpleVT().SimpleTy) {
02715     default:
02716       return nullptr;
02717     case MVT::i1:
02718       Opc = NVPTX::LoadParamMemV2I8;
02719       break;
02720     case MVT::i8:
02721       Opc = NVPTX::LoadParamMemV2I8;
02722       break;
02723     case MVT::i16:
02724       Opc = NVPTX::LoadParamMemV2I16;
02725       break;
02726     case MVT::i32:
02727       Opc = NVPTX::LoadParamMemV2I32;
02728       break;
02729     case MVT::i64:
02730       Opc = NVPTX::LoadParamMemV2I64;
02731       break;
02732     case MVT::f32:
02733       Opc = NVPTX::LoadParamMemV2F32;
02734       break;
02735     case MVT::f64:
02736       Opc = NVPTX::LoadParamMemV2F64;
02737       break;
02738     }
02739     break;
02740   case 4:
02741     switch (MemVT.getSimpleVT().SimpleTy) {
02742     default:
02743       return nullptr;
02744     case MVT::i1:
02745       Opc = NVPTX::LoadParamMemV4I8;
02746       break;
02747     case MVT::i8:
02748       Opc = NVPTX::LoadParamMemV4I8;
02749       break;
02750     case MVT::i16:
02751       Opc = NVPTX::LoadParamMemV4I16;
02752       break;
02753     case MVT::i32:
02754       Opc = NVPTX::LoadParamMemV4I32;
02755       break;
02756     case MVT::f32:
02757       Opc = NVPTX::LoadParamMemV4F32;
02758       break;
02759     }
02760     break;
02761   }
02762 
02763   SDVTList VTs;
02764   if (VecSize == 1) {
02765     VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
02766   } else if (VecSize == 2) {
02767     VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
02768   } else {
02769     EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
02770     VTs = CurDAG->getVTList(EVTs);
02771   }
02772 
02773   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
02774 
02775   SmallVector<SDValue, 2> Ops;
02776   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
02777   Ops.push_back(Chain);
02778   Ops.push_back(Flag);
02779 
02780   return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
02781 }
02782 
02783 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
02784   SDLoc DL(N);
02785   SDValue Chain = N->getOperand(0);
02786   SDValue Offset = N->getOperand(1);
02787   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
02788   MemSDNode *Mem = cast<MemSDNode>(N);
02789 
02790   // How many elements do we have?
02791   unsigned NumElts = 1;
02792   switch (N->getOpcode()) {
02793   default:
02794     return nullptr;
02795   case NVPTXISD::StoreRetval:
02796     NumElts = 1;
02797     break;
02798   case NVPTXISD::StoreRetvalV2:
02799     NumElts = 2;
02800     break;
02801   case NVPTXISD::StoreRetvalV4:
02802     NumElts = 4;
02803     break;
02804   }
02805 
02806   // Build vector of operands
02807   SmallVector<SDValue, 6> Ops;
02808   for (unsigned i = 0; i < NumElts; ++i)
02809     Ops.push_back(N->getOperand(i + 2));
02810   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
02811   Ops.push_back(Chain);
02812 
02813   // Determine target opcode
02814   // If we have an i1, use an 8-bit store. The lowering code in
02815   // NVPTXISelLowering will have already emitted an upcast.
02816   unsigned Opcode = 0;
02817   switch (NumElts) {
02818   default:
02819     return nullptr;
02820   case 1:
02821     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
02822     default:
02823       return nullptr;
02824     case MVT::i1:
02825       Opcode = NVPTX::StoreRetvalI8;
02826       break;
02827     case MVT::i8:
02828       Opcode = NVPTX::StoreRetvalI8;
02829       break;
02830     case MVT::i16:
02831       Opcode = NVPTX::StoreRetvalI16;
02832       break;
02833     case MVT::i32:
02834       Opcode = NVPTX::StoreRetvalI32;
02835       break;
02836     case MVT::i64:
02837       Opcode = NVPTX::StoreRetvalI64;
02838       break;
02839     case MVT::f32:
02840       Opcode = NVPTX::StoreRetvalF32;
02841       break;
02842     case MVT::f64:
02843       Opcode = NVPTX::StoreRetvalF64;
02844       break;
02845     }
02846     break;
02847   case 2:
02848     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
02849     default:
02850       return nullptr;
02851     case MVT::i1:
02852       Opcode = NVPTX::StoreRetvalV2I8;
02853       break;
02854     case MVT::i8:
02855       Opcode = NVPTX::StoreRetvalV2I8;
02856       break;
02857     case MVT::i16:
02858       Opcode = NVPTX::StoreRetvalV2I16;
02859       break;
02860     case MVT::i32:
02861       Opcode = NVPTX::StoreRetvalV2I32;
02862       break;
02863     case MVT::i64:
02864       Opcode = NVPTX::StoreRetvalV2I64;
02865       break;
02866     case MVT::f32:
02867       Opcode = NVPTX::StoreRetvalV2F32;
02868       break;
02869     case MVT::f64:
02870       Opcode = NVPTX::StoreRetvalV2F64;
02871       break;
02872     }
02873     break;
02874   case 4:
02875     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
02876     default:
02877       return nullptr;
02878     case MVT::i1:
02879       Opcode = NVPTX::StoreRetvalV4I8;
02880       break;
02881     case MVT::i8:
02882       Opcode = NVPTX::StoreRetvalV4I8;
02883       break;
02884     case MVT::i16:
02885       Opcode = NVPTX::StoreRetvalV4I16;
02886       break;
02887     case MVT::i32:
02888       Opcode = NVPTX::StoreRetvalV4I32;
02889       break;
02890     case MVT::f32:
02891       Opcode = NVPTX::StoreRetvalV4F32;
02892       break;
02893     }
02894     break;
02895   }
02896 
02897   SDNode *Ret =
02898       CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
02899   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
02900   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
02901   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
02902 
02903   return Ret;
02904 }
02905 
02906 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
02907   SDLoc DL(N);
02908   SDValue Chain = N->getOperand(0);
02909   SDValue Param = N->getOperand(1);
02910   unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
02911   SDValue Offset = N->getOperand(2);
02912   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
02913   MemSDNode *Mem = cast<MemSDNode>(N);
02914   SDValue Flag = N->getOperand(N->getNumOperands() - 1);
02915 
02916   // How many elements do we have?
02917   unsigned NumElts = 1;
02918   switch (N->getOpcode()) {
02919   default:
02920     return nullptr;
02921   case NVPTXISD::StoreParamU32:
02922   case NVPTXISD::StoreParamS32:
02923   case NVPTXISD::StoreParam:
02924     NumElts = 1;
02925     break;
02926   case NVPTXISD::StoreParamV2:
02927     NumElts = 2;
02928     break;
02929   case NVPTXISD::StoreParamV4:
02930     NumElts = 4;
02931     break;
02932   }
02933 
02934   // Build vector of operands
02935   SmallVector<SDValue, 8> Ops;
02936   for (unsigned i = 0; i < NumElts; ++i)
02937     Ops.push_back(N->getOperand(i + 3));
02938   Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
02939   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
02940   Ops.push_back(Chain);
02941   Ops.push_back(Flag);
02942 
02943   // Determine target opcode
02944   // If we have an i1, use an 8-bit store. The lowering code in
02945   // NVPTXISelLowering will have already emitted an upcast.
02946   unsigned Opcode = 0;
02947   switch (N->getOpcode()) {
02948   default:
02949     switch (NumElts) {
02950     default:
02951       return nullptr;
02952     case 1:
02953       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
02954       default:
02955         return nullptr;
02956       case MVT::i1:
02957         Opcode = NVPTX::StoreParamI8;
02958         break;
02959       case MVT::i8:
02960         Opcode = NVPTX::StoreParamI8;
02961         break;
02962       case MVT::i16:
02963         Opcode = NVPTX::StoreParamI16;
02964         break;
02965       case MVT::i32:
02966         Opcode = NVPTX::StoreParamI32;
02967         break;
02968       case MVT::i64:
02969         Opcode = NVPTX::StoreParamI64;
02970         break;
02971       case MVT::f32:
02972         Opcode = NVPTX::StoreParamF32;
02973         break;
02974       case MVT::f64:
02975         Opcode = NVPTX::StoreParamF64;
02976         break;
02977       }
02978       break;
02979     case 2:
02980       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
02981       default:
02982         return nullptr;
02983       case MVT::i1:
02984         Opcode = NVPTX::StoreParamV2I8;
02985         break;
02986       case MVT::i8:
02987         Opcode = NVPTX::StoreParamV2I8;
02988         break;
02989       case MVT::i16:
02990         Opcode = NVPTX::StoreParamV2I16;
02991         break;
02992       case MVT::i32:
02993         Opcode = NVPTX::StoreParamV2I32;
02994         break;
02995       case MVT::i64:
02996         Opcode = NVPTX::StoreParamV2I64;
02997         break;
02998       case MVT::f32:
02999         Opcode = NVPTX::StoreParamV2F32;
03000         break;
03001       case MVT::f64:
03002         Opcode = NVPTX::StoreParamV2F64;
03003         break;
03004       }
03005       break;
03006     case 4:
03007       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
03008       default:
03009         return nullptr;
03010       case MVT::i1:
03011         Opcode = NVPTX::StoreParamV4I8;
03012         break;
03013       case MVT::i8:
03014         Opcode = NVPTX::StoreParamV4I8;
03015         break;
03016       case MVT::i16:
03017         Opcode = NVPTX::StoreParamV4I16;
03018         break;
03019       case MVT::i32:
03020         Opcode = NVPTX::StoreParamV4I32;
03021         break;
03022       case MVT::f32:
03023         Opcode = NVPTX::StoreParamV4F32;
03024         break;
03025       }
03026       break;
03027     }
03028     break;
03029   // Special case: if we have a sign-extend/zero-extend node, insert the
03030   // conversion instruction first, and use that as the value operand to
03031   // the selected StoreParam node.
03032   case NVPTXISD::StoreParamU32: {
03033     Opcode = NVPTX::StoreParamI32;
03034     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
03035                                                 MVT::i32);
03036     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
03037                                          MVT::i32, Ops[0], CvtNone);
03038     Ops[0] = SDValue(Cvt, 0);
03039     break;
03040   }
03041   case NVPTXISD::StoreParamS32: {
03042     Opcode = NVPTX::StoreParamI32;
03043     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
03044                                                 MVT::i32);
03045     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
03046                                          MVT::i32, Ops[0], CvtNone);
03047     Ops[0] = SDValue(Cvt, 0);
03048     break;
03049   }
03050   }
03051 
03052   SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
03053   SDNode *Ret =
03054       CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
03055   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
03056   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
03057   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
03058 
03059   return Ret;
03060 }
03061 
03062 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
03063   SDValue Chain = N->getOperand(0);
03064   SDNode *Ret = nullptr;
03065   unsigned Opc = 0;
03066   SmallVector<SDValue, 8> Ops;
03067 
03068   switch (N->getOpcode()) {
03069   default: return nullptr;
03070   case NVPTXISD::Tex1DFloatS32:
03071     Opc = NVPTX::TEX_1D_F32_S32;
03072     break;
03073   case NVPTXISD::Tex1DFloatFloat:
03074     Opc = NVPTX::TEX_1D_F32_F32;
03075     break;
03076   case NVPTXISD::Tex1DFloatFloatLevel:
03077     Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
03078     break;
03079   case NVPTXISD::Tex1DFloatFloatGrad:
03080     Opc = NVPTX::TEX_1D_F32_F32_GRAD;
03081     break;
03082   case NVPTXISD::Tex1DS32S32:
03083     Opc = NVPTX::TEX_1D_S32_S32;
03084     break;
03085   case NVPTXISD::Tex1DS32Float:
03086     Opc = NVPTX::TEX_1D_S32_F32;
03087     break;
03088   case NVPTXISD::Tex1DS32FloatLevel:
03089     Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
03090     break;
03091   case NVPTXISD::Tex1DS32FloatGrad:
03092     Opc = NVPTX::TEX_1D_S32_F32_GRAD;
03093     break;
03094   case NVPTXISD::Tex1DU32S32:
03095     Opc = NVPTX::TEX_1D_U32_S32;
03096     break;
03097   case NVPTXISD::Tex1DU32Float:
03098     Opc = NVPTX::TEX_1D_U32_F32;
03099     break;
03100   case NVPTXISD::Tex1DU32FloatLevel:
03101     Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
03102     break;
03103   case NVPTXISD::Tex1DU32FloatGrad:
03104     Opc = NVPTX::TEX_1D_U32_F32_GRAD;
03105     break;
03106   case NVPTXISD::Tex1DArrayFloatS32:
03107     Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
03108     break;
03109   case NVPTXISD::Tex1DArrayFloatFloat:
03110     Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
03111     break;
03112   case NVPTXISD::Tex1DArrayFloatFloatLevel:
03113     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
03114     break;
03115   case NVPTXISD::Tex1DArrayFloatFloatGrad:
03116     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
03117     break;
03118   case NVPTXISD::Tex1DArrayS32S32:
03119     Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
03120     break;
03121   case NVPTXISD::Tex1DArrayS32Float:
03122     Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
03123     break;
03124   case NVPTXISD::Tex1DArrayS32FloatLevel:
03125     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
03126     break;
03127   case NVPTXISD::Tex1DArrayS32FloatGrad:
03128     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
03129     break;
03130   case NVPTXISD::Tex1DArrayU32S32:
03131     Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
03132     break;
03133   case NVPTXISD::Tex1DArrayU32Float:
03134     Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
03135     break;
03136   case NVPTXISD::Tex1DArrayU32FloatLevel:
03137     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
03138     break;
03139   case NVPTXISD::Tex1DArrayU32FloatGrad:
03140     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
03141     break;
03142   case NVPTXISD::Tex2DFloatS32:
03143     Opc = NVPTX::TEX_2D_F32_S32;
03144     break;
03145   case NVPTXISD::Tex2DFloatFloat:
03146     Opc = NVPTX::TEX_2D_F32_F32;
03147     break;
03148   case NVPTXISD::Tex2DFloatFloatLevel:
03149     Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
03150     break;
03151   case NVPTXISD::Tex2DFloatFloatGrad:
03152     Opc = NVPTX::TEX_2D_F32_F32_GRAD;
03153     break;
03154   case NVPTXISD::Tex2DS32S32:
03155     Opc = NVPTX::TEX_2D_S32_S32;
03156     break;
03157   case NVPTXISD::Tex2DS32Float:
03158     Opc = NVPTX::TEX_2D_S32_F32;
03159     break;
03160   case NVPTXISD::Tex2DS32FloatLevel:
03161     Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
03162     break;
03163   case NVPTXISD::Tex2DS32FloatGrad:
03164     Opc = NVPTX::TEX_2D_S32_F32_GRAD;
03165     break;
03166   case NVPTXISD::Tex2DU32S32:
03167     Opc = NVPTX::TEX_2D_U32_S32;
03168     break;
03169   case NVPTXISD::Tex2DU32Float:
03170     Opc = NVPTX::TEX_2D_U32_F32;
03171     break;
03172   case NVPTXISD::Tex2DU32FloatLevel:
03173     Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
03174     break;
03175   case NVPTXISD::Tex2DU32FloatGrad:
03176     Opc = NVPTX::TEX_2D_U32_F32_GRAD;
03177     break;
03178   case NVPTXISD::Tex2DArrayFloatS32:
03179     Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
03180     break;
03181   case NVPTXISD::Tex2DArrayFloatFloat:
03182     Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
03183     break;
03184   case NVPTXISD::Tex2DArrayFloatFloatLevel:
03185     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
03186     break;
03187   case NVPTXISD::Tex2DArrayFloatFloatGrad:
03188     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
03189     break;
03190   case NVPTXISD::Tex2DArrayS32S32:
03191     Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
03192     break;
03193   case NVPTXISD::Tex2DArrayS32Float:
03194     Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
03195     break;
03196   case NVPTXISD::Tex2DArrayS32FloatLevel:
03197     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
03198     break;
03199   case NVPTXISD::Tex2DArrayS32FloatGrad:
03200     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
03201     break;
03202   case NVPTXISD::Tex2DArrayU32S32:
03203     Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
03204     break;
03205   case NVPTXISD::Tex2DArrayU32Float:
03206     Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
03207     break;
03208   case NVPTXISD::Tex2DArrayU32FloatLevel:
03209     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
03210     break;
03211   case NVPTXISD::Tex2DArrayU32FloatGrad:
03212     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
03213     break;
03214   case NVPTXISD::Tex3DFloatS32:
03215     Opc = NVPTX::TEX_3D_F32_S32;
03216     break;
03217   case NVPTXISD::Tex3DFloatFloat:
03218     Opc = NVPTX::TEX_3D_F32_F32;
03219     break;
03220   case NVPTXISD::Tex3DFloatFloatLevel:
03221     Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
03222     break;
03223   case NVPTXISD::Tex3DFloatFloatGrad:
03224     Opc = NVPTX::TEX_3D_F32_F32_GRAD;
03225     break;
03226   case NVPTXISD::Tex3DS32S32:
03227     Opc = NVPTX::TEX_3D_S32_S32;
03228     break;
03229   case NVPTXISD::Tex3DS32Float:
03230     Opc = NVPTX::TEX_3D_S32_F32;
03231     break;
03232   case NVPTXISD::Tex3DS32FloatLevel:
03233     Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
03234     break;
03235   case NVPTXISD::Tex3DS32FloatGrad:
03236     Opc = NVPTX::TEX_3D_S32_F32_GRAD;
03237     break;
03238   case NVPTXISD::Tex3DU32S32:
03239     Opc = NVPTX::TEX_3D_U32_S32;
03240     break;
03241   case NVPTXISD::Tex3DU32Float:
03242     Opc = NVPTX::TEX_3D_U32_F32;
03243     break;
03244   case NVPTXISD::Tex3DU32FloatLevel:
03245     Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
03246     break;
03247   case NVPTXISD::Tex3DU32FloatGrad:
03248     Opc = NVPTX::TEX_3D_U32_F32_GRAD;
03249     break;
03250   case NVPTXISD::TexCubeFloatFloat:
03251     Opc = NVPTX::TEX_CUBE_F32_F32;
03252     break;
03253   case NVPTXISD::TexCubeFloatFloatLevel:
03254     Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
03255     break;
03256   case NVPTXISD::TexCubeS32Float:
03257     Opc = NVPTX::TEX_CUBE_S32_F32;
03258     break;
03259   case NVPTXISD::TexCubeS32FloatLevel:
03260     Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
03261     break;
03262   case NVPTXISD::TexCubeU32Float:
03263     Opc = NVPTX::TEX_CUBE_U32_F32;
03264     break;
03265   case NVPTXISD::TexCubeU32FloatLevel:
03266     Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
03267     break;
03268   case NVPTXISD::TexCubeArrayFloatFloat:
03269     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
03270     break;
03271   case NVPTXISD::TexCubeArrayFloatFloatLevel:
03272     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
03273     break;
03274   case NVPTXISD::TexCubeArrayS32Float:
03275     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
03276     break;
03277   case NVPTXISD::TexCubeArrayS32FloatLevel:
03278     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
03279     break;
03280   case NVPTXISD::TexCubeArrayU32Float:
03281     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
03282     break;
03283   case NVPTXISD::TexCubeArrayU32FloatLevel:
03284     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
03285     break;
03286   case NVPTXISD::Tld4R2DFloatFloat:
03287     Opc = NVPTX::TLD4_R_2D_F32_F32;
03288     break;
03289   case NVPTXISD::Tld4G2DFloatFloat:
03290     Opc = NVPTX::TLD4_G_2D_F32_F32;
03291     break;
03292   case NVPTXISD::Tld4B2DFloatFloat:
03293     Opc = NVPTX::TLD4_B_2D_F32_F32;
03294     break;
03295   case NVPTXISD::Tld4A2DFloatFloat:
03296     Opc = NVPTX::TLD4_A_2D_F32_F32;
03297     break;
03298   case NVPTXISD::Tld4R2DS64Float:
03299     Opc = NVPTX::TLD4_R_2D_S32_F32;
03300     break;
03301   case NVPTXISD::Tld4G2DS64Float:
03302     Opc = NVPTX::TLD4_G_2D_S32_F32;
03303     break;
03304   case NVPTXISD::Tld4B2DS64Float:
03305     Opc = NVPTX::TLD4_B_2D_S32_F32;
03306     break;
03307   case NVPTXISD::Tld4A2DS64Float:
03308     Opc = NVPTX::TLD4_A_2D_S32_F32;
03309     break;
03310   case NVPTXISD::Tld4R2DU64Float:
03311     Opc = NVPTX::TLD4_R_2D_U32_F32;
03312     break;
03313   case NVPTXISD::Tld4G2DU64Float:
03314     Opc = NVPTX::TLD4_G_2D_U32_F32;
03315     break;
03316   case NVPTXISD::Tld4B2DU64Float:
03317     Opc = NVPTX::TLD4_B_2D_U32_F32;
03318     break;
03319   case NVPTXISD::Tld4A2DU64Float:
03320     Opc = NVPTX::TLD4_A_2D_U32_F32;
03321     break;
03322   case NVPTXISD::TexUnified1DFloatS32:
03323     Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
03324     break;
03325   case NVPTXISD::TexUnified1DFloatFloat:
03326     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
03327     break;
03328   case NVPTXISD::TexUnified1DFloatFloatLevel:
03329     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
03330     break;
03331   case NVPTXISD::TexUnified1DFloatFloatGrad:
03332     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
03333     break;
03334   case NVPTXISD::TexUnified1DS32S32:
03335     Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
03336     break;
03337   case NVPTXISD::TexUnified1DS32Float:
03338     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
03339     break;
03340   case NVPTXISD::TexUnified1DS32FloatLevel:
03341     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
03342     break;
03343   case NVPTXISD::TexUnified1DS32FloatGrad:
03344     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
03345     break;
03346   case NVPTXISD::TexUnified1DU32S32:
03347     Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
03348     break;
03349   case NVPTXISD::TexUnified1DU32Float:
03350     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
03351     break;
03352   case NVPTXISD::TexUnified1DU32FloatLevel:
03353     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
03354     break;
03355   case NVPTXISD::TexUnified1DU32FloatGrad:
03356     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
03357     break;
03358   case NVPTXISD::TexUnified1DArrayFloatS32:
03359     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
03360     break;
03361   case NVPTXISD::TexUnified1DArrayFloatFloat:
03362     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
03363     break;
03364   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
03365     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
03366     break;
03367   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
03368     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
03369     break;
03370   case NVPTXISD::TexUnified1DArrayS32S32:
03371     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
03372     break;
03373   case NVPTXISD::TexUnified1DArrayS32Float:
03374     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
03375     break;
03376   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
03377     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
03378     break;
03379   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
03380     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
03381     break;
03382   case NVPTXISD::TexUnified1DArrayU32S32:
03383     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
03384     break;
03385   case NVPTXISD::TexUnified1DArrayU32Float:
03386     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
03387     break;
03388   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
03389     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
03390     break;
03391   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
03392     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
03393     break;
03394   case NVPTXISD::TexUnified2DFloatS32:
03395     Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
03396     break;
03397   case NVPTXISD::TexUnified2DFloatFloat:
03398     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
03399     break;
03400   case NVPTXISD::TexUnified2DFloatFloatLevel:
03401     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
03402     break;
03403   case NVPTXISD::TexUnified2DFloatFloatGrad:
03404     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
03405     break;
03406   case NVPTXISD::TexUnified2DS32S32:
03407     Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
03408     break;
03409   case NVPTXISD::TexUnified2DS32Float:
03410     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
03411     break;
03412   case NVPTXISD::TexUnified2DS32FloatLevel:
03413     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
03414     break;
03415   case NVPTXISD::TexUnified2DS32FloatGrad:
03416     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
03417     break;
03418   case NVPTXISD::TexUnified2DU32S32:
03419     Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
03420     break;
03421   case NVPTXISD::TexUnified2DU32Float:
03422     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
03423     break;
03424   case NVPTXISD::TexUnified2DU32FloatLevel:
03425     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
03426     break;
03427   case NVPTXISD::TexUnified2DU32FloatGrad:
03428     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
03429     break;
03430   case NVPTXISD::TexUnified2DArrayFloatS32:
03431     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
03432     break;
03433   case NVPTXISD::TexUnified2DArrayFloatFloat:
03434     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
03435     break;
03436   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
03437     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
03438     break;
03439   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
03440     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
03441     break;
03442   case NVPTXISD::TexUnified2DArrayS32S32:
03443     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
03444     break;
03445   case NVPTXISD::TexUnified2DArrayS32Float:
03446     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
03447     break;
03448   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
03449     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
03450     break;
03451   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
03452     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
03453     break;
03454   case NVPTXISD::TexUnified2DArrayU32S32:
03455     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
03456     break;
03457   case NVPTXISD::TexUnified2DArrayU32Float:
03458     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
03459     break;
03460   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
03461     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
03462     break;
03463   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
03464     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
03465     break;
03466   case NVPTXISD::TexUnified3DFloatS32:
03467     Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
03468     break;
03469   case NVPTXISD::TexUnified3DFloatFloat:
03470     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
03471     break;
03472   case NVPTXISD::TexUnified3DFloatFloatLevel:
03473     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
03474     break;
03475   case NVPTXISD::TexUnified3DFloatFloatGrad:
03476     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
03477     break;
03478   case NVPTXISD::TexUnified3DS32S32:
03479     Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
03480     break;
03481   case NVPTXISD::TexUnified3DS32Float:
03482     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
03483     break;
03484   case NVPTXISD::TexUnified3DS32FloatLevel:
03485     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
03486     break;
03487   case NVPTXISD::TexUnified3DS32FloatGrad:
03488     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
03489     break;
03490   case NVPTXISD::TexUnified3DU32S32:
03491     Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
03492     break;
03493   case NVPTXISD::TexUnified3DU32Float:
03494     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
03495     break;
03496   case NVPTXISD::TexUnified3DU32FloatLevel:
03497     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
03498     break;
03499   case NVPTXISD::TexUnified3DU32FloatGrad:
03500     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
03501     break;
03502   case NVPTXISD::TexUnifiedCubeFloatFloat:
03503     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
03504     break;
03505   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
03506     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
03507     break;
03508   case NVPTXISD::TexUnifiedCubeS32Float:
03509     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
03510     break;
03511   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
03512     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
03513     break;
03514   case NVPTXISD::TexUnifiedCubeU32Float:
03515     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
03516     break;
03517   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
03518     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
03519     break;
03520   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
03521     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
03522     break;
03523   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
03524     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
03525     break;
03526   case NVPTXISD::TexUnifiedCubeArrayS32Float:
03527     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
03528     break;
03529   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
03530     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
03531     break;
03532   case NVPTXISD::TexUnifiedCubeArrayU32Float:
03533     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
03534     break;
03535   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
03536     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
03537     break;
03538   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
03539     Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
03540     break;
03541   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
03542     Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
03543     break;
03544   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
03545     Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
03546     break;
03547   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
03548     Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
03549     break;
03550   case NVPTXISD::Tld4UnifiedR2DS64Float:
03551     Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
03552     break;
03553   case NVPTXISD::Tld4UnifiedG2DS64Float:
03554     Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
03555     break;
03556   case NVPTXISD::Tld4UnifiedB2DS64Float:
03557     Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
03558     break;
03559   case NVPTXISD::Tld4UnifiedA2DS64Float:
03560     Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
03561     break;
03562   case NVPTXISD::Tld4UnifiedR2DU64Float:
03563     Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
03564     break;
03565   case NVPTXISD::Tld4UnifiedG2DU64Float:
03566     Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
03567     break;
03568   case NVPTXISD::Tld4UnifiedB2DU64Float:
03569     Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
03570     break;
03571   case NVPTXISD::Tld4UnifiedA2DU64Float:
03572     Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
03573     break;
03574   }
03575 
03576   // Copy over operands
03577   for (unsigned i = 1; i < N->getNumOperands(); ++i) {
03578     Ops.push_back(N->getOperand(i));
03579   }
03580 
03581   Ops.push_back(Chain);
03582   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
03583   return Ret;
03584 }
03585 
03586 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
03587   SDValue Chain = N->getOperand(0);
03588   SDValue TexHandle = N->getOperand(1);
03589   SDNode *Ret = nullptr;
03590   unsigned Opc = 0;
03591   SmallVector<SDValue, 8> Ops;
03592   switch (N->getOpcode()) {
03593   default: return nullptr;
03594   case NVPTXISD::Suld1DI8Clamp:
03595     Opc = NVPTX::SULD_1D_I8_CLAMP;
03596     Ops.push_back(TexHandle);
03597     Ops.push_back(N->getOperand(2));
03598     Ops.push_back(Chain);
03599     break;
03600   case NVPTXISD::Suld1DI16Clamp:
03601     Opc = NVPTX::SULD_1D_I16_CLAMP;
03602     Ops.push_back(TexHandle);
03603     Ops.push_back(N->getOperand(2));
03604     Ops.push_back(Chain);
03605     break;
03606   case NVPTXISD::Suld1DI32Clamp:
03607     Opc = NVPTX::SULD_1D_I32_CLAMP;
03608     Ops.push_back(TexHandle);
03609     Ops.push_back(N->getOperand(2));
03610     Ops.push_back(Chain);
03611     break;
03612   case NVPTXISD::Suld1DI64Clamp:
03613     Opc = NVPTX::SULD_1D_I64_CLAMP;
03614     Ops.push_back(TexHandle);
03615     Ops.push_back(N->getOperand(2));
03616     Ops.push_back(Chain);
03617     break;
03618   case NVPTXISD::Suld1DV2I8Clamp:
03619     Opc = NVPTX::SULD_1D_V2I8_CLAMP;
03620     Ops.push_back(TexHandle);
03621     Ops.push_back(N->getOperand(2));
03622     Ops.push_back(Chain);
03623     break;
03624   case NVPTXISD::Suld1DV2I16Clamp:
03625     Opc = NVPTX::SULD_1D_V2I16_CLAMP;
03626     Ops.push_back(TexHandle);
03627     Ops.push_back(N->getOperand(2));
03628     Ops.push_back(Chain);
03629     break;
03630   case NVPTXISD::Suld1DV2I32Clamp:
03631     Opc = NVPTX::SULD_1D_V2I32_CLAMP;
03632     Ops.push_back(TexHandle);
03633     Ops.push_back(N->getOperand(2));
03634     Ops.push_back(Chain);
03635     break;
03636   case NVPTXISD::Suld1DV2I64Clamp:
03637     Opc = NVPTX::SULD_1D_V2I64_CLAMP;
03638     Ops.push_back(TexHandle);
03639     Ops.push_back(N->getOperand(2));
03640     Ops.push_back(Chain);
03641     break;
03642   case NVPTXISD::Suld1DV4I8Clamp:
03643     Opc = NVPTX::SULD_1D_V4I8_CLAMP;
03644     Ops.push_back(TexHandle);
03645     Ops.push_back(N->getOperand(2));
03646     Ops.push_back(Chain);
03647     break;
03648   case NVPTXISD::Suld1DV4I16Clamp:
03649     Opc = NVPTX::SULD_1D_V4I16_CLAMP;
03650     Ops.push_back(TexHandle);
03651     Ops.push_back(N->getOperand(2));
03652     Ops.push_back(Chain);
03653     break;
03654   case NVPTXISD::Suld1DV4I32Clamp:
03655     Opc = NVPTX::SULD_1D_V4I32_CLAMP;
03656     Ops.push_back(TexHandle);
03657     Ops.push_back(N->getOperand(2));
03658     Ops.push_back(Chain);
03659     break;
03660   case NVPTXISD::Suld1DArrayI8Clamp:
03661     Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
03662     Ops.push_back(TexHandle);
03663     Ops.push_back(N->getOperand(2));
03664     Ops.push_back(N->getOperand(3));
03665     Ops.push_back(Chain);
03666     break;
03667   case NVPTXISD::Suld1DArrayI16Clamp:
03668     Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
03669     Ops.push_back(TexHandle);
03670     Ops.push_back(N->getOperand(2));
03671     Ops.push_back(N->getOperand(3));
03672     Ops.push_back(Chain);
03673     break;
03674   case NVPTXISD::Suld1DArrayI32Clamp:
03675     Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
03676     Ops.push_back(TexHandle);
03677     Ops.push_back(N->getOperand(2));
03678     Ops.push_back(N->getOperand(3));
03679     Ops.push_back(Chain);
03680     break;
03681   case NVPTXISD::Suld1DArrayI64Clamp:
03682     Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
03683     Ops.push_back(TexHandle);
03684     Ops.push_back(N->getOperand(2));
03685     Ops.push_back(N->getOperand(3));
03686     Ops.push_back(Chain);
03687     break;
03688   case NVPTXISD::Suld1DArrayV2I8Clamp:
03689     Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
03690     Ops.push_back(TexHandle);
03691     Ops.push_back(N->getOperand(2));
03692     Ops.push_back(N->getOperand(3));
03693     Ops.push_back(Chain);
03694     break;
03695   case NVPTXISD::Suld1DArrayV2I16Clamp:
03696     Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
03697     Ops.push_back(TexHandle);
03698     Ops.push_back(N->getOperand(2));
03699     Ops.push_back(N->getOperand(3));
03700     Ops.push_back(Chain);
03701     break;
03702   case NVPTXISD::Suld1DArrayV2I32Clamp:
03703     Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
03704     Ops.push_back(TexHandle);
03705     Ops.push_back(N->getOperand(2));
03706     Ops.push_back(N->getOperand(3));
03707     Ops.push_back(Chain);
03708     break;
03709   case NVPTXISD::Suld1DArrayV2I64Clamp:
03710     Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
03711     Ops.push_back(TexHandle);
03712     Ops.push_back(N->getOperand(2));
03713     Ops.push_back(N->getOperand(3));
03714     Ops.push_back(Chain);
03715     break;
03716   case NVPTXISD::Suld1DArrayV4I8Clamp:
03717     Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
03718     Ops.push_back(TexHandle);
03719     Ops.push_back(N->getOperand(2));
03720     Ops.push_back(N->getOperand(3));
03721     Ops.push_back(Chain);
03722     break;
03723   case NVPTXISD::Suld1DArrayV4I16Clamp:
03724     Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
03725     Ops.push_back(TexHandle);
03726     Ops.push_back(N->getOperand(2));
03727     Ops.push_back(N->getOperand(3));
03728     Ops.push_back(Chain);
03729     break;
03730   case NVPTXISD::Suld1DArrayV4I32Clamp:
03731     Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
03732     Ops.push_back(TexHandle);
03733     Ops.push_back(N->getOperand(2));
03734     Ops.push_back(N->getOperand(3));
03735     Ops.push_back(Chain);
03736     break;
03737   case NVPTXISD::Suld2DI8Clamp:
03738     Opc = NVPTX::SULD_2D_I8_CLAMP;
03739     Ops.push_back(TexHandle);
03740     Ops.push_back(N->getOperand(2));
03741     Ops.push_back(N->getOperand(3));
03742     Ops.push_back(Chain);
03743     break;
03744   case NVPTXISD::Suld2DI16Clamp:
03745     Opc = NVPTX::SULD_2D_I16_CLAMP;
03746     Ops.push_back(TexHandle);
03747     Ops.push_back(N->getOperand(2));
03748     Ops.push_back(N->getOperand(3));
03749     Ops.push_back(Chain);
03750     break;
03751   case NVPTXISD::Suld2DI32Clamp:
03752     Opc = NVPTX::SULD_2D_I32_CLAMP;
03753     Ops.push_back(TexHandle);
03754     Ops.push_back(N->getOperand(2));
03755     Ops.push_back(N->getOperand(3));
03756     Ops.push_back(Chain);
03757     break;
03758   case NVPTXISD::Suld2DI64Clamp:
03759     Opc = NVPTX::SULD_2D_I64_CLAMP;
03760     Ops.push_back(TexHandle);
03761     Ops.push_back(N->getOperand(2));
03762     Ops.push_back(N->getOperand(3));
03763     Ops.push_back(Chain);
03764     break;
03765   case NVPTXISD::Suld2DV2I8Clamp:
03766     Opc = NVPTX::SULD_2D_V2I8_CLAMP;
03767     Ops.push_back(TexHandle);
03768     Ops.push_back(N->getOperand(2));
03769     Ops.push_back(N->getOperand(3));
03770     Ops.push_back(Chain);
03771     break;
03772   case NVPTXISD::Suld2DV2I16Clamp:
03773     Opc = NVPTX::SULD_2D_V2I16_CLAMP;
03774     Ops.push_back(TexHandle);
03775     Ops.push_back(N->getOperand(2));
03776     Ops.push_back(N->getOperand(3));
03777     Ops.push_back(Chain);
03778     break;
03779   case NVPTXISD::Suld2DV2I32Clamp:
03780     Opc = NVPTX::SULD_2D_V2I32_CLAMP;
03781     Ops.push_back(TexHandle);
03782     Ops.push_back(N->getOperand(2));
03783     Ops.push_back(N->getOperand(3));
03784     Ops.push_back(Chain);
03785     break;
03786   case NVPTXISD::Suld2DV2I64Clamp:
03787     Opc = NVPTX::SULD_2D_V2I64_CLAMP;
03788     Ops.push_back(TexHandle);
03789     Ops.push_back(N->getOperand(2));
03790     Ops.push_back(N->getOperand(3));
03791     Ops.push_back(Chain);
03792     break;
03793   case NVPTXISD::Suld2DV4I8Clamp:
03794     Opc = NVPTX::SULD_2D_V4I8_CLAMP;
03795     Ops.push_back(TexHandle);
03796     Ops.push_back(N->getOperand(2));
03797     Ops.push_back(N->getOperand(3));
03798     Ops.push_back(Chain);
03799     break;
03800   case NVPTXISD::Suld2DV4I16Clamp:
03801     Opc = NVPTX::SULD_2D_V4I16_CLAMP;
03802     Ops.push_back(TexHandle);
03803     Ops.push_back(N->getOperand(2));
03804     Ops.push_back(N->getOperand(3));
03805     Ops.push_back(Chain);
03806     break;
03807   case NVPTXISD::Suld2DV4I32Clamp:
03808     Opc = NVPTX::SULD_2D_V4I32_CLAMP;
03809     Ops.push_back(TexHandle);
03810     Ops.push_back(N->getOperand(2));
03811     Ops.push_back(N->getOperand(3));
03812     Ops.push_back(Chain);
03813     break;
03814   case NVPTXISD::Suld2DArrayI8Clamp:
03815     Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
03816     Ops.push_back(TexHandle);
03817     Ops.push_back(N->getOperand(2));
03818     Ops.push_back(N->getOperand(3));
03819     Ops.push_back(N->getOperand(4));
03820     Ops.push_back(Chain);
03821     break;
03822   case NVPTXISD::Suld2DArrayI16Clamp:
03823     Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
03824     Ops.push_back(TexHandle);
03825     Ops.push_back(N->getOperand(2));
03826     Ops.push_back(N->getOperand(3));
03827     Ops.push_back(N->getOperand(4));
03828     Ops.push_back(Chain);
03829     break;
03830   case NVPTXISD::Suld2DArrayI32Clamp:
03831     Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
03832     Ops.push_back(TexHandle);
03833     Ops.push_back(N->getOperand(2));
03834     Ops.push_back(N->getOperand(3));
03835     Ops.push_back(N->getOperand(4));
03836     Ops.push_back(Chain);
03837     break;
03838   case NVPTXISD::Suld2DArrayI64Clamp:
03839     Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
03840     Ops.push_back(TexHandle);
03841     Ops.push_back(N->getOperand(2));
03842     Ops.push_back(N->getOperand(3));
03843     Ops.push_back(N->getOperand(4));
03844     Ops.push_back(Chain);
03845     break;
03846   case NVPTXISD::Suld2DArrayV2I8Clamp:
03847     Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
03848     Ops.push_back(TexHandle);
03849     Ops.push_back(N->getOperand(2));
03850     Ops.push_back(N->getOperand(3));
03851     Ops.push_back(N->getOperand(4));
03852     Ops.push_back(Chain);
03853     break;
03854   case NVPTXISD::Suld2DArrayV2I16Clamp:
03855     Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
03856     Ops.push_back(TexHandle);
03857     Ops.push_back(N->getOperand(2));
03858     Ops.push_back(N->getOperand(3));
03859     Ops.push_back(N->getOperand(4));
03860     Ops.push_back(Chain);
03861     break;
03862   case NVPTXISD::Suld2DArrayV2I32Clamp:
03863     Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
03864     Ops.push_back(TexHandle);
03865     Ops.push_back(N->getOperand(2));
03866     Ops.push_back(N->getOperand(3));
03867     Ops.push_back(N->getOperand(4));
03868     Ops.push_back(Chain);
03869     break;
03870   case NVPTXISD::Suld2DArrayV2I64Clamp:
03871     Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
03872     Ops.push_back(TexHandle);
03873     Ops.push_back(N->getOperand(2));
03874     Ops.push_back(N->getOperand(3));
03875     Ops.push_back(N->getOperand(4));
03876     Ops.push_back(Chain);
03877     break;
03878   case NVPTXISD::Suld2DArrayV4I8Clamp:
03879     Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
03880     Ops.push_back(TexHandle);
03881     Ops.push_back(N->getOperand(2));
03882     Ops.push_back(N->getOperand(3));
03883     Ops.push_back(N->getOperand(4));
03884     Ops.push_back(Chain);
03885     break;
03886   case NVPTXISD::Suld2DArrayV4I16Clamp:
03887     Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
03888     Ops.push_back(TexHandle);
03889     Ops.push_back(N->getOperand(2));
03890     Ops.push_back(N->getOperand(3));
03891     Ops.push_back(N->getOperand(4));
03892     Ops.push_back(Chain);
03893     break;
03894   case NVPTXISD::Suld2DArrayV4I32Clamp:
03895     Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
03896     Ops.push_back(TexHandle);
03897     Ops.push_back(N->getOperand(2));
03898     Ops.push_back(N->getOperand(3));
03899     Ops.push_back(N->getOperand(4));
03900     Ops.push_back(Chain);
03901     break;
03902   case NVPTXISD::Suld3DI8Clamp:
03903     Opc = NVPTX::SULD_3D_I8_CLAMP;
03904     Ops.push_back(TexHandle);
03905     Ops.push_back(N->getOperand(2));
03906     Ops.push_back(N->getOperand(3));
03907     Ops.push_back(N->getOperand(4));
03908     Ops.push_back(Chain);
03909     break;
03910   case NVPTXISD::Suld3DI16Clamp:
03911     Opc = NVPTX::SULD_3D_I16_CLAMP;
03912     Ops.push_back(TexHandle);
03913     Ops.push_back(N->getOperand(2));
03914     Ops.push_back(N->getOperand(3));
03915     Ops.push_back(N->getOperand(4));
03916     Ops.push_back(Chain);
03917     break;
03918   case NVPTXISD::Suld3DI32Clamp:
03919     Opc = NVPTX::SULD_3D_I32_CLAMP;
03920     Ops.push_back(TexHandle);
03921     Ops.push_back(N->getOperand(2));
03922     Ops.push_back(N->getOperand(3));
03923     Ops.push_back(N->getOperand(4));
03924     Ops.push_back(Chain);
03925     break;
03926   case NVPTXISD::Suld3DI64Clamp:
03927     Opc = NVPTX::SULD_3D_I64_CLAMP;
03928     Ops.push_back(TexHandle);
03929     Ops.push_back(N->getOperand(2));
03930     Ops.push_back(N->getOperand(3));
03931     Ops.push_back(N->getOperand(4));
03932     Ops.push_back(Chain);
03933     break;
03934   case NVPTXISD::Suld3DV2I8Clamp:
03935     Opc = NVPTX::SULD_3D_V2I8_CLAMP;
03936     Ops.push_back(TexHandle);
03937     Ops.push_back(N->getOperand(2));
03938     Ops.push_back(N->getOperand(3));
03939     Ops.push_back(N->getOperand(4));
03940     Ops.push_back(Chain);
03941     break;
03942   case NVPTXISD::Suld3DV2I16Clamp:
03943     Opc = NVPTX::SULD_3D_V2I16_CLAMP;
03944     Ops.push_back(TexHandle);
03945     Ops.push_back(N->getOperand(2));
03946     Ops.push_back(N->getOperand(3));
03947     Ops.push_back(N->getOperand(4));
03948     Ops.push_back(Chain);
03949     break;
03950   case NVPTXISD::Suld3DV2I32Clamp:
03951     Opc = NVPTX::SULD_3D_V2I32_CLAMP;
03952     Ops.push_back(TexHandle);
03953     Ops.push_back(N->getOperand(2));
03954     Ops.push_back(N->getOperand(3));
03955     Ops.push_back(N->getOperand(4));
03956     Ops.push_back(Chain);
03957     break;
03958   case NVPTXISD::Suld3DV2I64Clamp:
03959     Opc = NVPTX::SULD_3D_V2I64_CLAMP;
03960     Ops.push_back(TexHandle);
03961     Ops.push_back(N->getOperand(2));
03962     Ops.push_back(N->getOperand(3));
03963     Ops.push_back(N->getOperand(4));
03964     Ops.push_back(Chain);
03965     break;
03966   case NVPTXISD::Suld3DV4I8Clamp:
03967     Opc = NVPTX::SULD_3D_V4I8_CLAMP;
03968     Ops.push_back(TexHandle);
03969     Ops.push_back(N->getOperand(2));
03970     Ops.push_back(N->getOperand(3));
03971     Ops.push_back(N->getOperand(4));
03972     Ops.push_back(Chain);
03973     break;
03974   case NVPTXISD::Suld3DV4I16Clamp:
03975     Opc = NVPTX::SULD_3D_V4I16_CLAMP;
03976     Ops.push_back(TexHandle);
03977     Ops.push_back(N->getOperand(2));
03978     Ops.push_back(N->getOperand(3));
03979     Ops.push_back(N->getOperand(4));
03980     Ops.push_back(Chain);
03981     break;
03982   case NVPTXISD::Suld3DV4I32Clamp:
03983     Opc = NVPTX::SULD_3D_V4I32_CLAMP;
03984     Ops.push_back(TexHandle);
03985     Ops.push_back(N->getOperand(2));
03986     Ops.push_back(N->getOperand(3));
03987     Ops.push_back(N->getOperand(4));
03988     Ops.push_back(Chain);
03989     break;
03990   case NVPTXISD::Suld1DI8Trap:
03991     Opc = NVPTX::SULD_1D_I8_TRAP;
03992     Ops.push_back(TexHandle);
03993     Ops.push_back(N->getOperand(2));
03994     Ops.push_back(Chain);
03995     break;
03996   case NVPTXISD::Suld1DI16Trap:
03997     Opc = NVPTX::SULD_1D_I16_TRAP;
03998     Ops.push_back(TexHandle);
03999     Ops.push_back(N->getOperand(2));
04000     Ops.push_back(Chain);
04001     break;
04002   case NVPTXISD::Suld1DI32Trap:
04003     Opc = NVPTX::SULD_1D_I32_TRAP;
04004     Ops.push_back(TexHandle);
04005     Ops.push_back(N->getOperand(2));
04006     Ops.push_back(Chain);
04007     break;
04008   case NVPTXISD::Suld1DI64Trap:
04009     Opc = NVPTX::SULD_1D_I64_TRAP;
04010     Ops.push_back(TexHandle);
04011     Ops.push_back(N->getOperand(2));
04012     Ops.push_back(Chain);
04013     break;
04014   case NVPTXISD::Suld1DV2I8Trap:
04015     Opc = NVPTX::SULD_1D_V2I8_TRAP;
04016     Ops.push_back(TexHandle);
04017     Ops.push_back(N->getOperand(2));
04018     Ops.push_back(Chain);
04019     break;
04020   case NVPTXISD::Suld1DV2I16Trap:
04021     Opc = NVPTX::SULD_1D_V2I16_TRAP;
04022     Ops.push_back(TexHandle);
04023     Ops.push_back(N->getOperand(2));
04024     Ops.push_back(Chain);
04025     break;
04026   case NVPTXISD::Suld1DV2I32Trap:
04027     Opc = NVPTX::SULD_1D_V2I32_TRAP;
04028     Ops.push_back(TexHandle);
04029     Ops.push_back(N->getOperand(2));
04030     Ops.push_back(Chain);
04031     break;
04032   case NVPTXISD::Suld1DV2I64Trap:
04033     Opc = NVPTX::SULD_1D_V2I64_TRAP;
04034     Ops.push_back(TexHandle);
04035     Ops.push_back(N->getOperand(2));
04036     Ops.push_back(Chain);
04037     break;
04038   case NVPTXISD::Suld1DV4I8Trap:
04039     Opc = NVPTX::SULD_1D_V4I8_TRAP;
04040     Ops.push_back(TexHandle);
04041     Ops.push_back(N->getOperand(2));
04042     Ops.push_back(Chain);
04043     break;
04044   case NVPTXISD::Suld1DV4I16Trap:
04045     Opc = NVPTX::SULD_1D_V4I16_TRAP;
04046     Ops.push_back(TexHandle);
04047     Ops.push_back(N->getOperand(2));
04048     Ops.push_back(Chain);
04049     break;
04050   case NVPTXISD::Suld1DV4I32Trap:
04051     Opc = NVPTX::SULD_1D_V4I32_TRAP;
04052     Ops.push_back(TexHandle);
04053     Ops.push_back(N->getOperand(2));
04054     Ops.push_back(Chain);
04055     break;
04056   case NVPTXISD::Suld1DArrayI8Trap:
04057     Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
04058     Ops.push_back(TexHandle);
04059     Ops.push_back(N->getOperand(2));
04060     Ops.push_back(N->getOperand(3));
04061     Ops.push_back(Chain);
04062     break;
04063   case NVPTXISD::Suld1DArrayI16Trap:
04064     Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
04065     Ops.push_back(TexHandle);
04066     Ops.push_back(N->getOperand(2));
04067     Ops.push_back(N->getOperand(3));
04068     Ops.push_back(Chain);
04069     break;
04070   case NVPTXISD::Suld1DArrayI32Trap:
04071     Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
04072     Ops.push_back(TexHandle);
04073     Ops.push_back(N->getOperand(2));
04074     Ops.push_back(N->getOperand(3));
04075     Ops.push_back(Chain);
04076     break;
04077   case NVPTXISD::Suld1DArrayI64Trap:
04078     Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
04079     Ops.push_back(TexHandle);
04080     Ops.push_back(N->getOperand(2));
04081     Ops.push_back(N->getOperand(3));
04082     Ops.push_back(Chain);
04083     break;
04084   case NVPTXISD::Suld1DArrayV2I8Trap:
04085     Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
04086     Ops.push_back(TexHandle);
04087     Ops.push_back(N->getOperand(2));
04088     Ops.push_back(N->getOperand(3));
04089     Ops.push_back(Chain);
04090     break;
04091   case NVPTXISD::Suld1DArrayV2I16Trap:
04092     Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
04093     Ops.push_back(TexHandle);
04094     Ops.push_back(N->getOperand(2));
04095     Ops.push_back(N->getOperand(3));
04096     Ops.push_back(Chain);
04097     break;
04098   case NVPTXISD::Suld1DArrayV2I32Trap:
04099     Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
04100     Ops.push_back(TexHandle);
04101     Ops.push_back(N->getOperand(2));
04102     Ops.push_back(N->getOperand(3));
04103     Ops.push_back(Chain);
04104     break;
04105   case NVPTXISD::Suld1DArrayV2I64Trap:
04106     Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
04107     Ops.push_back(TexHandle);
04108     Ops.push_back(N->getOperand(2));
04109     Ops.push_back(N->getOperand(3));
04110     Ops.push_back(Chain);
04111     break;
04112   case NVPTXISD::Suld1DArrayV4I8Trap:
04113     Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
04114     Ops.push_back(TexHandle);
04115     Ops.push_back(N->getOperand(2));
04116     Ops.push_back(N->getOperand(3));
04117     Ops.push_back(Chain);
04118     break;
04119   case NVPTXISD::Suld1DArrayV4I16Trap:
04120     Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
04121     Ops.push_back(TexHandle);
04122     Ops.push_back(N->getOperand(2));
04123     Ops.push_back(N->getOperand(3));
04124     Ops.push_back(Chain);
04125     break;
04126   case NVPTXISD::Suld1DArrayV4I32Trap:
04127     Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
04128     Ops.push_back(TexHandle);
04129     Ops.push_back(N->getOperand(2));
04130     Ops.push_back(N->getOperand(3));
04131     Ops.push_back(Chain);
04132     break;
04133   case NVPTXISD::Suld2DI8Trap:
04134     Opc = NVPTX::SULD_2D_I8_TRAP;
04135     Ops.push_back(TexHandle);
04136     Ops.push_back(N->getOperand(2));
04137     Ops.push_back(N->getOperand(3));
04138     Ops.push_back(Chain);
04139     break;
04140   case NVPTXISD::Suld2DI16Trap:
04141     Opc = NVPTX::SULD_2D_I16_TRAP;
04142     Ops.push_back(TexHandle);
04143     Ops.push_back(N->getOperand(2));
04144     Ops.push_back(N->getOperand(3));
04145     Ops.push_back(Chain);
04146     break;
04147   case NVPTXISD::Suld2DI32Trap:
04148     Opc = NVPTX::SULD_2D_I32_TRAP;
04149     Ops.push_back(TexHandle);
04150     Ops.push_back(N->getOperand(2));
04151     Ops.push_back(N->getOperand(3));
04152     Ops.push_back(Chain);
04153     break;
04154   case NVPTXISD::Suld2DI64Trap:
04155     Opc = NVPTX::SULD_2D_I64_TRAP;
04156     Ops.push_back(TexHandle);
04157     Ops.push_back(N->getOperand(2));
04158     Ops.push_back(N->getOperand(3));
04159     Ops.push_back(Chain);
04160     break;
04161   case NVPTXISD::Suld2DV2I8Trap:
04162     Opc = NVPTX::SULD_2D_V2I8_TRAP;
04163     Ops.push_back(TexHandle);
04164     Ops.push_back(N->getOperand(2));
04165     Ops.push_back(N->getOperand(3));
04166     Ops.push_back(Chain);
04167     break;
04168   case NVPTXISD::Suld2DV2I16Trap:
04169     Opc = NVPTX::SULD_2D_V2I16_TRAP;
04170     Ops.push_back(TexHandle);
04171     Ops.push_back(N->getOperand(2));
04172     Ops.push_back(N->getOperand(3));
04173     Ops.push_back(Chain);
04174     break;
04175   case NVPTXISD::Suld2DV2I32Trap:
04176     Opc = NVPTX::SULD_2D_V2I32_TRAP;
04177     Ops.push_back(TexHandle);
04178     Ops.push_back(N->getOperand(2));
04179     Ops.push_back(N->getOperand(3));
04180     Ops.push_back(Chain);
04181     break;
04182   case NVPTXISD::Suld2DV2I64Trap:
04183     Opc = NVPTX::SULD_2D_V2I64_TRAP;
04184     Ops.push_back(TexHandle);
04185     Ops.push_back(N->getOperand(2));
04186     Ops.push_back(N->getOperand(3));
04187     Ops.push_back(Chain);
04188     break;
04189   case NVPTXISD::Suld2DV4I8Trap:
04190     Opc = NVPTX::SULD_2D_V4I8_TRAP;
04191     Ops.push_back(TexHandle);
04192     Ops.push_back(N->getOperand(2));
04193     Ops.push_back(N->getOperand(3));
04194     Ops.push_back(Chain);
04195     break;
04196   case NVPTXISD::Suld2DV4I16Trap:
04197     Opc = NVPTX::SULD_2D_V4I16_TRAP;
04198     Ops.push_back(TexHandle);
04199     Ops.push_back(N->getOperand(2));
04200     Ops.push_back(N->getOperand(3));
04201     Ops.push_back(Chain);
04202     break;
04203   case NVPTXISD::Suld2DV4I32Trap:
04204     Opc = NVPTX::SULD_2D_V4I32_TRAP;
04205     Ops.push_back(TexHandle);
04206     Ops.push_back(N->getOperand(2));
04207     Ops.push_back(N->getOperand(3));
04208     Ops.push_back(Chain);
04209     break;
04210   case NVPTXISD::Suld2DArrayI8Trap:
04211     Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
04212     Ops.push_back(TexHandle);
04213     Ops.push_back(N->getOperand(2));
04214     Ops.push_back(N->getOperand(3));
04215     Ops.push_back(N->getOperand(4));
04216     Ops.push_back(Chain);
04217     break;
04218   case NVPTXISD::Suld2DArrayI16Trap:
04219     Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
04220     Ops.push_back(TexHandle);
04221     Ops.push_back(N->getOperand(2));
04222     Ops.push_back(N->getOperand(3));
04223     Ops.push_back(N->getOperand(4));
04224     Ops.push_back(Chain);
04225     break;
04226   case NVPTXISD::Suld2DArrayI32Trap:
04227     Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
04228     Ops.push_back(TexHandle);
04229     Ops.push_back(N->getOperand(2));
04230     Ops.push_back(N->getOperand(3));
04231     Ops.push_back(N->getOperand(4));
04232     Ops.push_back(Chain);
04233     break;
04234   case NVPTXISD::Suld2DArrayI64Trap:
04235     Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
04236     Ops.push_back(TexHandle);
04237     Ops.push_back(N->getOperand(2));
04238     Ops.push_back(N->getOperand(3));
04239     Ops.push_back(N->getOperand(4));
04240     Ops.push_back(Chain);
04241     break;
04242   case NVPTXISD::Suld2DArrayV2I8Trap:
04243     Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
04244     Ops.push_back(TexHandle);
04245     Ops.push_back(N->getOperand(2));
04246     Ops.push_back(N->getOperand(3));
04247     Ops.push_back(N->getOperand(4));
04248     Ops.push_back(Chain);
04249     break;
04250   case NVPTXISD::Suld2DArrayV2I16Trap:
04251     Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
04252     Ops.push_back(TexHandle);
04253     Ops.push_back(N->getOperand(2));
04254     Ops.push_back(N->getOperand(3));
04255     Ops.push_back(N->getOperand(4));
04256     Ops.push_back(Chain);
04257     break;
04258   case NVPTXISD::Suld2DArrayV2I32Trap:
04259     Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
04260     Ops.push_back(TexHandle);
04261     Ops.push_back(N->getOperand(2));
04262     Ops.push_back(N->getOperand(3));
04263     Ops.push_back(N->getOperand(4));
04264     Ops.push_back(Chain);
04265     break;
04266   case NVPTXISD::Suld2DArrayV2I64Trap:
04267     Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
04268     Ops.push_back(TexHandle);
04269     Ops.push_back(N->getOperand(2));
04270     Ops.push_back(N->getOperand(3));
04271     Ops.push_back(N->getOperand(4));
04272     Ops.push_back(Chain);
04273     break;
04274   case NVPTXISD::Suld2DArrayV4I8Trap:
04275     Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
04276     Ops.push_back(TexHandle);
04277     Ops.push_back(N->getOperand(2));
04278     Ops.push_back(N->getOperand(3));
04279     Ops.push_back(N->getOperand(4));
04280     Ops.push_back(Chain);
04281     break;
04282   case NVPTXISD::Suld2DArrayV4I16Trap:
04283     Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
04284     Ops.push_back(TexHandle);
04285     Ops.push_back(N->getOperand(2));
04286     Ops.push_back(N->getOperand(3));
04287     Ops.push_back(N->getOperand(4));
04288     Ops.push_back(Chain);
04289     break;
04290   case NVPTXISD::Suld2DArrayV4I32Trap:
04291     Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
04292     Ops.push_back(TexHandle);
04293     Ops.push_back(N->getOperand(2));
04294     Ops.push_back(N->getOperand(3));
04295     Ops.push_back(N->getOperand(4));
04296     Ops.push_back(Chain);
04297     break;
04298   case NVPTXISD::Suld3DI8Trap:
04299     Opc = NVPTX::SULD_3D_I8_TRAP;
04300     Ops.push_back(TexHandle);
04301     Ops.push_back(N->getOperand(2));
04302     Ops.push_back(N->getOperand(3));
04303     Ops.push_back(N->getOperand(4));
04304     Ops.push_back(Chain);
04305     break;
04306   case NVPTXISD::Suld3DI16Trap:
04307     Opc = NVPTX::SULD_3D_I16_TRAP;
04308     Ops.push_back(TexHandle);
04309     Ops.push_back(N->getOperand(2));
04310     Ops.push_back(N->getOperand(3));
04311     Ops.push_back(N->getOperand(4));
04312     Ops.push_back(Chain);
04313     break;
04314   case NVPTXISD::Suld3DI32Trap:
04315     Opc = NVPTX::SULD_3D_I32_TRAP;
04316     Ops.push_back(TexHandle);
04317     Ops.push_back(N->getOperand(2));
04318     Ops.push_back(N->getOperand(3));
04319     Ops.push_back(N->getOperand(4));
04320     Ops.push_back(Chain);
04321     break;
04322   case NVPTXISD::Suld3DI64Trap:
04323     Opc = NVPTX::SULD_3D_I64_TRAP;
04324     Ops.push_back(TexHandle);
04325     Ops.push_back(N->getOperand(2));
04326     Ops.push_back(N->getOperand(3));
04327     Ops.push_back(N->getOperand(4));
04328     Ops.push_back(Chain);
04329     break;
04330   case NVPTXISD::Suld3DV2I8Trap:
04331     Opc = NVPTX::SULD_3D_V2I8_TRAP;
04332     Ops.push_back(TexHandle);
04333     Ops.push_back(N->getOperand(2));
04334     Ops.push_back(N->getOperand(3));
04335     Ops.push_back(N->getOperand(4));
04336     Ops.push_back(Chain);
04337     break;
04338   case NVPTXISD::Suld3DV2I16Trap:
04339     Opc = NVPTX::SULD_3D_V2I16_TRAP;
04340     Ops.push_back(TexHandle);
04341     Ops.push_back(N->getOperand(2));
04342     Ops.push_back(N->getOperand(3));
04343     Ops.push_back(N->getOperand(4));
04344     Ops.push_back(Chain);
04345     break;
04346   case NVPTXISD::Suld3DV2I32Trap:
04347     Opc = NVPTX::SULD_3D_V2I32_TRAP;
04348     Ops.push_back(TexHandle);
04349     Ops.push_back(N->getOperand(2));
04350     Ops.push_back(N->getOperand(3));
04351     Ops.push_back(N->getOperand(4));
04352     Ops.push_back(Chain);
04353     break;
04354   case NVPTXISD::Suld3DV2I64Trap:
04355     Opc = NVPTX::SULD_3D_V2I64_TRAP;
04356     Ops.push_back(TexHandle);
04357     Ops.push_back(N->getOperand(2));
04358     Ops.push_back(N->getOperand(3));
04359     Ops.push_back(N->getOperand(4));
04360     Ops.push_back(Chain);
04361     break;
04362   case NVPTXISD::Suld3DV4I8Trap:
04363     Opc = NVPTX::SULD_3D_V4I8_TRAP;
04364     Ops.push_back(TexHandle);
04365     Ops.push_back(N->getOperand(2));
04366     Ops.push_back(N->getOperand(3));
04367     Ops.push_back(N->getOperand(4));
04368     Ops.push_back(Chain);
04369     break;
04370   case NVPTXISD::Suld3DV4I16Trap:
04371     Opc = NVPTX::SULD_3D_V4I16_TRAP;
04372     Ops.push_back(TexHandle);
04373     Ops.push_back(N->getOperand(2));
04374     Ops.push_back(N->getOperand(3));
04375     Ops.push_back(N->getOperand(4));
04376     Ops.push_back(Chain);
04377     break;
04378   case NVPTXISD::Suld3DV4I32Trap:
04379     Opc = NVPTX::SULD_3D_V4I32_TRAP;
04380     Ops.push_back(TexHandle);
04381     Ops.push_back(N->getOperand(2));
04382     Ops.push_back(N->getOperand(3));
04383     Ops.push_back(N->getOperand(4));
04384     Ops.push_back(Chain);
04385     break;
04386   case NVPTXISD::Suld1DI8Zero:
04387     Opc = NVPTX::SULD_1D_I8_ZERO;
04388     Ops.push_back(TexHandle);
04389     Ops.push_back(N->getOperand(2));
04390     Ops.push_back(Chain);
04391     break;
04392   case NVPTXISD::Suld1DI16Zero:
04393     Opc = NVPTX::SULD_1D_I16_ZERO;
04394     Ops.push_back(TexHandle);
04395     Ops.push_back(N->getOperand(2));
04396     Ops.push_back(Chain);
04397     break;
04398   case NVPTXISD::Suld1DI32Zero:
04399     Opc = NVPTX::SULD_1D_I32_ZERO;
04400     Ops.push_back(TexHandle);
04401     Ops.push_back(N->getOperand(2));
04402     Ops.push_back(Chain);
04403     break;
04404   case NVPTXISD::Suld1DI64Zero:
04405     Opc = NVPTX::SULD_1D_I64_ZERO;
04406     Ops.push_back(TexHandle);
04407     Ops.push_back(N->getOperand(2));
04408     Ops.push_back(Chain);
04409     break;
04410   case NVPTXISD::Suld1DV2I8Zero:
04411     Opc = NVPTX::SULD_1D_V2I8_ZERO;
04412     Ops.push_back(TexHandle);
04413     Ops.push_back(N->getOperand(2));
04414     Ops.push_back(Chain);
04415     break;
04416   case NVPTXISD::Suld1DV2I16Zero:
04417     Opc = NVPTX::SULD_1D_V2I16_ZERO;
04418     Ops.push_back(TexHandle);
04419     Ops.push_back(N->getOperand(2));
04420     Ops.push_back(Chain);
04421     break;
04422   case NVPTXISD::Suld1DV2I32Zero:
04423     Opc = NVPTX::SULD_1D_V2I32_ZERO;
04424     Ops.push_back(TexHandle);
04425     Ops.push_back(N->getOperand(2));
04426     Ops.push_back(Chain);
04427     break;
04428   case NVPTXISD::Suld1DV2I64Zero:
04429     Opc = NVPTX::SULD_1D_V2I64_ZERO;
04430     Ops.push_back(TexHandle);
04431     Ops.push_back(N->getOperand(2));
04432     Ops.push_back(Chain);
04433     break;
04434   case NVPTXISD::Suld1DV4I8Zero:
04435     Opc = NVPTX::SULD_1D_V4I8_ZERO;
04436     Ops.push_back(TexHandle);
04437     Ops.push_back(N->getOperand(2));
04438     Ops.push_back(Chain);
04439     break;
04440   case NVPTXISD::Suld1DV4I16Zero:
04441     Opc = NVPTX::SULD_1D_V4I16_ZERO;
04442     Ops.push_back(TexHandle);
04443     Ops.push_back(N->getOperand(2));
04444     Ops.push_back(Chain);
04445     break;
04446   case NVPTXISD::Suld1DV4I32Zero:
04447     Opc = NVPTX::SULD_1D_V4I32_ZERO;
04448     Ops.push_back(TexHandle);
04449     Ops.push_back(N->getOperand(2));
04450     Ops.push_back(Chain);
04451     break;
04452   case NVPTXISD::Suld1DArrayI8Zero:
04453     Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
04454     Ops.push_back(TexHandle);
04455     Ops.push_back(N->getOperand(2));
04456     Ops.push_back(N->getOperand(3));
04457     Ops.push_back(Chain);
04458     break;
04459   case NVPTXISD::Suld1DArrayI16Zero:
04460     Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
04461     Ops.push_back(TexHandle);
04462     Ops.push_back(N->getOperand(2));
04463     Ops.push_back(N->getOperand(3));
04464     Ops.push_back(Chain);
04465     break;
04466   case NVPTXISD::Suld1DArrayI32Zero:
04467     Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
04468     Ops.push_back(TexHandle);
04469     Ops.push_back(N->getOperand(2));
04470     Ops.push_back(N->getOperand(3));
04471     Ops.push_back(Chain);
04472     break;
04473   case NVPTXISD::Suld1DArrayI64Zero:
04474     Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
04475     Ops.push_back(TexHandle);
04476     Ops.push_back(N->getOperand(2));
04477     Ops.push_back(N->getOperand(3));
04478     Ops.push_back(Chain);
04479     break;
04480   case NVPTXISD::Suld1DArrayV2I8Zero:
04481     Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
04482     Ops.push_back(TexHandle);
04483     Ops.push_back(N->getOperand(2));
04484     Ops.push_back(N->getOperand(3));
04485     Ops.push_back(Chain);
04486     break;
04487   case NVPTXISD::Suld1DArrayV2I16Zero:
04488     Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
04489     Ops.push_back(TexHandle);
04490     Ops.push_back(N->getOperand(2));
04491     Ops.push_back(N->getOperand(3));
04492     Ops.push_back(Chain);
04493     break;
04494   case NVPTXISD::Suld1DArrayV2I32Zero:
04495     Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
04496     Ops.push_back(TexHandle);
04497     Ops.push_back(N->getOperand(2));
04498     Ops.push_back(N->getOperand(3));
04499     Ops.push_back(Chain);
04500     break;
04501   case NVPTXISD::Suld1DArrayV2I64Zero:
04502     Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
04503     Ops.push_back(TexHandle);
04504     Ops.push_back(N->getOperand(2));
04505     Ops.push_back(N->getOperand(3));
04506     Ops.push_back(Chain);
04507     break;
04508   case NVPTXISD::Suld1DArrayV4I8Zero:
04509     Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
04510     Ops.push_back(TexHandle);
04511     Ops.push_back(N->getOperand(2));
04512     Ops.push_back(N->getOperand(3));
04513     Ops.push_back(Chain);
04514     break;
04515   case NVPTXISD::Suld1DArrayV4I16Zero:
04516     Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
04517     Ops.push_back(TexHandle);
04518     Ops.push_back(N->getOperand(2));
04519     Ops.push_back(N->getOperand(3));
04520     Ops.push_back(Chain);
04521     break;
04522   case NVPTXISD::Suld1DArrayV4I32Zero:
04523     Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
04524     Ops.push_back(TexHandle);
04525     Ops.push_back(N->getOperand(2));
04526     Ops.push_back(N->getOperand(3));
04527     Ops.push_back(Chain);
04528     break;
04529   case NVPTXISD::Suld2DI8Zero:
04530     Opc = NVPTX::SULD_2D_I8_ZERO;
04531     Ops.push_back(TexHandle);
04532     Ops.push_back(N->getOperand(2));
04533     Ops.push_back(N->getOperand(3));
04534     Ops.push_back(Chain);
04535     break;
04536   case NVPTXISD::Suld2DI16Zero:
04537     Opc = NVPTX::SULD_2D_I16_ZERO;
04538     Ops.push_back(TexHandle);
04539     Ops.push_back(N->getOperand(2));
04540     Ops.push_back(N->getOperand(3));
04541     Ops.push_back(Chain);
04542     break;
04543   case NVPTXISD::Suld2DI32Zero:
04544     Opc = NVPTX::SULD_2D_I32_ZERO;
04545     Ops.push_back(TexHandle);
04546     Ops.push_back(N->getOperand(2));
04547     Ops.push_back(N->getOperand(3));
04548     Ops.push_back(Chain);
04549     break;
04550   case NVPTXISD::Suld2DI64Zero:
04551     Opc = NVPTX::SULD_2D_I64_ZERO;
04552     Ops.push_back(TexHandle);
04553     Ops.push_back(N->getOperand(2));
04554     Ops.push_back(N->getOperand(3));
04555     Ops.push_back(Chain);
04556     break;
04557   case NVPTXISD::Suld2DV2I8Zero:
04558     Opc = NVPTX::SULD_2D_V2I8_ZERO;
04559     Ops.push_back(TexHandle);
04560     Ops.push_back(N->getOperand(2));
04561     Ops.push_back(N->getOperand(3));
04562     Ops.push_back(Chain);
04563     break;
04564   case NVPTXISD::Suld2DV2I16Zero:
04565     Opc = NVPTX::SULD_2D_V2I16_ZERO;
04566     Ops.push_back(TexHandle);
04567     Ops.push_back(N->getOperand(2));
04568     Ops.push_back(N->getOperand(3));
04569     Ops.push_back(Chain);
04570     break;
04571   case NVPTXISD::Suld2DV2I32Zero:
04572     Opc = NVPTX::SULD_2D_V2I32_ZERO;
04573     Ops.push_back(TexHandle);
04574     Ops.push_back(N->getOperand(2));
04575     Ops.push_back(N->getOperand(3));
04576     Ops.push_back(Chain);
04577     break;
04578   case NVPTXISD::Suld2DV2I64Zero:
04579     Opc = NVPTX::SULD_2D_V2I64_ZERO;
04580     Ops.push_back(TexHandle);
04581     Ops.push_back(N->getOperand(2));
04582     Ops.push_back(N->getOperand(3));
04583     Ops.push_back(Chain);
04584     break;
04585   case NVPTXISD::Suld2DV4I8Zero:
04586     Opc = NVPTX::SULD_2D_V4I8_ZERO;
04587     Ops.push_back(TexHandle);
04588     Ops.push_back(N->getOperand(2));
04589     Ops.push_back(N->getOperand(3));
04590     Ops.push_back(Chain);
04591     break;
04592   case NVPTXISD::Suld2DV4I16Zero:
04593     Opc = NVPTX::SULD_2D_V4I16_ZERO;
04594     Ops.push_back(TexHandle);
04595     Ops.push_back(N->getOperand(2));
04596     Ops.push_back(N->getOperand(3));
04597     Ops.push_back(Chain);
04598     break;
04599   case NVPTXISD::Suld2DV4I32Zero:
04600     Opc = NVPTX::SULD_2D_V4I32_ZERO;
04601     Ops.push_back(TexHandle);
04602     Ops.push_back(N->getOperand(2));
04603     Ops.push_back(N->getOperand(3));
04604     Ops.push_back(Chain);
04605     break;
04606   case NVPTXISD::Suld2DArrayI8Zero:
04607     Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
04608     Ops.push_back(TexHandle);
04609     Ops.push_back(N->getOperand(2));
04610     Ops.push_back(N->getOperand(3));
04611     Ops.push_back(N->getOperand(4));
04612     Ops.push_back(Chain);
04613     break;
04614   case NVPTXISD::Suld2DArrayI16Zero:
04615     Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
04616     Ops.push_back(TexHandle);
04617     Ops.push_back(N->getOperand(2));
04618     Ops.push_back(N->getOperand(3));
04619     Ops.push_back(N->getOperand(4));
04620     Ops.push_back(Chain);
04621     break;
04622   case NVPTXISD::Suld2DArrayI32Zero:
04623     Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
04624     Ops.push_back(TexHandle);
04625     Ops.push_back(N->getOperand(2));
04626     Ops.push_back(N->getOperand(3));
04627     Ops.push_back(N->getOperand(4));
04628     Ops.push_back(Chain);
04629     break;
04630   case NVPTXISD::Suld2DArrayI64Zero:
04631     Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
04632     Ops.push_back(TexHandle);
04633     Ops.push_back(N->getOperand(2));
04634     Ops.push_back(N->getOperand(3));
04635     Ops.push_back(N->getOperand(4));
04636     Ops.push_back(Chain);
04637     break;
04638   case NVPTXISD::Suld2DArrayV2I8Zero:
04639     Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
04640     Ops.push_back(TexHandle);
04641     Ops.push_back(N->getOperand(2));
04642     Ops.push_back(N->getOperand(3));
04643     Ops.push_back(N->getOperand(4));
04644     Ops.push_back(Chain);
04645     break;
04646   case NVPTXISD::Suld2DArrayV2I16Zero:
04647     Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
04648     Ops.push_back(TexHandle);
04649     Ops.push_back(N->getOperand(2));
04650     Ops.push_back(N->getOperand(3));
04651     Ops.push_back(N->getOperand(4));
04652     Ops.push_back(Chain);
04653     break;
04654   case NVPTXISD::Suld2DArrayV2I32Zero:
04655     Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
04656     Ops.push_back(TexHandle);
04657     Ops.push_back(N->getOperand(2));
04658     Ops.push_back(N->getOperand(3));
04659     Ops.push_back(N->getOperand(4));
04660     Ops.push_back(Chain);
04661     break;
04662   case NVPTXISD::Suld2DArrayV2I64Zero:
04663     Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
04664     Ops.push_back(TexHandle);
04665     Ops.push_back(N->getOperand(2));
04666     Ops.push_back(N->getOperand(3));
04667     Ops.push_back(N->getOperand(4));
04668     Ops.push_back(Chain);
04669     break;
04670   case NVPTXISD::Suld2DArrayV4I8Zero:
04671     Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
04672     Ops.push_back(TexHandle);
04673     Ops.push_back(N->getOperand(2));
04674     Ops.push_back(N->getOperand(3));
04675     Ops.push_back(N->getOperand(4));
04676     Ops.push_back(Chain);
04677     break;
04678   case NVPTXISD::Suld2DArrayV4I16Zero:
04679     Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
04680     Ops.push_back(TexHandle);
04681     Ops.push_back(N->getOperand(2));
04682     Ops.push_back(N->getOperand(3));
04683     Ops.push_back(N->getOperand(4));
04684     Ops.push_back(Chain);
04685     break;
04686   case NVPTXISD::Suld2DArrayV4I32Zero:
04687     Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
04688     Ops.push_back(TexHandle);
04689     Ops.push_back(N->getOperand(2));
04690     Ops.push_back(N->getOperand(3));
04691     Ops.push_back(N->getOperand(4));
04692     Ops.push_back(Chain);
04693     break;
04694   case NVPTXISD::Suld3DI8Zero:
04695     Opc = NVPTX::SULD_3D_I8_ZERO;
04696     Ops.push_back(TexHandle);
04697     Ops.push_back(N->getOperand(2));
04698     Ops.push_back(N->getOperand(3));
04699     Ops.push_back(N->getOperand(4));
04700     Ops.push_back(Chain);
04701     break;
04702   case NVPTXISD::Suld3DI16Zero:
04703     Opc = NVPTX::SULD_3D_I16_ZERO;
04704     Ops.push_back(TexHandle);
04705     Ops.push_back(N->getOperand(2));
04706     Ops.push_back(N->getOperand(3));
04707     Ops.push_back(N->getOperand(4));
04708     Ops.push_back(Chain);
04709     break;
04710   case NVPTXISD::Suld3DI32Zero:
04711     Opc = NVPTX::SULD_3D_I32_ZERO;
04712     Ops.push_back(TexHandle);
04713     Ops.push_back(N->getOperand(2));
04714     Ops.push_back(N->getOperand(3));
04715     Ops.push_back(N->getOperand(4));
04716     Ops.push_back(Chain);
04717     break;
04718   case NVPTXISD::Suld3DI64Zero:
04719     Opc = NVPTX::SULD_3D_I64_ZERO;
04720     Ops.push_back(TexHandle);
04721     Ops.push_back(N->getOperand(2));
04722     Ops.push_back(N->getOperand(3));
04723     Ops.push_back(N->getOperand(4));
04724     Ops.push_back(Chain);
04725     break;
04726   case NVPTXISD::Suld3DV2I8Zero:
04727     Opc = NVPTX::SULD_3D_V2I8_ZERO;
04728     Ops.push_back(TexHandle);
04729     Ops.push_back(N->getOperand(2));
04730     Ops.push_back(N->getOperand(3));
04731     Ops.push_back(N->getOperand(4));
04732     Ops.push_back(Chain);
04733     break;
04734   case NVPTXISD::Suld3DV2I16Zero:
04735     Opc = NVPTX::SULD_3D_V2I16_ZERO;
04736     Ops.push_back(TexHandle);
04737     Ops.push_back(N->getOperand(2));
04738     Ops.push_back(N->getOperand(3));
04739     Ops.push_back(N->getOperand(4));
04740     Ops.push_back(Chain);
04741     break;
04742   case NVPTXISD::Suld3DV2I32Zero:
04743     Opc = NVPTX::SULD_3D_V2I32_ZERO;
04744     Ops.push_back(TexHandle);
04745     Ops.push_back(N->getOperand(2));
04746     Ops.push_back(N->getOperand(3));
04747     Ops.push_back(N->getOperand(4));
04748     Ops.push_back(Chain);
04749     break;
04750   case NVPTXISD::Suld3DV2I64Zero:
04751     Opc = NVPTX::SULD_3D_V2I64_ZERO;
04752     Ops.push_back(TexHandle);
04753     Ops.push_back(N->getOperand(2));
04754     Ops.push_back(N->getOperand(3));
04755     Ops.push_back(N->getOperand(4));
04756     Ops.push_back(Chain);
04757     break;
04758   case NVPTXISD::Suld3DV4I8Zero:
04759     Opc = NVPTX::SULD_3D_V4I8_ZERO;
04760     Ops.push_back(TexHandle);
04761     Ops.push_back(N->getOperand(2));
04762     Ops.push_back(N->getOperand(3));
04763     Ops.push_back(N->getOperand(4));
04764     Ops.push_back(Chain);
04765     break;
04766   case NVPTXISD::Suld3DV4I16Zero:
04767     Opc = NVPTX::SULD_3D_V4I16_ZERO;
04768     Ops.push_back(TexHandle);
04769     Ops.push_back(N->getOperand(2));
04770     Ops.push_back(N->getOperand(3));
04771     Ops.push_back(N->getOperand(4));
04772     Ops.push_back(Chain);
04773     break;
04774   case NVPTXISD::Suld3DV4I32Zero:
04775     Opc = NVPTX::SULD_3D_V4I32_ZERO;
04776     Ops.push_back(TexHandle);
04777     Ops.push_back(N->getOperand(2));
04778     Ops.push_back(N->getOperand(3));
04779     Ops.push_back(N->getOperand(4));
04780     Ops.push_back(Chain);
04781     break;
04782   }
04783   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
04784   return Ret;
04785 }
04786 
04787 
04788 /// SelectBFE - Look for instruction sequences that can be made more efficient
04789 /// by using the 'bfe' (bit-field extract) PTX instruction
04790 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
04791   SDLoc DL(N);
04792   SDValue LHS = N->getOperand(0);
04793   SDValue RHS = N->getOperand(1);
04794   SDValue Len;
04795   SDValue Start;
04796   SDValue Val;
04797   bool IsSigned = false;
04798 
04799   if (N->getOpcode() == ISD::AND) {
04800     // Canonicalize the operands
04801     // We want 'and %val, %mask'
04802     if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
04803       std::swap(LHS, RHS);
04804     }
04805 
04806     ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
04807     if (!Mask) {
04808       // We need a constant mask on the RHS of the AND
04809       return NULL;
04810     }
04811 
04812     // Extract the mask bits
04813     uint64_t MaskVal = Mask->getZExtValue();
04814     if (!isMask_64(MaskVal)) {
04815       // We *could* handle shifted masks here, but doing so would require an
04816       // 'and' operation to fix up the low-order bits so we would trade
04817       // shr+and for bfe+and, which has the same throughput
04818       return NULL;
04819     }
04820 
04821     // How many bits are in our mask?
04822     uint64_t NumBits = countTrailingOnes(MaskVal);
04823     Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
04824 
04825     if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
04826       // We have a 'srl/and' pair, extract the effective start bit and length
04827       Val = LHS.getNode()->getOperand(0);
04828       Start = LHS.getNode()->getOperand(1);
04829       ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
04830       if (StartConst) {
04831         uint64_t StartVal = StartConst->getZExtValue();
04832         // How many "good" bits do we have left?  "good" is defined here as bits
04833         // that exist in the original value, not shifted in.
04834         uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
04835         if (NumBits > GoodBits) {
04836           // Do not handle the case where bits have been shifted in. In theory
04837           // we could handle this, but the cost is likely higher than just
04838           // emitting the srl/and pair.
04839           return NULL;
04840         }
04841         Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
04842       } else {
04843         // Do not handle the case where the shift amount (can be zero if no srl
04844         // was found) is not constant. We could handle this case, but it would
04845         // require run-time logic that would be more expensive than just
04846         // emitting the srl/and pair.
04847         return NULL;
04848       }
04849     } else {
04850       // Do not handle the case where the LHS of the and is not a shift. While
04851       // it would be trivial to handle this case, it would just transform
04852       // 'and' -> 'bfe', but 'and' has higher-throughput.
04853       return NULL;
04854     }
04855   } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
04856     if (LHS->getOpcode() == ISD::AND) {
04857       ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
04858       if (!ShiftCnst) {
04859         // Shift amount must be constant
04860         return NULL;
04861       }
04862 
04863       uint64_t ShiftAmt = ShiftCnst->getZExtValue();
04864 
04865       SDValue AndLHS = LHS->getOperand(0);
04866       SDValue AndRHS = LHS->getOperand(1);
04867 
04868       // Canonicalize the AND to have the mask on the RHS
04869       if (isa<ConstantSDNode>(AndLHS)) {
04870         std::swap(AndLHS, AndRHS);
04871       }
04872 
04873       ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
04874       if (!MaskCnst) {
04875         // Mask must be constant
04876         return NULL;
04877       }
04878 
04879       uint64_t MaskVal = MaskCnst->getZExtValue();
04880       uint64_t NumZeros;
04881       uint64_t NumBits;
04882       if (isMask_64(MaskVal)) {
04883         NumZeros = 0;
04884         // The number of bits in the result bitfield will be the number of
04885         // trailing ones (the AND) minus the number of bits we shift off
04886         NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
04887       } else if (isShiftedMask_64(MaskVal)) {
04888         NumZeros = countTrailingZeros(MaskVal);
04889         unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
04890         // The number of bits in the result bitfield will be the number of
04891         // trailing zeros plus the number of set bits in the mask minus the
04892         // number of bits we shift off
04893         NumBits = NumZeros + NumOnes - ShiftAmt;
04894       } else {
04895         // This is not a mask we can handle
04896         return NULL;
04897       }
04898 
04899       if (ShiftAmt < NumZeros) {
04900         // Handling this case would require extra logic that would make this
04901         // transformation non-profitable
04902         return NULL;
04903       }
04904 
04905       Val = AndLHS;
04906       Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
04907       Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
04908     } else if (LHS->getOpcode() == ISD::SHL) {
04909       // Here, we have a pattern like:
04910       //
04911       // (sra (shl val, NN), MM)
04912       // or
04913       // (srl (shl val, NN), MM)
04914       //
04915       // If MM >= NN, we can efficiently optimize this with bfe
04916       Val = LHS->getOperand(0);
04917 
04918       SDValue ShlRHS = LHS->getOperand(1);
04919       ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
04920       if (!ShlCnst) {
04921         // Shift amount must be constant
04922         return NULL;
04923       }
04924       uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
04925 
04926       SDValue ShrRHS = RHS;
04927       ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
04928       if (!ShrCnst) {
04929         // Shift amount must be constant
04930         return NULL;
04931       }
04932       uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
04933 
04934       // To avoid extra codegen and be profitable, we need Outer >= Inner
04935       if (OuterShiftAmt < InnerShiftAmt) {
04936         return NULL;
04937       }
04938 
04939       // If the outer shift is more than the type size, we have no bitfield to
04940       // extract (since we also check that the inner shift is <= the outer shift
04941       // then this also implies that the inner shift is < the type size)
04942       if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
04943         return NULL;
04944       }
04945 
04946       Start =
04947         CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
04948       Len =
04949         CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
04950                                   OuterShiftAmt, DL, MVT::i32);
04951 
04952       if (N->getOpcode() == ISD::SRA) {
04953         // If we have a arithmetic right shift, we need to use the signed bfe
04954         // variant
04955         IsSigned = true;
04956       }
04957     } else {
04958       // No can do...
04959       return NULL;
04960     }
04961   } else {
04962     // No can do...
04963     return NULL;
04964   }
04965 
04966 
04967   unsigned Opc;
04968   // For the BFE operations we form here from "and" and "srl", always use the
04969   // unsigned variants.
04970   if (Val.getValueType() == MVT::i32) {
04971     if (IsSigned) {
04972       Opc = NVPTX::BFE_S32rii;
04973     } else {
04974       Opc = NVPTX::BFE_U32rii;
04975     }
04976   } else if (Val.getValueType() == MVT::i64) {
04977     if (IsSigned) {
04978       Opc = NVPTX::BFE_S64rii;
04979     } else {
04980       Opc = NVPTX::BFE_U64rii;
04981     }
04982   } else {
04983     // We cannot handle this type
04984     return NULL;
04985   }
04986 
04987   SDValue Ops[] = {
04988     Val, Start, Len
04989   };
04990 
04991   return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
04992 }
04993 
04994 // SelectDirectAddr - Match a direct address for DAG.
04995 // A direct address could be a globaladdress or externalsymbol.
04996 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
04997   // Return true if TGA or ES.
04998   if (N.getOpcode() == ISD::TargetGlobalAddress ||
04999       N.getOpcode() == ISD::TargetExternalSymbol) {
05000     Address = N;
05001     return true;
05002   }
05003   if (N.getOpcode() == NVPTXISD::Wrapper) {
05004     Address = N.getOperand(0);
05005     return true;
05006   }
05007   if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
05008     unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
05009     if (IID == Intrinsic::nvvm_ptr_gen_to_param)
05010       if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
05011         return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
05012   }
05013   return false;
05014 }
05015 
05016 // symbol+offset
05017 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
05018     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
05019   if (Addr.getOpcode() == ISD::ADD) {
05020     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
05021       SDValue base = Addr.getOperand(0);
05022       if (SelectDirectAddr(base, Base)) {
05023         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
05024                                            mvt);
05025         return true;
05026       }
05027     }
05028   }
05029   return false;
05030 }
05031 
05032 // symbol+offset
05033 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
05034                                      SDValue &Base, SDValue &Offset) {
05035   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
05036 }
05037 
05038 // symbol+offset
05039 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
05040                                        SDValue &Base, SDValue &Offset) {
05041   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
05042 }
05043 
05044 // register+offset
05045 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
05046     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
05047   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
05048     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
05049     Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
05050     return true;
05051   }
05052   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
05053       Addr.getOpcode() == ISD::TargetGlobalAddress)
05054     return false; // direct calls.
05055 
05056   if (Addr.getOpcode() == ISD::ADD) {
05057     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
05058       return false;
05059     }
05060     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
05061       if (FrameIndexSDNode *FIN =
05062               dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
05063         // Constant offset from frame ref.
05064         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
05065       else
05066         Base = Addr.getOperand(0);
05067       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
05068                                          mvt);
05069       return true;
05070     }
05071   }
05072   return false;
05073 }
05074 
05075 // register+offset
05076 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
05077                                      SDValue &Base, SDValue &Offset) {
05078   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
05079 }
05080 
05081 // register+offset
05082 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
05083                                        SDValue &Base, SDValue &Offset) {
05084   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
05085 }
05086 
05087 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
05088                                                  unsigned int spN) const {
05089   const Value *Src = nullptr;
05090   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
05091     if (spN == 0 && mN->getMemOperand()->getPseudoValue())
05092       return true;
05093     Src = mN->getMemOperand()->getValue();
05094   }
05095   if (!Src)
05096     return false;
05097   if (auto *PT = dyn_cast<PointerType>(Src->getType()))
05098     return (PT->getAddressSpace() == spN);
05099   return false;
05100 }
05101 
05102 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
05103 /// inline asm expressions.
05104 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
05105     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
05106   SDValue Op0, Op1;
05107   switch (ConstraintID) {
05108   default:
05109     return true;
05110   case InlineAsm::Constraint_m: // memory
05111     if (SelectDirectAddr(Op, Op0)) {
05112       OutOps.push_back(Op0);
05113       OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
05114       return false;
05115     }
05116     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
05117       OutOps.push_back(Op0);
05118       OutOps.push_back(Op1);
05119       return false;
05120     }
05121     break;
05122   }
05123   return true;
05124 }