LLVM 3.7.0
NVPTXISelLowering.cpp
//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

13 
14 #include "NVPTXISelLowering.h"
15 #include "NVPTX.h"
16 #include "NVPTXTargetMachine.h"
17 #include "NVPTXTargetObjectFile.h"
18 #include "NVPTXUtilities.h"
19 #include "llvm/CodeGen/Analysis.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/GlobalValue.h"
29 #include "llvm/IR/IntrinsicInst.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/MC/MCSectionELF.h"
34 #include "llvm/Support/Debug.h"
38 #include <sstream>
39 
#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

static unsigned int uniqueCallSite = 0;

static cl::opt<bool> sched4reg(
    "nvptx-sched4reg",
    cl::desc("NVPTX Specific: schedule for register pressure"), cl::init(false));

static cl::opt<unsigned> FMAContractLevelOpt(
    "nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
    cl::desc("NVPTX Specific: FMA contraction (0: don't do it,"
             " 1: do it, 2: do it aggressively)"),
    cl::init(2));

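// Usage sketch, assuming the option names reconstructed above: these are
// codegen flags read when the NVPTX backend runs, e.g. (illustrative)
//   llc -march=nvptx64 -nvptx-fma-level=0 kernel.ll
// would disable FMA contraction entirely.
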
static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::v2i1:
  case MVT::v4i1:
  case MVT::v2i8:
  case MVT::v4i8:
  case MVT::v2i16:
  case MVT::v4i16:
  case MVT::v2i32:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v4f32:
  case MVT::v2f64:
    return true;
  }
}
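
// The list above mirrors what PTX vector accesses can express directly:
// .v2/.v4 vectors of the scalar types PTX supports. Note that v4i64/v4f64
// are absent, since a .v4 of 64-bit elements would exceed the 128-bit limit
// on a single vectorized memory access.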

/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return
/// the same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                               Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                               SmallVectorImpl<uint64_t> *Offsets = nullptr,
                               uint64_t StartingOffset = 0) {
  SmallVector<EVT, 16> TempVTs;
  SmallVector<uint64_t, 16> TempOffsets;

  ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
    EVT VT = TempVTs[i];
    uint64_t Off = TempOffsets[i];
    if (VT.isVector())
      for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
        ValueVTs.push_back(VT.getVectorElementType());
        if (Offsets)
          Offsets->push_back(Off + j * VT.getVectorElementType().getStoreSize());
      }
    else {
      ValueVTs.push_back(VT);
      if (Offsets)
        Offsets->push_back(Off);
    }
  }
}
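
// Worked example (illustrative): for Ty = {<2 x float>, i32},
// ComputeValueVTs produces {v2f32, i32} at byte offsets {0, 8}, while
// ComputePTXValueVTs flattens the vector and produces {f32, f32, i32}
// at offsets {0, 4, 8}, one entry per Ins/Outs slot.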

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                                         const NVPTXSubtarget &STI)
    : TargetLowering(TM), nvTM(&TM), STI(STI) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy, or memmove.
  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;

  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // By default, use the Source scheduling
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
  // Some SIGN_EXTEND_INREG can be done using cvt instruction.
  // For others we will expand to a SHL/SRA pair.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);

  if (STI.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (STI.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy
  // intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

  // Turn FP extload into load/fextend
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
  // Turn FP truncstore into trunc + store.
  // FIXME: vector types should also be expanded
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store predicate registers
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // This is legal in NVPTX
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::ADDC, MVT::i64, Expand);
  setOperationAction(ISD::ADDE, MVT::i64, Expand);

  // Register custom handling for vector loads/stores
  for (MVT VT : MVT::vector_valuetypes()) {
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Custom handling for i8 intrinsics
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);

  setOperationAction(ISD::CTLZ, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Legal);
  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  // PTX does not directly support SELP of i1, so promote to i32 first
  setOperationAction(ISD::SELECT, MVT::i1, Custom);

  // We have some custom DAG combine patterns for these nodes
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);

  // Now deduce the information based on the above mentioned
  // actions
  computeRegisterProperties(STI.getRegisterInfo());
}

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((NVPTXISD::NodeType)Opcode) {
  case NVPTXISD::FIRST_NUMBER:
    break;
  case NVPTXISD::CALL:
    return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG:
    return "NVPTXISD::RET_FLAG";
  case NVPTXISD::LOAD_PARAM:
    return "NVPTXISD::LOAD_PARAM";
  case NVPTXISD::Wrapper:
    return "NVPTXISD::Wrapper";
  case NVPTXISD::DeclareParam:
    return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam:
    return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet:
    return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareScalarRet:
    return "NVPTXISD::DeclareScalarRet";
  case NVPTXISD::DeclareRetParam:
    return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall:
    return "NVPTXISD::PrintCall";
  case NVPTXISD::PrintCallUni:
    return "NVPTXISD::PrintCallUni";
  case NVPTXISD::LoadParam:
    return "NVPTXISD::LoadParam";
  case NVPTXISD::LoadParamV2:
    return "NVPTXISD::LoadParamV2";
  case NVPTXISD::LoadParamV4:
    return "NVPTXISD::LoadParamV4";
  case NVPTXISD::StoreParam:
    return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamV2:
    return "NVPTXISD::StoreParamV2";
  case NVPTXISD::StoreParamV4:
    return "NVPTXISD::StoreParamV4";
  case NVPTXISD::StoreParamS32:
    return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32:
    return "NVPTXISD::StoreParamU32";
  case NVPTXISD::CallArgBegin:
    return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg:
    return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg:
    return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd:
    return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid:
    return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal:
    return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol:
    return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype:
    return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam:
    return "NVPTXISD::MoveParam";
  case NVPTXISD::StoreRetval:
    return "NVPTXISD::StoreRetval";
  case NVPTXISD::StoreRetvalV2:
    return "NVPTXISD::StoreRetvalV2";
  case NVPTXISD::StoreRetvalV4:
    return "NVPTXISD::StoreRetvalV4";
  case NVPTXISD::PseudoUseParam:
    return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN:
    return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin:
    return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd:
    return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::CallPrototype:
    return "NVPTXISD::CallPrototype";
  case NVPTXISD::LoadV2:
    return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4:
    return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2:
    return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4:
    return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2:
    return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4:
    return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2:
    return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4:
    return "NVPTXISD::StoreV4";
  case NVPTXISD::FUN_SHFL_CLAMP:
    return "NVPTXISD::FUN_SHFL_CLAMP";
  case NVPTXISD::FUN_SHFR_CLAMP:
    return "NVPTXISD::FUN_SHFR_CLAMP";
  case NVPTXISD::IMAD:
    return "NVPTXISD::IMAD";
  case NVPTXISD::Dummy:
    return "NVPTXISD::Dummy";
  case NVPTXISD::MUL_WIDE_SIGNED:
    return "NVPTXISD::MUL_WIDE_SIGNED";
  case NVPTXISD::MUL_WIDE_UNSIGNED:
    return "NVPTXISD::MUL_WIDE_UNSIGNED";
  case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32";
  case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
  case NVPTXISD::Tex1DFloatFloatLevel:
    return "NVPTXISD::Tex1DFloatFloatLevel";
  case NVPTXISD::Tex1DFloatFloatGrad:
    return "NVPTXISD::Tex1DFloatFloatGrad";
  case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32";
  case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float";
  case NVPTXISD::Tex1DS32FloatLevel:
    return "NVPTXISD::Tex1DS32FloatLevel";
  case NVPTXISD::Tex1DS32FloatGrad:
    return "NVPTXISD::Tex1DS32FloatGrad";
  case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32";
  case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float";
  case NVPTXISD::Tex1DU32FloatLevel:
    return "NVPTXISD::Tex1DU32FloatLevel";
  case NVPTXISD::Tex1DU32FloatGrad:
    return "NVPTXISD::Tex1DU32FloatGrad";
  case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32";
  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
    return "NVPTXISD::Tex1DArrayFloatFloatLevel";
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
    return "NVPTXISD::Tex1DArrayFloatFloatGrad";
  case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32";
  case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float";
  case NVPTXISD::Tex1DArrayS32FloatLevel:
    return "NVPTXISD::Tex1DArrayS32FloatLevel";
  case NVPTXISD::Tex1DArrayS32FloatGrad:
    return "NVPTXISD::Tex1DArrayS32FloatGrad";
  case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32";
  case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float";
  case NVPTXISD::Tex1DArrayU32FloatLevel:
    return "NVPTXISD::Tex1DArrayU32FloatLevel";
  case NVPTXISD::Tex1DArrayU32FloatGrad:
    return "NVPTXISD::Tex1DArrayU32FloatGrad";
  case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32";
  case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
  case NVPTXISD::Tex2DFloatFloatLevel:
    return "NVPTXISD::Tex2DFloatFloatLevel";
  case NVPTXISD::Tex2DFloatFloatGrad:
    return "NVPTXISD::Tex2DFloatFloatGrad";
  case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32";
  case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float";
  case NVPTXISD::Tex2DS32FloatLevel:
    return "NVPTXISD::Tex2DS32FloatLevel";
  case NVPTXISD::Tex2DS32FloatGrad:
    return "NVPTXISD::Tex2DS32FloatGrad";
  case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32";
  case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float";
  case NVPTXISD::Tex2DU32FloatLevel:
    return "NVPTXISD::Tex2DU32FloatLevel";
  case NVPTXISD::Tex2DU32FloatGrad:
    return "NVPTXISD::Tex2DU32FloatGrad";
  case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32";
  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
    return "NVPTXISD::Tex2DArrayFloatFloatLevel";
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
    return "NVPTXISD::Tex2DArrayFloatFloatGrad";
  case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32";
  case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float";
  case NVPTXISD::Tex2DArrayS32FloatLevel:
    return "NVPTXISD::Tex2DArrayS32FloatLevel";
  case NVPTXISD::Tex2DArrayS32FloatGrad:
    return "NVPTXISD::Tex2DArrayS32FloatGrad";
  case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32";
  case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float";
  case NVPTXISD::Tex2DArrayU32FloatLevel:
    return "NVPTXISD::Tex2DArrayU32FloatLevel";
  case NVPTXISD::Tex2DArrayU32FloatGrad:
    return "NVPTXISD::Tex2DArrayU32FloatGrad";
  case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32";
  case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
  case NVPTXISD::Tex3DFloatFloatLevel:
    return "NVPTXISD::Tex3DFloatFloatLevel";
  case NVPTXISD::Tex3DFloatFloatGrad:
    return "NVPTXISD::Tex3DFloatFloatGrad";
  case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32";
  case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float";
  case NVPTXISD::Tex3DS32FloatLevel:
    return "NVPTXISD::Tex3DS32FloatLevel";
  case NVPTXISD::Tex3DS32FloatGrad:
    return "NVPTXISD::Tex3DS32FloatGrad";
  case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32";
  case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float";
  case NVPTXISD::Tex3DU32FloatLevel:
    return "NVPTXISD::Tex3DU32FloatLevel";
  case NVPTXISD::Tex3DU32FloatGrad:
    return "NVPTXISD::Tex3DU32FloatGrad";
  case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat";
  case NVPTXISD::TexCubeFloatFloatLevel:
    return "NVPTXISD::TexCubeFloatFloatLevel";
  case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float";
  case NVPTXISD::TexCubeS32FloatLevel:
    return "NVPTXISD::TexCubeS32FloatLevel";
  case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float";
  case NVPTXISD::TexCubeU32FloatLevel:
    return "NVPTXISD::TexCubeU32FloatLevel";
  case NVPTXISD::TexCubeArrayFloatFloat:
    return "NVPTXISD::TexCubeArrayFloatFloat";
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexCubeArrayFloatFloatLevel";
  case NVPTXISD::TexCubeArrayS32Float:
    return "NVPTXISD::TexCubeArrayS32Float";
  case NVPTXISD::TexCubeArrayS32FloatLevel:
    return "NVPTXISD::TexCubeArrayS32FloatLevel";
  case NVPTXISD::TexCubeArrayU32Float:
    return "NVPTXISD::TexCubeArrayU32Float";
  case NVPTXISD::TexCubeArrayU32FloatLevel:
    return "NVPTXISD::TexCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4R2DFloatFloat:
    return "NVPTXISD::Tld4R2DFloatFloat";
  case NVPTXISD::Tld4G2DFloatFloat:
    return "NVPTXISD::Tld4G2DFloatFloat";
  case NVPTXISD::Tld4B2DFloatFloat:
    return "NVPTXISD::Tld4B2DFloatFloat";
  case NVPTXISD::Tld4A2DFloatFloat:
    return "NVPTXISD::Tld4A2DFloatFloat";
  case NVPTXISD::Tld4R2DS64Float:
    return "NVPTXISD::Tld4R2DS64Float";
  case NVPTXISD::Tld4G2DS64Float:
    return "NVPTXISD::Tld4G2DS64Float";
  case NVPTXISD::Tld4B2DS64Float:
    return "NVPTXISD::Tld4B2DS64Float";
  case NVPTXISD::Tld4A2DS64Float:
    return "NVPTXISD::Tld4A2DS64Float";
  case NVPTXISD::Tld4R2DU64Float:
    return "NVPTXISD::Tld4R2DU64Float";
  case NVPTXISD::Tld4G2DU64Float:
    return "NVPTXISD::Tld4G2DU64Float";
  case NVPTXISD::Tld4B2DU64Float:
    return "NVPTXISD::Tld4B2DU64Float";
  case NVPTXISD::Tld4A2DU64Float:
    return "NVPTXISD::Tld4A2DU64Float";

  case NVPTXISD::TexUnified1DFloatS32:
    return "NVPTXISD::TexUnified1DFloatS32";
  case NVPTXISD::TexUnified1DFloatFloat:
    return "NVPTXISD::TexUnified1DFloatFloat";
  case NVPTXISD::TexUnified1DFloatFloatLevel:
    return "NVPTXISD::TexUnified1DFloatFloatLevel";
  case NVPTXISD::TexUnified1DFloatFloatGrad:
    return "NVPTXISD::TexUnified1DFloatFloatGrad";
  case NVPTXISD::TexUnified1DS32S32:
    return "NVPTXISD::TexUnified1DS32S32";
  case NVPTXISD::TexUnified1DS32Float:
    return "NVPTXISD::TexUnified1DS32Float";
  case NVPTXISD::TexUnified1DS32FloatLevel:
    return "NVPTXISD::TexUnified1DS32FloatLevel";
  case NVPTXISD::TexUnified1DS32FloatGrad:
    return "NVPTXISD::TexUnified1DS32FloatGrad";
  case NVPTXISD::TexUnified1DU32S32:
    return "NVPTXISD::TexUnified1DU32S32";
  case NVPTXISD::TexUnified1DU32Float:
    return "NVPTXISD::TexUnified1DU32Float";
  case NVPTXISD::TexUnified1DU32FloatLevel:
    return "NVPTXISD::TexUnified1DU32FloatLevel";
  case NVPTXISD::TexUnified1DU32FloatGrad:
    return "NVPTXISD::TexUnified1DU32FloatGrad";
  case NVPTXISD::TexUnified1DArrayFloatS32:
    return "NVPTXISD::TexUnified1DArrayFloatS32";
  case NVPTXISD::TexUnified1DArrayFloatFloat:
    return "NVPTXISD::TexUnified1DArrayFloatFloat";
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified1DArrayS32S32:
    return "NVPTXISD::TexUnified1DArrayS32S32";
  case NVPTXISD::TexUnified1DArrayS32Float:
    return "NVPTXISD::TexUnified1DArrayS32Float";
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
  case NVPTXISD::TexUnified1DArrayU32S32:
    return "NVPTXISD::TexUnified1DArrayU32S32";
  case NVPTXISD::TexUnified1DArrayU32Float:
    return "NVPTXISD::TexUnified1DArrayU32Float";
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
  case NVPTXISD::TexUnified2DFloatS32:
    return "NVPTXISD::TexUnified2DFloatS32";
  case NVPTXISD::TexUnified2DFloatFloat:
    return "NVPTXISD::TexUnified2DFloatFloat";
  case NVPTXISD::TexUnified2DFloatFloatLevel:
    return "NVPTXISD::TexUnified2DFloatFloatLevel";
  case NVPTXISD::TexUnified2DFloatFloatGrad:
    return "NVPTXISD::TexUnified2DFloatFloatGrad";
  case NVPTXISD::TexUnified2DS32S32:
    return "NVPTXISD::TexUnified2DS32S32";
  case NVPTXISD::TexUnified2DS32Float:
    return "NVPTXISD::TexUnified2DS32Float";
  case NVPTXISD::TexUnified2DS32FloatLevel:
    return "NVPTXISD::TexUnified2DS32FloatLevel";
  case NVPTXISD::TexUnified2DS32FloatGrad:
    return "NVPTXISD::TexUnified2DS32FloatGrad";
  case NVPTXISD::TexUnified2DU32S32:
    return "NVPTXISD::TexUnified2DU32S32";
  case NVPTXISD::TexUnified2DU32Float:
    return "NVPTXISD::TexUnified2DU32Float";
  case NVPTXISD::TexUnified2DU32FloatLevel:
    return "NVPTXISD::TexUnified2DU32FloatLevel";
  case NVPTXISD::TexUnified2DU32FloatGrad:
    return "NVPTXISD::TexUnified2DU32FloatGrad";
  case NVPTXISD::TexUnified2DArrayFloatS32:
    return "NVPTXISD::TexUnified2DArrayFloatS32";
  case NVPTXISD::TexUnified2DArrayFloatFloat:
    return "NVPTXISD::TexUnified2DArrayFloatFloat";
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified2DArrayS32S32:
    return "NVPTXISD::TexUnified2DArrayS32S32";
  case NVPTXISD::TexUnified2DArrayS32Float:
    return "NVPTXISD::TexUnified2DArrayS32Float";
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
  case NVPTXISD::TexUnified2DArrayU32S32:
    return "NVPTXISD::TexUnified2DArrayU32S32";
  case NVPTXISD::TexUnified2DArrayU32Float:
    return "NVPTXISD::TexUnified2DArrayU32Float";
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
  case NVPTXISD::TexUnified3DFloatS32:
    return "NVPTXISD::TexUnified3DFloatS32";
  case NVPTXISD::TexUnified3DFloatFloat:
    return "NVPTXISD::TexUnified3DFloatFloat";
  case NVPTXISD::TexUnified3DFloatFloatLevel:
    return "NVPTXISD::TexUnified3DFloatFloatLevel";
  case NVPTXISD::TexUnified3DFloatFloatGrad:
    return "NVPTXISD::TexUnified3DFloatFloatGrad";
  case NVPTXISD::TexUnified3DS32S32:
    return "NVPTXISD::TexUnified3DS32S32";
  case NVPTXISD::TexUnified3DS32Float:
    return "NVPTXISD::TexUnified3DS32Float";
  case NVPTXISD::TexUnified3DS32FloatLevel:
    return "NVPTXISD::TexUnified3DS32FloatLevel";
  case NVPTXISD::TexUnified3DS32FloatGrad:
    return "NVPTXISD::TexUnified3DS32FloatGrad";
  case NVPTXISD::TexUnified3DU32S32:
    return "NVPTXISD::TexUnified3DU32S32";
  case NVPTXISD::TexUnified3DU32Float:
    return "NVPTXISD::TexUnified3DU32Float";
  case NVPTXISD::TexUnified3DU32FloatLevel:
    return "NVPTXISD::TexUnified3DU32FloatLevel";
  case NVPTXISD::TexUnified3DU32FloatGrad:
    return "NVPTXISD::TexUnified3DU32FloatGrad";
  case NVPTXISD::TexUnifiedCubeFloatFloat:
    return "NVPTXISD::TexUnifiedCubeFloatFloat";
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeS32Float:
    return "NVPTXISD::TexUnifiedCubeS32Float";
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeU32Float:
    return "NVPTXISD::TexUnifiedCubeU32Float";
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
    return "NVPTXISD::TexUnifiedCubeArrayS32Float";
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
    return "NVPTXISD::TexUnifiedCubeArrayU32Float";
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
  case NVPTXISD::Tld4UnifiedR2DS64Float:
    return "NVPTXISD::Tld4UnifiedR2DS64Float";
  case NVPTXISD::Tld4UnifiedG2DS64Float:
    return "NVPTXISD::Tld4UnifiedG2DS64Float";
  case NVPTXISD::Tld4UnifiedB2DS64Float:
    return "NVPTXISD::Tld4UnifiedB2DS64Float";
  case NVPTXISD::Tld4UnifiedA2DS64Float:
    return "NVPTXISD::Tld4UnifiedA2DS64Float";
  case NVPTXISD::Tld4UnifiedR2DU64Float:
    return "NVPTXISD::Tld4UnifiedR2DU64Float";
  case NVPTXISD::Tld4UnifiedG2DU64Float:
    return "NVPTXISD::Tld4UnifiedG2DU64Float";
  case NVPTXISD::Tld4UnifiedB2DU64Float:
    return "NVPTXISD::Tld4UnifiedB2DU64Float";
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    return "NVPTXISD::Tld4UnifiedA2DU64Float";

  case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp";
  case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp";
  case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp";
  case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp";
  case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp";
  case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp";
  case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp";
  case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp";
  case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp";
  case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp";
  case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp";

  case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp";
  case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp";
  case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp";
  case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp";
  case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
  case NVPTXISD::Suld1DArrayV2I16Clamp: return "NVPTXISD::Suld1DArrayV2I16Clamp";
  case NVPTXISD::Suld1DArrayV2I32Clamp: return "NVPTXISD::Suld1DArrayV2I32Clamp";
  case NVPTXISD::Suld1DArrayV2I64Clamp: return "NVPTXISD::Suld1DArrayV2I64Clamp";
  case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
  case NVPTXISD::Suld1DArrayV4I16Clamp: return "NVPTXISD::Suld1DArrayV4I16Clamp";
  case NVPTXISD::Suld1DArrayV4I32Clamp: return "NVPTXISD::Suld1DArrayV4I32Clamp";

  case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp";
  case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp";
  case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp";
  case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp";
  case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp";
  case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp";
  case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp";
  case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp";
  case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp";
  case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp";
  case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp";

  case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp";
  case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp";
  case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp";
  case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp";
  case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
  case NVPTXISD::Suld2DArrayV2I16Clamp: return "NVPTXISD::Suld2DArrayV2I16Clamp";
  case NVPTXISD::Suld2DArrayV2I32Clamp: return "NVPTXISD::Suld2DArrayV2I32Clamp";
  case NVPTXISD::Suld2DArrayV2I64Clamp: return "NVPTXISD::Suld2DArrayV2I64Clamp";
  case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
  case NVPTXISD::Suld2DArrayV4I16Clamp: return "NVPTXISD::Suld2DArrayV4I16Clamp";
  case NVPTXISD::Suld2DArrayV4I32Clamp: return "NVPTXISD::Suld2DArrayV4I32Clamp";

  case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp";
  case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp";
  case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp";
  case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp";
  case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp";
  case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp";
  case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp";
  case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp";
  case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp";
  case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp";
  case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp";

  case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
  case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
  case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
  case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap";
  case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
  case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
  case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
  case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap";
  case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
  case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
  case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";

  case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
  case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
  case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
  case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap";
  case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
  case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
  case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
  case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap";
  case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
  case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
  case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";

  case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
  case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
  case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
  case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap";
  case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
  case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
  case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
  case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap";
  case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
  case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
  case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";

  case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
  case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
  case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
  case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap";
  case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
  case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
  case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
  case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap";
  case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
  case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
  case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";

  case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
  case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
  case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
  case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap";
  case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
  case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
  case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
  case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap";
  case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
  case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
  case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";

  case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero";
  case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero";
  case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero";
  case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero";
  case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero";
  case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero";
  case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero";
  case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero";
  case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero";
  case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero";
  case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero";

  case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero";
  case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero";
  case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero";
  case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero";
  case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero";
  case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero";
  case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero";
  case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero";
  case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero";
  case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero";
  case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero";

  case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero";
  case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero";
  case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero";
  case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero";
  case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero";
  case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero";
  case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero";
  case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero";
  case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero";
  case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero";
  case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero";

  case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero";
  case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero";
  case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero";
  case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero";
  case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero";
  case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero";
  case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero";
  case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero";
  case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero";
  case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero";
  case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero";

  case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero";
  case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero";
  case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero";
  case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero";
  case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero";
  case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero";
  case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero";
  case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero";
  case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero";
  case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
  case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
  }
  return nullptr;
}

TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
    return TypeSplitVector;

  return TargetLoweringBase::getPreferredVectorAction(VT);
}

SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
  return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}

std::string NVPTXTargetLowering::getPrototype(
    const DataLayout &DL, Type *retTy, const ArgListTy &Args,
    const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
    const ImmutableCallSite *CS) const {
  auto PtrVT = getPointerTy(DL);

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
      unsigned size = 0;
      if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
        if (size < 32)
          size = 32;
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      O << ".param .b" << PtrVT.getSizeInBits() << " _";
    } else if ((retTy->getTypeID() == Type::StructTyID) ||
               isa<VectorType>(retTy)) {
      auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
      O << ".param .align " << retAlignment << " .b8 _["
        << DL.getTypeAllocSize(retTy) << "]";
    } else {
      llvm_unreachable("Unknown return type");
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;

  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
        // +1 because index 0 is reserved for return type alignment
        if (!llvm::getAlign(*CallI, i + 1, align))
          align = DL.getABITypeAlignment(Ty);
        unsigned sz = DL.getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // update the index for Outs
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, DL, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
      // i8 types in IR will be i16 types in SDAG
      assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
              (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // scalar type
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = PtrVT.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = DL.getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}
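
// For illustration, a call through "float (float, i32*)" on a 64-bit target
// yields a prototype string of roughly this shape (call-site number varies):
//   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b64 _);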

unsigned
NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
                                          const ImmutableCallSite *CS,
                                          Type *Ty,
                                          unsigned Idx) const {
  unsigned Align = 0;
  const Value *DirectCallee = CS->getCalledFunction();

  if (!DirectCallee) {
    // We don't have a direct function symbol, but that may be because of
    // constant cast instructions in the call.
    const Instruction *CalleeI = CS->getInstruction();
    assert(CalleeI && "Call target is not a function or derived value?");

    // With bitcast'd call targets, the instruction will be the call
    if (isa<CallInst>(CalleeI)) {
      // Check if we have call alignment metadata
      if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
        return Align;

      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
      // Ignore any bitcast instructions
      while (isa<ConstantExpr>(CalleeV)) {
        const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
        if (!CE->isCast())
          break;
        // Look through the bitcast
        CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
      }

      // We have now looked past all of the bitcasts. Do we finally have a
      // Function?
      if (isa<Function>(CalleeV))
        DirectCallee = CalleeV;
    }
  }

  // Check for function alignment information if we found that the
  // ultimate target is a Function
  if (DirectCallee)
    if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
      return Align;

  // Call is indirect or alignment information is not available, fall back to
  // the ABI type alignment
  auto &DL = CS->getCaller()->getParent()->getDataLayout();
  return DL.getABITypeAlignment(Ty);
}
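
// Note: llvm::getAlign above is NVPTX's own helper (declared in
// NVPTXUtilities.h), which consults the backend's "align" annotations on the
// call or function rather than generic IR alignment attributes. On the ABI
// fallback path, e.g. a struct whose largest member is a double resolves to
// alignment 8 on a 64-bit target.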

SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.getArgs();
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();
  auto &DL = MF.getDataLayout();

  SDValue tempChain = Chain;
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(uniqueCallSite, dl, true),
                               dl);
  SDValue InFlag = Chain.getValue(1);

  unsigned paramCount = 0;
  // Args.size() and Outs.size() need not match.
  // Outs.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Outs)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Outs.
  // So a different index should be used for indexing into Outs/OutVals.
  // See similar issue in LowerFormalArguments.
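  // E.g. (illustrative) an aggregate argument {i32, i32} is a single entry in
  // Args but two entries in Outs/OutVals, one per field.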
  unsigned OIdx = 0;
  // Declare the .params or .reg needed to pass values
  // to the function
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    EVT VT = Outs[OIdx].VT;
    Type *Ty = Args[i].Ty;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType()) {
        // aggregate
        SmallVector<EVT, 16> vtparts;
        SmallVector<uint64_t, 16> Offsets;
        ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
                           0);

        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = DL.getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
                                                             MVT::i32),
                                      DAG.getConstant(paramCount, dl, MVT::i32),
                                      DAG.getConstant(sz, dl, MVT::i32),
                                      InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
          EVT elemtype = vtparts[j];
          unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
          if (elemtype.isInteger() && (sz < 8))
            sz = 8;
          SDValue StVal = OutVals[OIdx];
          if (elemtype.getSizeInBits() < 16) {
            StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
          }
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, dl, MVT::i32),
                                     DAG.getConstant(Offsets[j], dl, MVT::i32),
                                     StVal, InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          elemtype, MachinePointerInfo(),
                                          ArgAlign);
          InFlag = Chain.getValue(1);
          ++OIdx;
        }
        if (vtparts.size() > 0)
          --OIdx;
        ++paramCount;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(DL, Ty);
        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = DL.getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain,
                                      DAG.getConstant(align, dl, MVT::i32),
                                      DAG.getConstant(paramCount, dl, MVT::i32),
                                      DAG.getConstant(sz, dl, MVT::i32),
                                      InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        unsigned NumElts = ObjectVT.getVectorNumElements();
        EVT EltVT = ObjectVT.getVectorElementType();
        EVT MemVT = EltVT;
        bool NeedExtend = false;
        if (EltVT.getSizeInBits() < 16) {
          NeedExtend = true;
          EltVT = MVT::i16;
        }

        // V1 store
        if (NumElts == 1) {
          SDValue Elt = OutVals[OIdx++];
          if (NeedExtend)
            Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);

          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, dl, MVT::i32),
                                     DAG.getConstant(0, dl, MVT::i32), Elt,
                                     InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          MemVT, MachinePointerInfo());
          InFlag = Chain.getValue(1);
        } else if (NumElts == 2) {
          SDValue Elt0 = OutVals[OIdx++];
          SDValue Elt1 = OutVals[OIdx++];
          if (NeedExtend) {
            Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
            Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
          }

          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, dl, MVT::i32),
                                     DAG.getConstant(0, dl, MVT::i32), Elt0,
                                     Elt1, InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
                                          CopyParamVTs, CopyParamOps,
                                          MemVT, MachinePointerInfo());
          InFlag = Chain.getValue(1);
        } else {
          unsigned curOffset = 0;
          // V4 stores
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the vector will be expanded to a power of 2 elements, so we know
          // we can always round up to the next multiple of 4 when creating
          // the vector stores.
          // e.g.  4 elem => 1 st.v4
          //       6 elem => 2 st.v4
          //       8 elem => 2 st.v4
          //      11 elem => 3 st.v4
          unsigned VecSize = 4;
          if (EltVT.getSizeInBits() == 64)
            VecSize = 2;

          // This is potentially only part of a vector, so assume all elements
          // are packed together.
          unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;

          for (unsigned i = 0; i < NumElts; i += VecSize) {
            // Get values
            SDValue StoreVal;
            SmallVector<SDValue, 8> Ops;
            Ops.push_back(Chain);
            Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
            Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32));

            unsigned Opc = NVPTXISD::StoreParamV2;

            StoreVal = OutVals[OIdx++];
            if (NeedExtend)
              StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
            Ops.push_back(StoreVal);

            if (i + 1 < NumElts) {
              StoreVal = OutVals[OIdx++];
              if (NeedExtend)
                StoreVal =
                    DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
            } else {
              StoreVal = DAG.getUNDEF(EltVT);
            }
            Ops.push_back(StoreVal);

            if (VecSize == 4) {
              Opc = NVPTXISD::StoreParamV4;
              if (i + 2 < NumElts) {
                StoreVal = OutVals[OIdx++];
                if (NeedExtend)
                  StoreVal =
                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
              } else {
                StoreVal = DAG.getUNDEF(EltVT);
              }
              Ops.push_back(StoreVal);

              if (i + 3 < NumElts) {
                StoreVal = OutVals[OIdx++];
                if (NeedExtend)
                  StoreVal =
                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
              } else {
                StoreVal = DAG.getUNDEF(EltVT);
              }
              Ops.push_back(StoreVal);
            }

            Ops.push_back(InFlag);

            SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
            Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
                                            MemVT, MachinePointerInfo());
            InFlag = Chain.getValue(1);
            curOffset += PerStoreOffset;
          }
        }
        ++paramCount;
        --OIdx;
        continue;
      }
      // Plain scalar
      // for ABI, declare .param .b<size> .param<n>;
      unsigned sz = VT.getSizeInBits();
      bool needExtend = false;
      if (VT.isInteger()) {
        if (sz < 16)
          needExtend = true;
        if (sz < 32)
          sz = 32;
      }
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(paramCount, dl, MVT::i32),
                                    DAG.getConstant(sz, dl, MVT::i32),
                                    DAG.getConstant(0, dl, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareParamOps);
      InFlag = Chain.getValue(1);
      SDValue OutV = OutVals[OIdx];
      if (needExtend) {
        // zext/sext i1 to i16
        unsigned opc = ISD::ZERO_EXTEND;
        if (Outs[OIdx].Flags.isSExt())
          opc = ISD::SIGN_EXTEND;
        OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain,
                                 DAG.getConstant(paramCount, dl, MVT::i32),
                                 DAG.getConstant(0, dl, MVT::i32), OutV,
                                 InFlag };

      unsigned opcode = NVPTXISD::StoreParam;
      if (Outs[OIdx].Flags.isZExt())
        opcode = NVPTXISD::StoreParamU32;
      else if (Outs[OIdx].Flags.isSExt())
        opcode = NVPTXISD::StoreParamS32;
      Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
                                      VT, MachinePointerInfo());

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // struct or vector
    SmallVector<EVT, 16> vtparts;
    SmallVector<uint64_t, 16> Offsets;
    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be pointer");
    ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
                       vtparts, &Offsets, 0);

    // declare .param .align <align> .b8 .param<n>[<size>];
    unsigned sz = Outs[OIdx].Flags.getByValSize();
    SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
    // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
    // so we don't need to worry about natural alignment or not.
    // See TargetLowering::LowerCallTo().
    SDValue DeclareParamOps[] = {
      Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32),
      DAG.getConstant(paramCount, dl, MVT::i32),
      DAG.getConstant(sz, dl, MVT::i32), InFlag
    };
    Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                        DeclareParamOps);
    InFlag = Chain.getValue(1);
    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
      EVT elemtype = vtparts[j];
      int curOffset = Offsets[j];
      unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
      auto PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
                                    DAG.getConstant(curOffset, dl, PtrVT));
      SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                   MachinePointerInfo(), false, false, false,
                                   PartAlign);
      if (elemtype.getSizeInBits() < 16) {
        theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain,
                                 DAG.getConstant(paramCount, dl, MVT::i32),
                                 DAG.getConstant(curOffset, dl, MVT::i32),
                                 theVal, InFlag };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                                      CopyParamOps, elemtype,
                                      MachinePointerInfo());

      InFlag = Chain.getValue(1);
    }
    ++paramCount;
  }

  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle Result
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, DL, retTy, resvtparts);

    // Declare
    //  .param .align 16 .b8 retval0[<size-in-bytes>], or
    //  .param .b<size-in-bits> retval0
    unsigned resultsz = DL.getTypeAllocSizeInBits(retTy);
    // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
    // these three types to match the logic in
    // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
    // Plus, this behavior is consistent with nvcc's.
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
        retTy->isPointerTy()) {
      // Scalar needs to be at least 32-bit wide
      if (resultsz < 32)
        resultsz = 32;
      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
                                  DAG.getConstant(resultsz, dl, MVT::i32),
                                  DAG.getConstant(0, dl, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                          DeclareRetOps);
      InFlag = Chain.getValue(1);
    } else {
      retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareRetOps[] = { Chain,
                                  DAG.getConstant(retAlignment, dl, MVT::i32),
                                  DAG.getConstant(resultsz / 8, dl, MVT::i32),
                                  DAG.getConstant(0, dl, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                          DeclareRetOps);
      InFlag = Chain.getValue(1);
    }
  }

  if (!Func) {
    // This is an indirect function call: PTX requires a prototype of the form
    //   proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
    // to be emitted, and the label has to be used as the last arg of the call
    // instruction.
    // The prototype is embedded in a string and put as the operand for a
    // CallPrototype SDNode which will print out to the value of the string.
    SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    std::string Proto =
        getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS);
    const char *ProtoStr =
        nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
    SDValue ProtoOps[] = {
      Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
    };
    Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
    InFlag = Chain.getValue(1);
  }
  // Op to just print "call"
  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue PrintCallOps[] = {
    Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
  };
  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
                      dl, PrintCallVTs, PrintCallOps);
  InFlag = Chain.getValue(1);

  // Ops to print out the function name
  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
  InFlag = Chain.getValue(1);

  // Ops to print out the param list
  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgBeginOps[] = { Chain, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
                      CallArgBeginOps);
  InFlag = Chain.getValue(1);

  for (unsigned i = 0, e = paramCount; i != e; ++i) {
    unsigned opcode;
    if (i == (e - 1))
      opcode = NVPTXISD::LastCallArg;
    else
      opcode = NVPTXISD::CallArg;
    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
                             DAG.getConstant(i, dl, MVT::i32), InFlag };
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
    InFlag = Chain.getValue(1);
  }
  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgEndOps[] = { Chain,
                              DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
                              InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
  InFlag = Chain.getValue(1);

  if (!Func) {
    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue PrototypeOps[] = { Chain,
                               DAG.getConstant(uniqueCallSite, dl, MVT::i32),
                               InFlag };
    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
    InFlag = Chain.getValue(1);
  }

  // Generate loads from param memory/moves from registers for result
  if (Ins.size() > 0) {
    if (retTy && retTy->isVectorTy()) {
      EVT ObjectVT = getValueType(DL, retTy);
      unsigned NumElts = ObjectVT.getVectorNumElements();
      EVT EltVT = ObjectVT.getVectorElementType();
      assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
                                                      ObjectVT) == NumElts &&
             "Vector was not scalarized");
      unsigned sz = EltVT.getSizeInBits();
      bool needTruncate = sz < 8;

      if (NumElts == 1) {
        // Just a simple load
        SmallVector<EVT, 4> LoadRetVTs;
        if (EltVT == MVT::i1 || EltVT == MVT::i8) {
          // If loading i1/i8 result, generate
          //   load.b8 i16
          //   if i1
          //   trunc i16 to i1
          LoadRetVTs.push_back(MVT::i16);
        } else
          LoadRetVTs.push_back(EltVT);
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);
        SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
                                DAG.getConstant(0, dl, MVT::i32), InFlag};
        SDValue retval = DAG.getMemIntrinsicNode(
            NVPTXISD::LoadParam, dl,
            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        SDValue Ret0 = retval;
        if (needTruncate)
          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
        InVals.push_back(Ret0);
      } else if (NumElts == 2) {
        // LoadV2
        SmallVector<EVT, 4> LoadRetVTs;
        if (EltVT == MVT::i1 || EltVT == MVT::i8) {
          // If loading i1/i8 result, generate
          //   load.b8 i16
          //   if i1
          //   trunc i16 to i1
          LoadRetVTs.push_back(MVT::i16);
          LoadRetVTs.push_back(MVT::i16);
        } else {
          LoadRetVTs.push_back(EltVT);
          LoadRetVTs.push_back(EltVT);
        }
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);
        SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
                                DAG.getConstant(0, dl, MVT::i32), InFlag};
        SDValue retval = DAG.getMemIntrinsicNode(
            NVPTXISD::LoadParamV2, dl,
            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
        Chain = retval.getValue(2);
        InFlag = retval.getValue(3);
        SDValue Ret0 = retval.getValue(0);
        SDValue Ret1 = retval.getValue(1);
        if (needTruncate) {
          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
          InVals.push_back(Ret0);
          Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
          InVals.push_back(Ret1);
        } else {
          InVals.push_back(Ret0);
          InVals.push_back(Ret1);
        }
      } else {
        // Split into N LoadV4
        unsigned Ofst = 0;
        unsigned VecSize = 4;
        unsigned Opc = NVPTXISD::LoadParamV4;
        if (EltVT.getSizeInBits() == 64) {
          VecSize = 2;
          Opc = NVPTXISD::LoadParamV2;
        }
        EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
        for (unsigned i = 0; i < NumElts; i += VecSize) {
          SmallVector<EVT, 8> LoadRetVTs;
          if (EltVT == MVT::i1 || EltVT == MVT::i8) {
            // If loading i1/i8 result, generate
            //   load.b8 i16
            //   if i1
            //   trunc i16 to i1
            for (unsigned j = 0; j < VecSize; ++j)
              LoadRetVTs.push_back(MVT::i16);
          } else {
            for (unsigned j = 0; j < VecSize; ++j)
              LoadRetVTs.push_back(EltVT);
          }
          LoadRetVTs.push_back(MVT::Other);
          LoadRetVTs.push_back(MVT::Glue);
          SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
                                  DAG.getConstant(Ofst, dl, MVT::i32), InFlag};
          SDValue retval = DAG.getMemIntrinsicNode(
              Opc, dl, DAG.getVTList(LoadRetVTs),
              LoadRetOps, EltVT, MachinePointerInfo());
          if (VecSize == 2) {
            Chain = retval.getValue(2);
            InFlag = retval.getValue(3);
          } else {
            Chain = retval.getValue(4);
            InFlag = retval.getValue(5);
          }

          for (unsigned j = 0; j < VecSize; ++j) {
            if (i + j >= NumElts)
              break;
            SDValue Elt = retval.getValue(j);
            if (needTruncate)
              Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
            InVals.push_back(Elt);
          }
          Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
        }
      }
    } else {
      SmallVector<EVT, 16> VTs;
      SmallVector<uint64_t, 16> Offsets;
      ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0);
      assert(VTs.size() == Ins.size() && "Bad value decomposition");
      unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
        unsigned sz = VTs[i].getSizeInBits();
        unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
        bool needTruncate = sz < 8;
        if (VTs[i].isInteger() && (sz < 8))
          sz = 8;

        SmallVector<EVT, 4> LoadRetVTs;
        EVT TheLoadType = VTs[i];
        if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) {
          // This is for integer types only, and specifically not for
          // aggregates.
          LoadRetVTs.push_back(MVT::i32);
          TheLoadType = MVT::i32;
        } else if (sz < 16) {
          // If loading i1/i8 result, generate
          //   load i8 (-> i16)
          //   trunc i16 to i1/i8
          LoadRetVTs.push_back(MVT::i16);
        } else
          LoadRetVTs.push_back(Ins[i].VT);
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);

        SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
                                DAG.getConstant(Offsets[i], dl, MVT::i32),
                                InFlag};
        SDValue retval = DAG.getMemIntrinsicNode(
            NVPTXISD::LoadParam, dl,
            DAG.getVTList(LoadRetVTs), LoadRetOps,
            TheLoadType, MachinePointerInfo(), AlignI);
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        SDValue Ret0 = retval.getValue(0);
        if (needTruncate)
          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
        InVals.push_back(Ret0);
      }
    }
  }

1644  Chain = DAG.getCALLSEQ_END(Chain,
1645  DAG.getIntPtrConstant(uniqueCallSite, dl, true),
1646  DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
1647  true),
1648  InFlag, dl);
1649  uniqueCallSite++;
1650 
1651  // Set isTailCall to false for now, until we figure out how to express
1652  // tail call optimization in PTX.
1653  isTailCall = false;
1654  return Chain;
1655 }
1656 
1657 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
1658 // (see LegalizeDAG.cpp), which is slow and goes through local memory. We
1659 // instead build the result with extract/build_vector, as LegalizeOp() did in LLVM 2.5.
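// For example (an illustrative sketch, not emitted verbatim), concatenating
// two v2f32 values
//   v4f32 concat_vectors(v2f32 %a, v2f32 %b)
// becomes
//   v4f32 BUILD_VECTOR(extractelt(%a, 0), extractelt(%a, 1),
//                      extractelt(%b, 0), extractelt(%b, 1))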
1660 SDValue
1661 NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
1662  SDNode *Node = Op.getNode();
1663  SDLoc dl(Node);
1664  SmallVector<SDValue, 8> Ops;
1665  unsigned NumOperands = Node->getNumOperands();
1666  for (unsigned i = 0; i < NumOperands; ++i) {
1667  SDValue SubOp = Node->getOperand(i);
1668  EVT VVT = SubOp.getNode()->getValueType(0);
1669  EVT EltVT = VVT.getVectorElementType();
1670  unsigned NumSubElem = VVT.getVectorNumElements();
1671  for (unsigned j = 0; j < NumSubElem; ++j) {
1672  Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
1673  DAG.getIntPtrConstant(j, dl)));
1674  }
1675  }
1676  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
1677 }
1678 
1679 /// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which
1680 /// 1) return two i32 values and take a 2 x i32 value to shift plus a shift
1681 /// amount, or
1682 /// 2) return two i64 values and take a 2 x i64 value to shift plus a shift
1683 /// amount.
1684 SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
1685  SelectionDAG &DAG) const {
1686  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
1687  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
1688 
1689  EVT VT = Op.getValueType();
1690  unsigned VTBits = VT.getSizeInBits();
1691  SDLoc dl(Op);
1692  SDValue ShOpLo = Op.getOperand(0);
1693  SDValue ShOpHi = Op.getOperand(1);
1694  SDValue ShAmt = Op.getOperand(2);
1695  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
1696 
1697  if (VTBits == 32 && STI.getSmVersion() >= 35) {
1698 
1699  // For 32-bit shifts on sm_35 and later, we can use the funnel shift 'shf' instruction.
1700  // {dHi, dLo} = {aHi, aLo} >> Amt
1701  // dHi = aHi >> Amt
1702  // dLo = shf.r.clamp aLo, aHi, Amt
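  // Illustrative PTX sketch of the two instructions this selects to
  // (register names assumed):
  //   shr.u32         %dHi, %aHi, %amt;   // shr.s32 for SRA_PARTS
  //   shf.r.clamp.b32 %dLo, %aLo, %aHi, %amt;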
1703 
1704  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
1705  SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
1706  ShAmt);
1707 
1708  SDValue Ops[2] = { Lo, Hi };
1709  return DAG.getMergeValues(Ops, dl);
1710  }
1711  else {
1712 
1713  // {dHi, dLo} = {aHi, aLo} >> Amt
1714  // - if (Amt>=size) then
1715  // dLo = aHi >> (Amt-size)
1716  // dHi = aHi >> Amt (this is either all 0 or all 1)
1717  // else
1718  // dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
1719  // dHi = aHi >> Amt
1720 
1721  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1722  DAG.getConstant(VTBits, dl, MVT::i32),
1723  ShAmt);
1724  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
1725  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1726  DAG.getConstant(VTBits, dl, MVT::i32));
1727  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
1728  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
1729  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
1730 
1731  SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
1732  DAG.getConstant(VTBits, dl, MVT::i32),
1733  ISD::SETGE);
1734  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
1735  SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
1736 
1737  SDValue Ops[2] = { Lo, Hi };
1738  return DAG.getMergeValues(Ops, dl);
1739  }
1740 }
1741 
1742 /// LowerShiftLeftParts - Lower SHL_PARTS, which
1743 /// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
1744 /// amount, or
1745 /// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
1746 /// amount.
1747 SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
1748  SelectionDAG &DAG) const {
1749  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
1750  assert(Op.getOpcode() == ISD::SHL_PARTS);
1751 
1752  EVT VT = Op.getValueType();
1753  unsigned VTBits = VT.getSizeInBits();
1754  SDLoc dl(Op);
1755  SDValue ShOpLo = Op.getOperand(0);
1756  SDValue ShOpHi = Op.getOperand(1);
1757  SDValue ShAmt = Op.getOperand(2);
1758 
1759  if (VTBits == 32 && STI.getSmVersion() >= 35) {
1760 
1761  // For 32-bit shifts on sm_35 and later, we can use the funnel shift 'shf' instruction.
1762  // {dHi, dLo} = {aHi, aLo} << Amt
1763  // dHi = shf.l.clamp aLo, aHi, Amt
1764  // dLo = aLo << Amt
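  // Illustrative PTX sketch (register names assumed):
  //   shf.l.clamp.b32 %dHi, %aLo, %aHi, %amt;
  //   shl.b32         %dLo, %aLo, %amt;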
1765 
1766  SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
1767  ShAmt);
1768  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
1769 
1770  SDValue Ops[2] = { Lo, Hi };
1771  return DAG.getMergeValues(Ops, dl);
1772  }
1773  else {
1774 
1775  // {dHi, dLo} = {aHi, aLo} << Amt
1776  // - if (Amt>=size) then
1777  // dLo = aLo << Amt (all 0)
1778  // dHi = aLo << (Amt-size)
1779  // else
1780  // dLo = aLo << Amt
1781  // dHi = (aHi << Amt) | (aLo >> (size-Amt))
1782 
1783  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1784  DAG.getConstant(VTBits, dl, MVT::i32),
1785  ShAmt);
1786  SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
1787  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1788  DAG.getConstant(VTBits, dl, MVT::i32));
1789  SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
1790  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
1791  SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
1792 
1793  SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
1794  DAG.getConstant(VTBits, dl, MVT::i32),
1795  ISD::SETGE);
1796  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
1797  SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
1798 
1799  SDValue Ops[2] = { Lo, Hi };
1800  return DAG.getMergeValues(Ops, dl);
1801  }
1802 }
1803 
1804 SDValue
1805 NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1806  switch (Op.getOpcode()) {
1807  case ISD::RETURNADDR:
1808  return SDValue();
1809  case ISD::FRAMEADDR:
1810  return SDValue();
1811  case ISD::GlobalAddress:
1812  return LowerGlobalAddress(Op, DAG);
1813  case ISD::INTRINSIC_W_CHAIN:
1814  return Op;
1815  case ISD::BUILD_VECTOR:
1816  case ISD::EXTRACT_SUBVECTOR:
1817  return Op;
1818  case ISD::CONCAT_VECTORS:
1819  return LowerCONCAT_VECTORS(Op, DAG);
1820  case ISD::STORE:
1821  return LowerSTORE(Op, DAG);
1822  case ISD::LOAD:
1823  return LowerLOAD(Op, DAG);
1824  case ISD::SHL_PARTS:
1825  return LowerShiftLeftParts(Op, DAG);
1826  case ISD::SRA_PARTS:
1827  case ISD::SRL_PARTS:
1828  return LowerShiftRightParts(Op, DAG);
1829  case ISD::SELECT:
1830  return LowerSelect(Op, DAG);
1831  default:
1832  llvm_unreachable("Custom lowering not defined for operation");
1833  }
1834 }
1835 
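// i1 selects are custom-lowered by widening to i32. Illustrative sketch of
// the transformation performed below:
//   select i1 %c, i1 %a, i1 %b
//   => trunc (select i1 %c, (any_extend %a), (any_extend %b)) to i1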
1836 SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
1837  SDValue Op0 = Op->getOperand(0);
1838  SDValue Op1 = Op->getOperand(1);
1839  SDValue Op2 = Op->getOperand(2);
1840  SDLoc DL(Op.getNode());
1841 
1842  assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
1843 
1844  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
1845  Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
1846  SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
1847  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
1848 
1849  return Trunc;
1850 }
1851 
1852 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1853  if (Op.getValueType() == MVT::i1)
1854  return LowerLOADi1(Op, DAG);
1855  else
1856  return SDValue();
1857 }
1858 
1859 // v = ld i1* addr
1860 // =>
1861 // v1 = ld i8* addr (-> i16)
1862 // v = trunc i16 to i1
1863 SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
1864  SDNode *Node = Op.getNode();
1865  LoadSDNode *LD = cast<LoadSDNode>(Node);
1866  SDLoc dl(Node);
1867  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
1868  assert(Node->getValueType(0) == MVT::i1 &&
1869  "Custom lowering for i1 load only");
1870  SDValue newLD =
1871  DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
1872  LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
1873  LD->isInvariant(), LD->getAlignment());
1874  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
1875  // The legalizer (the caller) is expecting two values from the legalized
1876  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
1877  // in LegalizeDAG.cpp which also uses MergeValues.
1878  SDValue Ops[] = { result, LD->getChain() };
1879  return DAG.getMergeValues(Ops, dl);
1880 }
1881 
1882 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1883  EVT ValVT = Op.getOperand(1).getValueType();
1884  if (ValVT == MVT::i1)
1885  return LowerSTOREi1(Op, DAG);
1886  else if (ValVT.isVector())
1887  return LowerSTOREVector(Op, DAG);
1888  else
1889  return SDValue();
1890 }
1891 
1892 SDValue
1893 NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
1894  SDNode *N = Op.getNode();
1895  SDValue Val = N->getOperand(1);
1896  SDLoc DL(N);
1897  EVT ValVT = Val.getValueType();
1898 
1899  if (ValVT.isVector()) {
1900  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
1901  // legal. We can (and should) split that into 2 stores of <2 x double> here
1902  // but I'm leaving that as a TODO for now.
1903  if (!ValVT.isSimple())
1904  return SDValue();
1905  switch (ValVT.getSimpleVT().SimpleTy) {
1906  default:
1907  return SDValue();
1908  case MVT::v2i8:
1909  case MVT::v2i16:
1910  case MVT::v2i32:
1911  case MVT::v2i64:
1912  case MVT::v2f32:
1913  case MVT::v2f64:
1914  case MVT::v4i8:
1915  case MVT::v4i16:
1916  case MVT::v4i32:
1917  case MVT::v4f32:
1918  // This is a "native" vector type
1919  break;
1920  }
1921 
1922  MemSDNode *MemSD = cast<MemSDNode>(N);
1923  const DataLayout &TD = DAG.getDataLayout();
1924 
1925  unsigned Align = MemSD->getAlignment();
1926  unsigned PrefAlign =
1927  TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
1928  if (Align < PrefAlign) {
1929  // This store is not sufficiently aligned, so bail out and let this vector
1930  // store be scalarized. Note that we may still be able to emit smaller
1931  // vector stores. For example, if we are storing a <4 x float> with an
1932  // alignment of 8, this check will fail but the legalizer will try again
1933  // with 2 x <2 x float>, which will succeed with an alignment of 8.
1934  return SDValue();
1935  }
1936 
1937  unsigned Opcode = 0;
1938  EVT EltVT = ValVT.getVectorElementType();
1939  unsigned NumElts = ValVT.getVectorNumElements();
1940 
1941  // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
1942  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
1943  // stored type to i16 and propagate the "real" type as the memory type.
1944  bool NeedExt = false;
1945  if (EltVT.getSizeInBits() < 16)
1946  NeedExt = true;
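  // e.g. (illustrative) a <4 x i8> store becomes a StoreV4 of four
  // any-extended i16 values, while v4i8 is kept as the memory VT below.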
1947 
1948  switch (NumElts) {
1949  default:
1950  return SDValue();
1951  case 2:
1952  Opcode = NVPTXISD::StoreV2;
1953  break;
1954  case 4: {
1955  Opcode = NVPTXISD::StoreV4;
1956  break;
1957  }
1958  }
1959 
1960  SmallVector<SDValue, 8> Ops;
1961 
1962  // First is the chain
1963  Ops.push_back(N->getOperand(0));
1964 
1965  // Then the split values
1966  for (unsigned i = 0; i < NumElts; ++i) {
1967  SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
1968  DAG.getIntPtrConstant(i, DL));
1969  if (NeedExt)
1970  ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
1971  Ops.push_back(ExtVal);
1972  }
1973 
1974  // Then any remaining arguments
1975  Ops.append(N->op_begin() + 2, N->op_end());
1976 
1977  SDValue NewSt = DAG.getMemIntrinsicNode(
1978  Opcode, DL, DAG.getVTList(MVT::Other), Ops,
1979  MemSD->getMemoryVT(), MemSD->getMemOperand());
1980 
1981  //return DCI.CombineTo(N, NewSt, true);
1982  return NewSt;
1983  }
1984 
1985  return SDValue();
1986 }
1987 
1988 // st i1 v, addr
1989 // =>
1990 // v1 = zxt v to i16
1991 // st.u8 i16, addr
1992 SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
1993  SDNode *Node = Op.getNode();
1994  SDLoc dl(Node);
1995  StoreSDNode *ST = cast<StoreSDNode>(Node);
1996  SDValue Tmp1 = ST->getChain();
1997  SDValue Tmp2 = ST->getBasePtr();
1998  SDValue Tmp3 = ST->getValue();
1999  assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
2000  unsigned Alignment = ST->getAlignment();
2001  bool isVolatile = ST->isVolatile();
2002  bool isNonTemporal = ST->isNonTemporal();
2003  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
2004  SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
2005  ST->getPointerInfo(), MVT::i8, isNonTemporal,
2006  isVolatile, Alignment);
2007  return Result;
2008 }
2009 
2010 SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
2011  int idx, EVT v) const {
2012  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
2013  std::stringstream suffix;
2014  suffix << idx;
2015  *name += suffix.str();
2016  return DAG.getTargetExternalSymbol(name->c_str(), v);
2017 }
2018 
2019 SDValue
2020 NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
2021  std::string ParamSym;
2022  raw_string_ostream ParamStr(ParamSym);
2023 
2024  ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx;
2025  ParamStr.flush();
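  // e.g. for a function named "foo" and idx 1 this produces "foo_param_1",
  // matching the .param names emitted in the PTX output (illustrative).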
2026 
2027  std::string *SavedStr =
2028  nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str());
2029  return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
2030 }
2031 
2032 SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
2033  return getExtSymb(DAG, ".HLPPARAM", idx);
2034 }
2035 
2036 // Check to see if the kernel argument is image*_t or sampler_t
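// For example (illustrative), an OpenCL kernel parameter declared as
// 'image2d_t img' reaches the backend as a pointer to the named opaque
// struct "struct._image2d_t", one of the special type names checked below.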
2037 
2038 bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
2039  static const char *const specialTypes[] = { "struct._image2d_t",
2040  "struct._image3d_t",
2041  "struct._sampler_t" };
2042 
2043  const Type *Ty = arg->getType();
2044  const PointerType *PTy = dyn_cast<PointerType>(Ty);
2045 
2046  if (!PTy)
2047  return false;
2048 
2049  if (!context)
2050  return false;
2051 
2052  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
2053  const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
2054 
2055  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
2056  if (TypeName == specialTypes[i])
2057  return true;
2058 
2059  return false;
2060 }
2061 
2062 SDValue NVPTXTargetLowering::LowerFormalArguments(
2063  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2064  const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
2065  SmallVectorImpl<SDValue> &InVals) const {
2066  MachineFunction &MF = DAG.getMachineFunction();
2067  const DataLayout &DL = DAG.getDataLayout();
2068  auto PtrVT = getPointerTy(DAG.getDataLayout());
2069 
2070  const Function *F = MF.getFunction();
2071  const AttributeSet &PAL = F->getAttributes();
2072  const TargetLowering *TLI = STI.getTargetLowering();
2073 
2074  SDValue Root = DAG.getRoot();
2075  std::vector<SDValue> OutChains;
2076 
2077  bool isKernel = llvm::isKernelFunction(*F);
2078  bool isABI = (STI.getSmVersion() >= 20);
2079  assert(isABI && "Non-ABI compilation is not supported");
2080  if (!isABI)
2081  return Chain;
2082 
2083  std::vector<Type *> argTypes;
2084  std::vector<const Argument *> theArgs;
2085  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
2086  I != E; ++I) {
2087  theArgs.push_back(I);
2088  argTypes.push_back(I->getType());
2089  }
2090  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
2091  // Ins.size() will be larger
2092  // * if there is an aggregate argument with multiple fields (each field
2093  // showing up separately in Ins)
2094  // * if there is a vector argument with more than typical vector-length
2095  // elements (generally if more than 4) where each vector element is
2096  // individually present in Ins.
2097  // So a different index should be used for indexing into Ins.
2098  // See similar issue in LowerCall.
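  // Illustrative example: an aggregate argument of type {i32, float} is a
  // single entry in theArgs/argTypes but two entries in Ins (one per field),
  // so InsIdx advances faster than i.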
2099  unsigned InsIdx = 0;
2100 
2101  int idx = 0;
2102  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
2103  Type *Ty = argTypes[i];
2104 
2105  // If the kernel argument is image*_t or sampler_t, convert it to
2106  // an i32 constant holding the parameter position. This can later be
2107  // matched in the AsmPrinter to output the correct mangled name.
2108  if (isImageOrSamplerVal(
2109  theArgs[i],
2110  (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
2111  : nullptr))) {
2112  assert(isKernel && "Only kernels can have image/sampler params");
2113  InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
2114  continue;
2115  }
2116 
2117  if (theArgs[i]->use_empty()) {
2118  // argument is dead
2119  if (Ty->isAggregateType()) {
2120  SmallVector<EVT, 16> vtparts;
2121 
2122  ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
2123  assert(vtparts.size() > 0 && "empty aggregate type not expected");
2124  for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
2125  ++parti) {
2126  InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2127  ++InsIdx;
2128  }
2129  if (vtparts.size() > 0)
2130  --InsIdx;
2131  continue;
2132  }
2133  if (Ty->isVectorTy()) {
2134  EVT ObjectVT = getValueType(DL, Ty);
2135  unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
2136  for (unsigned parti = 0; parti < NumRegs; ++parti) {
2137  InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2138  ++InsIdx;
2139  }
2140  if (NumRegs > 0)
2141  --InsIdx;
2142  continue;
2143  }
2144  InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2145  continue;
2146  }
2147 
2148  // In the following cases, assign a node order of "idx+1"
2149  // to newly created nodes. The SDNodes for params have to
2150  // appear in the same order as their order of appearance
2151  // in the original function. "idx+1" holds that order.
2152  if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
2153  if (Ty->isAggregateType()) {
2154  SmallVector<EVT, 16> vtparts;
2155  SmallVector<uint64_t, 16> offsets;
2156 
2157  // NOTE: Here, we lose the ability to issue vector loads for vectors
2158  // that are a part of a struct. This should be investigated in the
2159  // future.
2160  ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets,
2161  0);
2162  assert(vtparts.size() > 0 && "empty aggregate type not expected");
2163  bool aggregateIsPacked = false;
2164  if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
2165  aggregateIsPacked = STy->isPacked();
2166 
2167  SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2168  for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
2169  ++parti) {
2170  EVT partVT = vtparts[parti];
2171  Value *srcValue = Constant::getNullValue(
2172  PointerType::get(partVT.getTypeForEVT(F->getContext()),
2173  llvm::ADDRESS_SPACE_PARAM));
2174  SDValue srcAddr =
2175  DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
2176  DAG.getConstant(offsets[parti], dl, PtrVT));
2177  unsigned partAlign = aggregateIsPacked
2178  ? 1
2179  : DL.getABITypeAlignment(
2180  partVT.getTypeForEVT(F->getContext()));
2181  SDValue p;
2182  if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
2183  ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
2184  ISD::SEXTLOAD : ISD::ZEXTLOAD;
2185  p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
2186  MachinePointerInfo(srcValue), partVT, false,
2187  false, false, partAlign);
2188  } else {
2189  p = DAG.getLoad(partVT, dl, Root, srcAddr,
2190  MachinePointerInfo(srcValue), false, false, false,
2191  partAlign);
2192  }
2193  if (p.getNode())
2194  p.getNode()->setIROrder(idx + 1);
2195  InVals.push_back(p);
2196  ++InsIdx;
2197  }
2198  if (vtparts.size() > 0)
2199  --InsIdx;
2200  continue;
2201  }
2202  if (Ty->isVectorTy()) {
2203  EVT ObjectVT = getValueType(DL, Ty);
2204  SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2205  unsigned NumElts = ObjectVT.getVectorNumElements();
2206  assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
2207  "Vector was not scalarized");
2208  EVT EltVT = ObjectVT.getVectorElementType();
2209 
2210  // V1 load
2211  // f32 = load ...
2212  if (NumElts == 1) {
2213  // We only have one element, so just directly load it
2214  Value *SrcValue = Constant::getNullValue(PointerType::get(
2215  EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
2216  SDValue P = DAG.getLoad(
2217  EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false,
2218  true,
2219  DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
2220  if (P.getNode())
2221  P.getNode()->setIROrder(idx + 1);
2222 
2223  if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
2224  P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
2225  InVals.push_back(P);
2226  ++InsIdx;
2227  } else if (NumElts == 2) {
2228  // V2 load
2229  // f32,f32 = load ...
2230  EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
2231  Value *SrcValue = Constant::getNullValue(PointerType::get(
2232  VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
2233  SDValue P = DAG.getLoad(
2234  VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false,
2235  true,
2236  DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
2237  if (P.getNode())
2238  P.getNode()->setIROrder(idx + 1);
2239 
2240  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2241  DAG.getIntPtrConstant(0, dl));
2242  SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2243  DAG.getIntPtrConstant(1, dl));
2244 
2245  if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
2246  Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
2247  Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
2248  }
2249 
2250  InVals.push_back(Elt0);
2251  InVals.push_back(Elt1);
2252  InsIdx += 2;
2253  } else {
2254  // V4 loads
2255  // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
2256  // vector will be expanded to a power of 2 elements, so we know we can
2258  // always round up to the next multiple of 4 when creating the vector
2259  // loads.
2260  // e.g. 4 elem => 1 ld.v4
2261  // 6 elem => 2 ld.v4
2262  // 8 elem => 2 ld.v4
2263  // 11 elem => 3 ld.v4
2264  unsigned VecSize = 4;
2265  if (EltVT.getSizeInBits() == 64) {
2266  VecSize = 2;
2267  }
2268  EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
2269  unsigned Ofst = 0;
2270  for (unsigned i = 0; i < NumElts; i += VecSize) {
2271  Value *SrcValue = Constant::getNullValue(
2272  PointerType::get(VecVT.getTypeForEVT(F->getContext()),
2273  llvm::ADDRESS_SPACE_PARAM));
2274  SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
2275  DAG.getConstant(Ofst, dl, PtrVT));
2276  SDValue P = DAG.getLoad(
2277  VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
2278  false, true,
2279  DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
2280  if (P.getNode())
2281  P.getNode()->setIROrder(idx + 1);
2282 
2283  for (unsigned j = 0; j < VecSize; ++j) {
2284  if (i + j >= NumElts)
2285  break;
2286  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2287  DAG.getIntPtrConstant(j, dl));
2288  if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
2289  Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
2290  InVals.push_back(Elt);
2291  }
2292  Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
2293  }
2294  InsIdx += NumElts;
2295  }
2296 
2297  if (NumElts > 0)
2298  --InsIdx;
2299  continue;
2300  }
2301  // A plain scalar.
2302  EVT ObjectVT = getValueType(DL, Ty);
2303  // If ABI, load from the param symbol
2304  SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2305  Value *srcValue = Constant::getNullValue(PointerType::get(
2306  ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
2307  SDValue p;
2308  if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
2309  ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
2310  ISD::SEXTLOAD : ISD::ZEXTLOAD;
2311  p = DAG.getExtLoad(
2312  ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue),
2313  ObjectVT, false, false, false,
2314  DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
2315  } else {
2316  p = DAG.getLoad(
2317  Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false,
2318  false, false,
2319  DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
2320  }
2321  if (p.getNode())
2322  p.getNode()->setIROrder(idx + 1);
2323  InVals.push_back(p);
2324  continue;
2325  }
2326 
2327  // Param has ByVal attribute
2328  // Return MoveParam(param symbol).
2329  // Ideally, the param symbol could be returned directly,
2330  // but when the SDNode builder decides to use it in a CopyToReg(),
2331  // the machine instruction fails because TargetExternalSymbol
2332  // (not lowered) is target dependent, and CopyToReg assumes
2333  // the source is lowered.
2334  EVT ObjectVT = getValueType(DL, Ty);
2335  assert(ObjectVT == Ins[InsIdx].VT &&
2336  "Ins type did not match function type");
2337  SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2338  SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
2339  if (p.getNode())
2340  p.getNode()->setIROrder(idx + 1);
2341  if (isKernel)
2342  InVals.push_back(p);
2343  else {
2344  SDValue p2 = DAG.getNode(
2345  ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
2346  DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
2347  InVals.push_back(p2);
2348  }
2349  }
2350 
2351  // Clang checks for an explicit VarArg and issues an error if one is
2352  // found. However, Clang lets code with an implicit vararg declaration
2353  // like f() pass (see bug 617733).
2354  // We treat this case as if the arg list is empty.
2355  // if (F.isVarArg()) {
2356  // assert(0 && "VarArg not supported yet!");
2357  //}
2358 
2359  if (!OutChains.empty())
2360  DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
2361 
2362  return Chain;
2363 }
2364 
2365 
2366 SDValue
2367 NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2368  bool isVarArg,
2369  const SmallVectorImpl<ISD::OutputArg> &Outs,
2370  const SmallVectorImpl<SDValue> &OutVals,
2371  SDLoc dl, SelectionDAG &DAG) const {
2372  MachineFunction &MF = DAG.getMachineFunction();
2373  const Function *F = MF.getFunction();
2374  Type *RetTy = F->getReturnType();
2375  const DataLayout &TD = DAG.getDataLayout();
2376 
2377  bool isABI = (STI.getSmVersion() >= 20);
2378  assert(isABI && "Non-ABI compilation is not supported");
2379  if (!isABI)
2380  return Chain;
2381 
2382  if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
2383  // If we have a vector type, the OutVals array will be the scalarized
2384  // components, and we have to combine them into one or more vector stores.
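  // Illustrative example: a <2 x float> return value arrives here as two
  // OutVals entries and is emitted as a single NVPTXISD::StoreRetvalV2 node.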
2385  unsigned NumElts = VTy->getNumElements();
2386  assert(NumElts == Outs.size() && "Bad scalarization of return value");
2387 
2388  // const_cast can be removed in later LLVM versions
2389  EVT EltVT = getValueType(TD, RetTy).getVectorElementType();
2390  bool NeedExtend = false;
2391  if (EltVT.getSizeInBits() < 16)
2392  NeedExtend = true;
2393 
2394  // V1 store
2395  if (NumElts == 1) {
2396  SDValue StoreVal = OutVals[0];
2397  // We only have one element, so just directly store it
2398  if (NeedExtend)
2399  StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
2400  SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal };
2401  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
2402  DAG.getVTList(MVT::Other), Ops,
2403  EltVT, MachinePointerInfo());
2404 
2405  } else if (NumElts == 2) {
2406  // V2 store
2407  SDValue StoreVal0 = OutVals[0];
2408  SDValue StoreVal1 = OutVals[1];
2409 
2410  if (NeedExtend) {
2411  StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
2412  StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
2413  }
2414 
2415  SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0,
2416  StoreVal1 };
2417  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
2418  DAG.getVTList(MVT::Other), Ops,
2419  EltVT, MachinePointerInfo());
2420  } else {
2421  // V4 stores
2422  // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
2423  // vector will be expanded to a power of 2 elements, so we know we can
2424  // always round up to the next multiple of 4 when creating the vector
2425  // stores.
2426  // e.g. 4 elem => 1 st.v4
2427  // 6 elem => 2 st.v4
2428  // 8 elem => 2 st.v4
2429  // 11 elem => 3 st.v4
2430 
2431  unsigned VecSize = 4;
2432  if (OutVals[0].getValueType().getSizeInBits() == 64)
2433  VecSize = 2;
2434 
2435  unsigned Offset = 0;
2436 
2437  EVT VecVT =
2438  EVT::getVectorVT(F->getContext(), EltVT, VecSize);
2439  unsigned PerStoreOffset =
2440  TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
2441 
2442  for (unsigned i = 0; i < NumElts; i += VecSize) {
2443  // Get values
2444  SDValue StoreVal;
2445  SmallVector<SDValue, 8> Ops;
2446  Ops.push_back(Chain);
2447  Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32));
2448  unsigned Opc = NVPTXISD::StoreRetvalV2;
2449  EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();
2450 
2451  StoreVal = OutVals[i];
2452  if (NeedExtend)
2453  StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2454  Ops.push_back(StoreVal);
2455 
2456  if (i + 1 < NumElts) {
2457  StoreVal = OutVals[i + 1];
2458  if (NeedExtend)
2459  StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2460  } else {
2461  StoreVal = DAG.getUNDEF(ExtendedVT);
2462  }
2463  Ops.push_back(StoreVal);
2464 
2465  if (VecSize == 4) {
2466  Opc = NVPTXISD::StoreRetvalV4;
2467  if (i + 2 < NumElts) {
2468  StoreVal = OutVals[i + 2];
2469  if (NeedExtend)
2470  StoreVal =
2471  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2472  } else {
2473  StoreVal = DAG.getUNDEF(ExtendedVT);
2474  }
2475  Ops.push_back(StoreVal);
2476 
2477  if (i + 3 < NumElts) {
2478  StoreVal = OutVals[i + 3];
2479  if (NeedExtend)
2480  StoreVal =
2481  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2482  } else {
2483  StoreVal = DAG.getUNDEF(ExtendedVT);
2484  }
2485  Ops.push_back(StoreVal);
2486  }
2487 
2488  // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
2489  Chain =
2490  DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
2491  EltVT, MachinePointerInfo());
2492  Offset += PerStoreOffset;
2493  }
2494  }
2495  } else {
2496  SmallVector<EVT, 16> ValVTs;
2497  SmallVector<uint64_t, 16> Offsets;
2498  ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0);
2499  assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
2500 
2501  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2502  SDValue theVal = OutVals[i];
2503  EVT TheValType = theVal.getValueType();
2504  unsigned numElems = 1;
2505  if (TheValType.isVector())
2506  numElems = TheValType.getVectorNumElements();
2507  for (unsigned j = 0, je = numElems; j != je; ++j) {
2508  SDValue TmpVal = theVal;
2509  if (TheValType.isVector())
2510  TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
2511  TheValType.getVectorElementType(), TmpVal,
2512  DAG.getIntPtrConstant(j, dl));
2513  EVT TheStoreType = ValVTs[i];
2514  if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) {
2515  // The following zero-extension is for integer types only, and
2516  // specifically not for aggregates.
2517  TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
2518  TheStoreType = MVT::i32;
2519  }
2520  else if (TmpVal.getValueType().getSizeInBits() < 16)
2521  TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
2522 
2523  SDValue Ops[] = {
2524  Chain,
2525  DAG.getConstant(Offsets[i], dl, MVT::i32),
2526  TmpVal };
2527  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
2528  DAG.getVTList(MVT::Other), Ops,
2529  TheStoreType,
2530  MachinePointerInfo());
2531  }
2532  }
2533  }
2534 
2535  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
2536 }
2537 
2538 
2539 void NVPTXTargetLowering::LowerAsmOperandForConstraint(
2540  SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
2541  SelectionDAG &DAG) const {
2542  if (Constraint.length() > 1)
2543  return;
2544  else
2545  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
2546 }
2547 
2548 // NVPTX supports vectors of legal types of any length in intrinsics, because
2549 // the NVPTX-specific type legalizer
2550 // will legalize them to the PTX-supported length.
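// Illustrative example: a <8 x float> intrinsic operand is accepted here
// because its f32 element type is legal, even though v8f32 itself is not a
// legal PTX vector type.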
2551 bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
2552  if (isTypeLegal(VT))
2553  return true;
2554  if (VT.isVector()) {
2555  MVT eVT = VT.getVectorElementType();
2556  if (isTypeLegal(eVT))
2557  return true;
2558  }
2559  return false;
2560 }
2561 
2562 static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
2563  switch (Intrinsic) {
2564  default:
2565  return 0;
2566 
2567  case Intrinsic::nvvm_tex_1d_v4f32_s32:
2568  return NVPTXISD::Tex1DFloatS32;
2569  case Intrinsic::nvvm_tex_1d_v4f32_f32:
2570  return NVPTXISD::Tex1DFloatFloat;
2571  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
2572  return NVPTXISD::Tex1DFloatFloatLevel;
2573  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
2574  return NVPTXISD::Tex1DFloatFloatGrad;
2575  case Intrinsic::nvvm_tex_1d_v4s32_s32:
2576  return NVPTXISD::Tex1DS32S32;
2577  case Intrinsic::nvvm_tex_1d_v4s32_f32:
2578  return NVPTXISD::Tex1DS32Float;
2579  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
2580  return NVPTXISD::Tex1DS32FloatLevel;
2581  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
2582  return NVPTXISD::Tex1DS32FloatGrad;
2583  case Intrinsic::nvvm_tex_1d_v4u32_s32:
2584  return NVPTXISD::Tex1DU32S32;
2585  case Intrinsic::nvvm_tex_1d_v4u32_f32:
2586  return NVPTXISD::Tex1DU32Float;
2587  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
2588  return NVPTXISD::Tex1DU32FloatLevel;
2589  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
2590  return NVPTXISD::Tex1DU32FloatGrad;
2591 
2592  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
2593  return NVPTXISD::Tex1DArrayFloatS32;
2594  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
2595  return NVPTXISD::Tex1DArrayFloatFloat;
2596  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
2597  return NVPTXISD::Tex1DArrayFloatFloatLevel;
2598  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
2599  return NVPTXISD::Tex1DArrayFloatFloatGrad;
2600  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
2601  return NVPTXISD::Tex1DArrayS32S32;
2602  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
2603  return NVPTXISD::Tex1DArrayS32Float;
2604  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
2605  return NVPTXISD::Tex1DArrayS32FloatLevel;
2606  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
2607  return NVPTXISD::Tex1DArrayS32FloatGrad;
2608  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
2609  return NVPTXISD::Tex1DArrayU32S32;
2610  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
2611  return NVPTXISD::Tex1DArrayU32Float;
2612  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
2613  return NVPTXISD::Tex1DArrayU32FloatLevel;
2614  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
2615  return NVPTXISD::Tex1DArrayU32FloatGrad;
2616 
2617  case Intrinsic::nvvm_tex_2d_v4f32_s32:
2618  return NVPTXISD::Tex2DFloatS32;
2619  case Intrinsic::nvvm_tex_2d_v4f32_f32:
2620  return NVPTXISD::Tex2DFloatFloat;
2621  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
2622  return NVPTXISD::Tex2DFloatFloatLevel;
2623  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
2624  return NVPTXISD::Tex2DFloatFloatGrad;
2625  case Intrinsic::nvvm_tex_2d_v4s32_s32:
2626  return NVPTXISD::Tex2DS32S32;
2627  case Intrinsic::nvvm_tex_2d_v4s32_f32:
2628  return NVPTXISD::Tex2DS32Float;
2629  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
2630  return NVPTXISD::Tex2DS32FloatLevel;
2631  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
2632  return NVPTXISD::Tex2DS32FloatGrad;
2633  case Intrinsic::nvvm_tex_2d_v4u32_s32:
2634  return NVPTXISD::Tex2DU32S32;
2635  case Intrinsic::nvvm_tex_2d_v4u32_f32:
2636  return NVPTXISD::Tex2DU32Float;
2637  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
2638  return NVPTXISD::Tex2DU32FloatLevel;
2639  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
2640  return NVPTXISD::Tex2DU32FloatGrad;
2641 
2642  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
2643  return NVPTXISD::Tex2DArrayFloatS32;
2644  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
2645  return NVPTXISD::Tex2DArrayFloatFloat;
2646  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
2647  return NVPTXISD::Tex2DArrayFloatFloatLevel;
2648  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
2649  return NVPTXISD::Tex2DArrayFloatFloatGrad;
2650  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
2651  return NVPTXISD::Tex2DArrayS32S32;
2652  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
2653  return NVPTXISD::Tex2DArrayS32Float;
2654  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
2655  return NVPTXISD::Tex2DArrayS32FloatLevel;
2656  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
2657  return NVPTXISD::Tex2DArrayS32FloatGrad;
2658  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
2659  return NVPTXISD::Tex2DArrayU32S32;
2660  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
2661  return NVPTXISD::Tex2DArrayU32Float;
2662  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
2663  return NVPTXISD::Tex2DArrayU32FloatLevel;
2664  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
2665  return NVPTXISD::Tex2DArrayU32FloatGrad;
2666 
2667  case Intrinsic::nvvm_tex_3d_v4f32_s32:
2668  return NVPTXISD::Tex3DFloatS32;
2669  case Intrinsic::nvvm_tex_3d_v4f32_f32:
2670  return NVPTXISD::Tex3DFloatFloat;
2671  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
2672  return NVPTXISD::Tex3DFloatFloatLevel;
2673  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
2674  return NVPTXISD::Tex3DFloatFloatGrad;
2675  case Intrinsic::nvvm_tex_3d_v4s32_s32:
2676  return NVPTXISD::Tex3DS32S32;
2677  case Intrinsic::nvvm_tex_3d_v4s32_f32:
2678  return NVPTXISD::Tex3DS32Float;
2679  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
2680  return NVPTXISD::Tex3DS32FloatLevel;
2681  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
2682  return NVPTXISD::Tex3DS32FloatGrad;
2683  case Intrinsic::nvvm_tex_3d_v4u32_s32:
2684  return NVPTXISD::Tex3DU32S32;
2685  case Intrinsic::nvvm_tex_3d_v4u32_f32:
2686  return NVPTXISD::Tex3DU32Float;
2687  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
2688  return NVPTXISD::Tex3DU32FloatLevel;
2689  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
2690  return NVPTXISD::Tex3DU32FloatGrad;
2691 
2692  case Intrinsic::nvvm_tex_cube_v4f32_f32:
2693  return NVPTXISD::TexCubeFloatFloat;
2694  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
2695  return NVPTXISD::TexCubeFloatFloatLevel;
2696  case Intrinsic::nvvm_tex_cube_v4s32_f32:
2697  return NVPTXISD::TexCubeS32Float;
2698  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
2699  return NVPTXISD::TexCubeS32FloatLevel;
2700  case Intrinsic::nvvm_tex_cube_v4u32_f32:
2701  return NVPTXISD::TexCubeU32Float;
2702  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
2703  return NVPTXISD::TexCubeU32FloatLevel;
2704 
2705  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
2706  return NVPTXISD::TexCubeArrayFloatFloat;
2707  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
2708  return NVPTXISD::TexCubeArrayFloatFloatLevel;
2709  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
2710  return NVPTXISD::TexCubeArrayS32Float;
2711  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
2712  return NVPTXISD::TexCubeArrayS32FloatLevel;
2713  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
2714  return NVPTXISD::TexCubeArrayU32Float;
2715  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
2716  return NVPTXISD::TexCubeArrayU32FloatLevel;
2717 
2718  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
2719  return NVPTXISD::Tld4R2DFloatFloat;
2720  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
2721  return NVPTXISD::Tld4G2DFloatFloat;
2722  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
2723  return NVPTXISD::Tld4B2DFloatFloat;
2724  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
2725  return NVPTXISD::Tld4A2DFloatFloat;
2726  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
2727  return NVPTXISD::Tld4R2DS64Float;
2728  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
2729  return NVPTXISD::Tld4G2DS64Float;
2730  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
2731  return NVPTXISD::Tld4B2DS64Float;
2732  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
2733  return NVPTXISD::Tld4A2DS64Float;
2734  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
2735  return NVPTXISD::Tld4R2DU64Float;
2736  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
2737  return NVPTXISD::Tld4G2DU64Float;
2738  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
2739  return NVPTXISD::Tld4B2DU64Float;
2740  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
2741  return NVPTXISD::Tld4A2DU64Float;
2742 
2743  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
2744  return NVPTXISD::TexUnified1DFloatS32;
2745  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
2746  return NVPTXISD::TexUnified1DFloatFloat;
2747  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
2748  return NVPTXISD::TexUnified1DFloatFloatLevel;
2749  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
2750  return NVPTXISD::TexUnified1DFloatFloatGrad;
2751  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
2752  return NVPTXISD::TexUnified1DS32S32;
2753  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
2754  return NVPTXISD::TexUnified1DS32Float;
2755  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
2756  return NVPTXISD::TexUnified1DS32FloatLevel;
2757  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
2758  return NVPTXISD::TexUnified1DS32FloatGrad;
2759  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
2760  return NVPTXISD::TexUnified1DU32S32;
2761  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
2762  return NVPTXISD::TexUnified1DU32Float;
2763  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
2764  return NVPTXISD::TexUnified1DU32FloatLevel;
2765  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
2766  return NVPTXISD::TexUnified1DU32FloatGrad;
2767 
2768  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
2769  return NVPTXISD::TexUnified1DArrayFloatS32;
2770  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
2771  return NVPTXISD::TexUnified1DArrayFloatFloat;
2772  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
2773  return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
2774  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
2775  return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
2776  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
2777  return NVPTXISD::TexUnified1DArrayS32S32;
2778  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
2779  return NVPTXISD::TexUnified1DArrayS32Float;
2780  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
2781  return NVPTXISD::TexUnified1DArrayS32FloatLevel;
2782  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
2783  return NVPTXISD::TexUnified1DArrayS32FloatGrad;
2784  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
2785  return NVPTXISD::TexUnified1DArrayU32S32;
2786  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
2787  return NVPTXISD::TexUnified1DArrayU32Float;
2788  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
2789  return NVPTXISD::TexUnified1DArrayU32FloatLevel;
2790  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
2791  return NVPTXISD::TexUnified1DArrayU32FloatGrad;
2792 
2793  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
2794  return NVPTXISD::TexUnified2DFloatS32;
2795  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
2796  return NVPTXISD::TexUnified2DFloatFloat;
2797  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
2798  return NVPTXISD::TexUnified2DFloatFloatLevel;
2799  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
2800  return NVPTXISD::TexUnified2DFloatFloatGrad;
2801  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
2802  return NVPTXISD::TexUnified2DS32S32;
2803  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
2804  return NVPTXISD::TexUnified2DS32Float;
2805  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
2806  return NVPTXISD::TexUnified2DS32FloatLevel;
2807  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
2808  return NVPTXISD::TexUnified2DS32FloatGrad;
2809  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
2810  return NVPTXISD::TexUnified2DU32S32;
2811  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
2812  return NVPTXISD::TexUnified2DU32Float;
2813  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
2814  return NVPTXISD::TexUnified2DU32FloatLevel;
2815  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
2816  return NVPTXISD::TexUnified2DU32FloatGrad;
2817 
2818  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
2819  return NVPTXISD::TexUnified2DArrayFloatS32;
2820  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
2821  return NVPTXISD::TexUnified2DArrayFloatFloat;
2822  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
2823  return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
2824  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
2825  return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
2826  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
2827  return NVPTXISD::TexUnified2DArrayS32S32;
2828  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
2829  return NVPTXISD::TexUnified2DArrayS32Float;
2830  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
2831  return NVPTXISD::TexUnified2DArrayS32FloatLevel;
2832  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
2833  return NVPTXISD::TexUnified2DArrayS32FloatGrad;
2834  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
2835  return NVPTXISD::TexUnified2DArrayU32S32;
2836  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
2837  return NVPTXISD::TexUnified2DArrayU32Float;
2838  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
2839  return NVPTXISD::TexUnified2DArrayU32FloatLevel;
2840  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
2841  return NVPTXISD::TexUnified2DArrayU32FloatGrad;
2842 
2843  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
2844  return NVPTXISD::TexUnified3DFloatS32;
2845  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
2846  return NVPTXISD::TexUnified3DFloatFloat;
2847  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
2848  return NVPTXISD::TexUnified3DFloatFloatLevel;
2849  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
2850  return NVPTXISD::TexUnified3DFloatFloatGrad;
2851  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
2852  return NVPTXISD::TexUnified3DS32S32;
2853  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
2854  return NVPTXISD::TexUnified3DS32Float;
2855  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
2856  return NVPTXISD::TexUnified3DS32FloatLevel;
2857  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
2858  return NVPTXISD::TexUnified3DS32FloatGrad;
2859  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
2860  return NVPTXISD::TexUnified3DU32S32;
2861  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
2862  return NVPTXISD::TexUnified3DU32Float;
2863  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
2864  return NVPTXISD::TexUnified3DU32FloatLevel;
2865  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
2866  return NVPTXISD::TexUnified3DU32FloatGrad;
2867 
2868  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
2869  return NVPTXISD::TexUnifiedCubeFloatFloat;
2870  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
2871  return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
2872  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
2873  return NVPTXISD::TexUnifiedCubeS32Float;
2874  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
2875  return NVPTXISD::TexUnifiedCubeS32FloatLevel;
2876  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
2877  return NVPTXISD::TexUnifiedCubeU32Float;
2878  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
2879  return NVPTXISD::TexUnifiedCubeU32FloatLevel;
2880 
2881  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
2882  return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
2883  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
2884  return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
2885  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
2886  return NVPTXISD::TexUnifiedCubeArrayS32Float;
2887  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
2888  return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
2889  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
2890  return NVPTXISD::TexUnifiedCubeArrayU32Float;
2891  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
2892  return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
2893 
2894  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
2895  return NVPTXISD::Tld4UnifiedR2DFloatFloat;
2896  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
2897  return NVPTXISD::Tld4UnifiedG2DFloatFloat;
2898  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
2899  return NVPTXISD::Tld4UnifiedB2DFloatFloat;
2900  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
2901  return NVPTXISD::Tld4UnifiedA2DFloatFloat;
2902  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
2903  return NVPTXISD::Tld4UnifiedR2DS64Float;
2904  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
2905  return NVPTXISD::Tld4UnifiedG2DS64Float;
2906  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
2907  return NVPTXISD::Tld4UnifiedB2DS64Float;
2908  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
2909  return NVPTXISD::Tld4UnifiedA2DS64Float;
2910  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
2911  return NVPTXISD::Tld4UnifiedR2DU64Float;
2912  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
2913  return NVPTXISD::Tld4UnifiedG2DU64Float;
2914  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
2915  return NVPTXISD::Tld4UnifiedB2DU64Float;
2916  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
2917  return NVPTXISD::Tld4UnifiedA2DU64Float;
2918  }
2919 }
2920 
2921 static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
2922  switch (Intrinsic) {
2923  default:
2924  return 0;
2925  case Intrinsic::nvvm_suld_1d_i8_clamp:
2926  return NVPTXISD::Suld1DI8Clamp;
2927  case Intrinsic::nvvm_suld_1d_i16_clamp:
2928  return NVPTXISD::Suld1DI16Clamp;
2929  case Intrinsic::nvvm_suld_1d_i32_clamp:
2930  return NVPTXISD::Suld1DI32Clamp;
2931  case Intrinsic::nvvm_suld_1d_i64_clamp:
2932  return NVPTXISD::Suld1DI64Clamp;
2933  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
2934  return NVPTXISD::Suld1DV2I8Clamp;
2935  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
2936  return NVPTXISD::Suld1DV2I16Clamp;
2937  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
2938  return NVPTXISD::Suld1DV2I32Clamp;
2939  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
2940  return NVPTXISD::Suld1DV2I64Clamp;
2941  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
2942  return NVPTXISD::Suld1DV4I8Clamp;
2943  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
2944  return NVPTXISD::Suld1DV4I16Clamp;
2945  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
2946  return NVPTXISD::Suld1DV4I32Clamp;
2947  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
2948  return NVPTXISD::Suld1DArrayI8Clamp;
2949  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
2950  return NVPTXISD::Suld1DArrayI16Clamp;
2951  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
2952  return NVPTXISD::Suld1DArrayI32Clamp;
2953  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
2954  return NVPTXISD::Suld1DArrayI64Clamp;
2955  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
2956  return NVPTXISD::Suld1DArrayV2I8Clamp;
2957  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
2958  return NVPTXISD::Suld1DArrayV2I16Clamp;
2959  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
2960  return NVPTXISD::Suld1DArrayV2I32Clamp;
2961  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
2962  return NVPTXISD::Suld1DArrayV2I64Clamp;
2963  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
2964  return NVPTXISD::Suld1DArrayV4I8Clamp;
2965  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
2966  return NVPTXISD::Suld1DArrayV4I16Clamp;
2967  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
2968  return NVPTXISD::Suld1DArrayV4I32Clamp;
2969  case Intrinsic::nvvm_suld_2d_i8_clamp:
2970  return NVPTXISD::Suld2DI8Clamp;
2971  case Intrinsic::nvvm_suld_2d_i16_clamp:
2972  return NVPTXISD::Suld2DI16Clamp;
2973  case Intrinsic::nvvm_suld_2d_i32_clamp:
2974  return NVPTXISD::Suld2DI32Clamp;
2975  case Intrinsic::nvvm_suld_2d_i64_clamp:
2976  return NVPTXISD::Suld2DI64Clamp;
2977  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
2978  return NVPTXISD::Suld2DV2I8Clamp;
2979  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
2980  return NVPTXISD::Suld2DV2I16Clamp;
2981  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
2982  return NVPTXISD::Suld2DV2I32Clamp;
2983  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
2984  return NVPTXISD::Suld2DV2I64Clamp;
2985  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
2986  return NVPTXISD::Suld2DV4I8Clamp;
2987  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
2988  return NVPTXISD::Suld2DV4I16Clamp;
2989  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
2990  return NVPTXISD::Suld2DV4I32Clamp;
2991  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
2992  return NVPTXISD::Suld2DArrayI8Clamp;
2993  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
2994  return NVPTXISD::Suld2DArrayI16Clamp;
2995  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
2996  return NVPTXISD::Suld2DArrayI32Clamp;
2997  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
2998  return NVPTXISD::Suld2DArrayI64Clamp;
2999  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
3000  return NVPTXISD::Suld2DArrayV2I8Clamp;
3001  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
3002  return NVPTXISD::Suld2DArrayV2I16Clamp;
3003  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
3004  return NVPTXISD::Suld2DArrayV2I32Clamp;
3005  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
3006  return NVPTXISD::Suld2DArrayV2I64Clamp;
3007  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
3008  return NVPTXISD::Suld2DArrayV4I8Clamp;
3009  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
3010  return NVPTXISD::Suld2DArrayV4I16Clamp;
3011  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
3012  return NVPTXISD::Suld2DArrayV4I32Clamp;
3013  case Intrinsic::nvvm_suld_3d_i8_clamp:
3014  return NVPTXISD::Suld3DI8Clamp;
3015  case Intrinsic::nvvm_suld_3d_i16_clamp:
3016  return NVPTXISD::Suld3DI16Clamp;
3017  case Intrinsic::nvvm_suld_3d_i32_clamp:
3018  return NVPTXISD::Suld3DI32Clamp;
3019  case Intrinsic::nvvm_suld_3d_i64_clamp:
3020  return NVPTXISD::Suld3DI64Clamp;
3021  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
3022  return NVPTXISD::Suld3DV2I8Clamp;
3023  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
3024  return NVPTXISD::Suld3DV2I16Clamp;
3025  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
3026  return NVPTXISD::Suld3DV2I32Clamp;
3027  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
3028  return NVPTXISD::Suld3DV2I64Clamp;
3029  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
3030  return NVPTXISD::Suld3DV4I8Clamp;
3031  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
3032  return NVPTXISD::Suld3DV4I16Clamp;
3033  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
3034  return NVPTXISD::Suld3DV4I32Clamp;
3035  case Intrinsic::nvvm_suld_1d_i8_trap:
3036  return NVPTXISD::Suld1DI8Trap;
3037  case Intrinsic::nvvm_suld_1d_i16_trap:
3038  return NVPTXISD::Suld1DI16Trap;
3039  case Intrinsic::nvvm_suld_1d_i32_trap:
3040  return NVPTXISD::Suld1DI32Trap;
3041  case Intrinsic::nvvm_suld_1d_i64_trap:
3042  return NVPTXISD::Suld1DI64Trap;
3043  case Intrinsic::nvvm_suld_1d_v2i8_trap:
3044  return NVPTXISD::Suld1DV2I8Trap;
3045  case Intrinsic::nvvm_suld_1d_v2i16_trap:
3046  return NVPTXISD::Suld1DV2I16Trap;
3047  case Intrinsic::nvvm_suld_1d_v2i32_trap:
3048  return NVPTXISD::Suld1DV2I32Trap;
3049  case Intrinsic::nvvm_suld_1d_v2i64_trap:
3050  return NVPTXISD::Suld1DV2I64Trap;
3051  case Intrinsic::nvvm_suld_1d_v4i8_trap:
3052  return NVPTXISD::Suld1DV4I8Trap;
3053  case Intrinsic::nvvm_suld_1d_v4i16_trap:
3054  return NVPTXISD::Suld1DV4I16Trap;
3055  case Intrinsic::nvvm_suld_1d_v4i32_trap:
3056  return NVPTXISD::Suld1DV4I32Trap;
3057  case Intrinsic::nvvm_suld_1d_array_i8_trap:
3058  return NVPTXISD::Suld1DArrayI8Trap;
3059  case Intrinsic::nvvm_suld_1d_array_i16_trap:
3060  return NVPTXISD::Suld1DArrayI16Trap;
3061  case Intrinsic::nvvm_suld_1d_array_i32_trap:
3062  return NVPTXISD::Suld1DArrayI32Trap;
3063  case Intrinsic::nvvm_suld_1d_array_i64_trap:
3064  return NVPTXISD::Suld1DArrayI64Trap;
3065  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
3066  return NVPTXISD::Suld1DArrayV2I8Trap;
3067  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
3068  return NVPTXISD::Suld1DArrayV2I16Trap;
3069  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
3070  return NVPTXISD::Suld1DArrayV2I32Trap;
3071  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
3072  return NVPTXISD::Suld1DArrayV2I64Trap;
3073  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
3074  return NVPTXISD::Suld1DArrayV4I8Trap;
3075  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
3076  return NVPTXISD::Suld1DArrayV4I16Trap;
3077  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
3078  return NVPTXISD::Suld1DArrayV4I32Trap;
3079  case Intrinsic::nvvm_suld_2d_i8_trap:
3080  return NVPTXISD::Suld2DI8Trap;
3081  case Intrinsic::nvvm_suld_2d_i16_trap:
3082  return NVPTXISD::Suld2DI16Trap;
3083  case Intrinsic::nvvm_suld_2d_i32_trap:
3084  return NVPTXISD::Suld2DI32Trap;
3085  case Intrinsic::nvvm_suld_2d_i64_trap:
3086  return NVPTXISD::Suld2DI64Trap;
3087  case Intrinsic::nvvm_suld_2d_v2i8_trap:
3088  return NVPTXISD::Suld2DV2I8Trap;
3089  case Intrinsic::nvvm_suld_2d_v2i16_trap:
3090  return NVPTXISD::Suld2DV2I16Trap;
3091  case Intrinsic::nvvm_suld_2d_v2i32_trap:
3092  return NVPTXISD::Suld2DV2I32Trap;
3093  case Intrinsic::nvvm_suld_2d_v2i64_trap:
3094  return NVPTXISD::Suld2DV2I64Trap;
3095  case Intrinsic::nvvm_suld_2d_v4i8_trap:
3096  return NVPTXISD::Suld2DV4I8Trap;
3097  case Intrinsic::nvvm_suld_2d_v4i16_trap:
3098  return NVPTXISD::Suld2DV4I16Trap;
3099  case Intrinsic::nvvm_suld_2d_v4i32_trap:
3100  return NVPTXISD::Suld2DV4I32Trap;
3101  case Intrinsic::nvvm_suld_2d_array_i8_trap:
3102  return NVPTXISD::Suld2DArrayI8Trap;
3103  case Intrinsic::nvvm_suld_2d_array_i16_trap:
3104  return NVPTXISD::Suld2DArrayI16Trap;
3105  case Intrinsic::nvvm_suld_2d_array_i32_trap:
3106  return NVPTXISD::Suld2DArrayI32Trap;
3107  case Intrinsic::nvvm_suld_2d_array_i64_trap:
3108  return NVPTXISD::Suld2DArrayI64Trap;
3109  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
3110  return NVPTXISD::Suld2DArrayV2I8Trap;
3111  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
3112  return NVPTXISD::Suld2DArrayV2I16Trap;
3113  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
3114  return NVPTXISD::Suld2DArrayV2I32Trap;
3115  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
3116  return NVPTXISD::Suld2DArrayV2I64Trap;
3117  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
3118  return NVPTXISD::Suld2DArrayV4I8Trap;
3119  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
3120  return NVPTXISD::Suld2DArrayV4I16Trap;
3121  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
3122  return NVPTXISD::Suld2DArrayV4I32Trap;
3123  case Intrinsic::nvvm_suld_3d_i8_trap:
3124  return NVPTXISD::Suld3DI8Trap;
3125  case Intrinsic::nvvm_suld_3d_i16_trap:
3126  return NVPTXISD::Suld3DI16Trap;
3127  case Intrinsic::nvvm_suld_3d_i32_trap:
3128  return NVPTXISD::Suld3DI32Trap;
3129  case Intrinsic::nvvm_suld_3d_i64_trap:
3130  return NVPTXISD::Suld3DI64Trap;
3131  case Intrinsic::nvvm_suld_3d_v2i8_trap:
3132  return NVPTXISD::Suld3DV2I8Trap;
3133  case Intrinsic::nvvm_suld_3d_v2i16_trap:
3134  return NVPTXISD::Suld3DV2I16Trap;
3135  case Intrinsic::nvvm_suld_3d_v2i32_trap:
3136  return NVPTXISD::Suld3DV2I32Trap;
3137  case Intrinsic::nvvm_suld_3d_v2i64_trap:
3138  return NVPTXISD::Suld3DV2I64Trap;
3139  case Intrinsic::nvvm_suld_3d_v4i8_trap:
3140  return NVPTXISD::Suld3DV4I8Trap;
3141  case Intrinsic::nvvm_suld_3d_v4i16_trap:
3142  return NVPTXISD::Suld3DV4I16Trap;
3143  case Intrinsic::nvvm_suld_3d_v4i32_trap:
3144  return NVPTXISD::Suld3DV4I32Trap;
3145  case Intrinsic::nvvm_suld_1d_i8_zero:
3146  return NVPTXISD::Suld1DI8Zero;
3147  case Intrinsic::nvvm_suld_1d_i16_zero:
3148  return NVPTXISD::Suld1DI16Zero;
3149  case Intrinsic::nvvm_suld_1d_i32_zero:
3150  return NVPTXISD::Suld1DI32Zero;
3151  case Intrinsic::nvvm_suld_1d_i64_zero:
3152  return NVPTXISD::Suld1DI64Zero;
3153  case Intrinsic::nvvm_suld_1d_v2i8_zero:
3154  return NVPTXISD::Suld1DV2I8Zero;
3155  case Intrinsic::nvvm_suld_1d_v2i16_zero:
3156  return NVPTXISD::Suld1DV2I16Zero;
3157  case Intrinsic::nvvm_suld_1d_v2i32_zero:
3158  return NVPTXISD::Suld1DV2I32Zero;
3159  case Intrinsic::nvvm_suld_1d_v2i64_zero:
3160  return NVPTXISD::Suld1DV2I64Zero;
3161  case Intrinsic::nvvm_suld_1d_v4i8_zero:
3162  return NVPTXISD::Suld1DV4I8Zero;
3163  case Intrinsic::nvvm_suld_1d_v4i16_zero:
3164  return NVPTXISD::Suld1DV4I16Zero;
3165  case Intrinsic::nvvm_suld_1d_v4i32_zero:
3166  return NVPTXISD::Suld1DV4I32Zero;
3167  case Intrinsic::nvvm_suld_1d_array_i8_zero:
3168  return NVPTXISD::Suld1DArrayI8Zero;
3169  case Intrinsic::nvvm_suld_1d_array_i16_zero:
3170  return NVPTXISD::Suld1DArrayI16Zero;
3171  case Intrinsic::nvvm_suld_1d_array_i32_zero:
3172  return NVPTXISD::Suld1DArrayI32Zero;
3173  case Intrinsic::nvvm_suld_1d_array_i64_zero:
3174  return NVPTXISD::Suld1DArrayI64Zero;
3175  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
3176  return NVPTXISD::Suld1DArrayV2I8Zero;
3177  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
3178  return NVPTXISD::Suld1DArrayV2I16Zero;
3179  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
3180  return NVPTXISD::Suld1DArrayV2I32Zero;
3181  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
3182  return NVPTXISD::Suld1DArrayV2I64Zero;
3183  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
3184  return NVPTXISD::Suld1DArrayV4I8Zero;
3185  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
3186  return NVPTXISD::Suld1DArrayV4I16Zero;
3187  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
3188  return NVPTXISD::Suld1DArrayV4I32Zero;
3189  case Intrinsic::nvvm_suld_2d_i8_zero:
3190  return NVPTXISD::Suld2DI8Zero;
3191  case Intrinsic::nvvm_suld_2d_i16_zero:
3192  return NVPTXISD::Suld2DI16Zero;
3193  case Intrinsic::nvvm_suld_2d_i32_zero:
3194  return NVPTXISD::Suld2DI32Zero;
3195  case Intrinsic::nvvm_suld_2d_i64_zero:
3196  return NVPTXISD::Suld2DI64Zero;
3197  case Intrinsic::nvvm_suld_2d_v2i8_zero:
3198  return NVPTXISD::Suld2DV2I8Zero;
3199  case Intrinsic::nvvm_suld_2d_v2i16_zero:
3200  return NVPTXISD::Suld2DV2I16Zero;
3201  case Intrinsic::nvvm_suld_2d_v2i32_zero:
3202  return NVPTXISD::Suld2DV2I32Zero;
3203  case Intrinsic::nvvm_suld_2d_v2i64_zero:
3204  return NVPTXISD::Suld2DV2I64Zero;
3205  case Intrinsic::nvvm_suld_2d_v4i8_zero:
3206  return NVPTXISD::Suld2DV4I8Zero;
3207  case Intrinsic::nvvm_suld_2d_v4i16_zero:
3208  return NVPTXISD::Suld2DV4I16Zero;
3209  case Intrinsic::nvvm_suld_2d_v4i32_zero:
3210  return NVPTXISD::Suld2DV4I32Zero;
3211  case Intrinsic::nvvm_suld_2d_array_i8_zero:
3212  return NVPTXISD::Suld2DArrayI8Zero;
3213  case Intrinsic::nvvm_suld_2d_array_i16_zero:
3214  return NVPTXISD::Suld2DArrayI16Zero;
3215  case Intrinsic::nvvm_suld_2d_array_i32_zero:
3216  return NVPTXISD::Suld2DArrayI32Zero;
3217  case Intrinsic::nvvm_suld_2d_array_i64_zero:
3218  return NVPTXISD::Suld2DArrayI64Zero;
3219  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
3220  return NVPTXISD::Suld2DArrayV2I8Zero;
3221  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
3222  return NVPTXISD::Suld2DArrayV2I16Zero;
3223  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
3224  return NVPTXISD::Suld2DArrayV2I32Zero;
3225  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
3226  return NVPTXISD::Suld2DArrayV2I64Zero;
3227  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
3228  return NVPTXISD::Suld2DArrayV4I8Zero;
3229  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
3230  return NVPTXISD::Suld2DArrayV4I16Zero;
3231  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
3232  return NVPTXISD::Suld2DArrayV4I32Zero;
3233  case Intrinsic::nvvm_suld_3d_i8_zero:
3234  return NVPTXISD::Suld3DI8Zero;
3235  case Intrinsic::nvvm_suld_3d_i16_zero:
3236  return NVPTXISD::Suld3DI16Zero;
3237  case Intrinsic::nvvm_suld_3d_i32_zero:
3238  return NVPTXISD::Suld3DI32Zero;
3239  case Intrinsic::nvvm_suld_3d_i64_zero:
3240  return NVPTXISD::Suld3DI64Zero;
3241  case Intrinsic::nvvm_suld_3d_v2i8_zero:
3242  return NVPTXISD::Suld3DV2I8Zero;
3243  case Intrinsic::nvvm_suld_3d_v2i16_zero:
3244  return NVPTXISD::Suld3DV2I16Zero;
3245  case Intrinsic::nvvm_suld_3d_v2i32_zero:
3246  return NVPTXISD::Suld3DV2I32Zero;
3247  case Intrinsic::nvvm_suld_3d_v2i64_zero:
3248  return NVPTXISD::Suld3DV2I64Zero;
3249  case Intrinsic::nvvm_suld_3d_v4i8_zero:
3250  return NVPTXISD::Suld3DV4I8Zero;
3251  case Intrinsic::nvvm_suld_3d_v4i16_zero:
3252  return NVPTXISD::Suld3DV4I16Zero;
3253  case Intrinsic::nvvm_suld_3d_v4i32_zero:
3254  return NVPTXISD::Suld3DV4I32Zero;
3255  }
3256 }
3257 
3258 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
3259 // TgtMemIntrinsic because we need information that is only available in the
3260 // "Value" type of the destination pointer, in particular its address space.
3263 bool NVPTXTargetLowering::getTgtMemIntrinsic(
3264  IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
3265  switch (Intrinsic) {
3266  default:
3267  return false;
3268 
3269  case Intrinsic::nvvm_atomic_load_add_f32:
3270  Info.opc = ISD::INTRINSIC_W_CHAIN;
3271  Info.memVT = MVT::f32;
3272  Info.ptrVal = I.getArgOperand(0);
3273  Info.offset = 0;
3274  Info.vol = 0;
3275  Info.readMem = true;
3276  Info.writeMem = true;
3277  Info.align = 0;
3278  return true;
3279 
3280  case Intrinsic::nvvm_atomic_load_inc_32:
3281  case Intrinsic::nvvm_atomic_load_dec_32:
3282  Info.opc = ISD::INTRINSIC_W_CHAIN;
3283  Info.memVT = MVT::i32;
3284  Info.ptrVal = I.getArgOperand(0);
3285  Info.offset = 0;
3286  Info.vol = 0;
3287  Info.readMem = true;
3288  Info.writeMem = true;
3289  Info.align = 0;
3290  return true;
3291 
3292  case Intrinsic::nvvm_ldu_global_i:
3293  case Intrinsic::nvvm_ldu_global_f:
3294  case Intrinsic::nvvm_ldu_global_p: {
3295  auto &DL = I.getModule()->getDataLayout();
3296  Info.opc = ISD::INTRINSIC_W_CHAIN;
3297  if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
3298  Info.memVT = getValueType(DL, I.getType());
3299  else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
3300  Info.memVT = getPointerTy(DL);
3301  else
3302  Info.memVT = getValueType(DL, I.getType());
3303  Info.ptrVal = I.getArgOperand(0);
3304  Info.offset = 0;
3305  Info.vol = 0;
3306  Info.readMem = true;
3307  Info.writeMem = false;
3308  Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3309 
3310  return true;
3311  }
3312  case Intrinsic::nvvm_ldg_global_i:
3313  case Intrinsic::nvvm_ldg_global_f:
3314  case Intrinsic::nvvm_ldg_global_p: {
3315  auto &DL = I.getModule()->getDataLayout();
3316 
3317  Info.opc = ISD::INTRINSIC_W_CHAIN;
3318  if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
3319  Info.memVT = getValueType(DL, I.getType());
3320  else if (Intrinsic == Intrinsic::nvvm_ldg_global_p)
3321  Info.memVT = getPointerTy(DL);
3322  else
3323  Info.memVT = getValueType(DL, I.getType());
3324  Info.ptrVal = I.getArgOperand(0);
3325  Info.offset = 0;
3326  Info.vol = 0;
3327  Info.readMem = true;
3328  Info.writeMem = false;
3329  Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3330 
3331  return true;
3332  }
3333 
3334  case Intrinsic::nvvm_tex_1d_v4f32_s32:
3335  case Intrinsic::nvvm_tex_1d_v4f32_f32:
3336  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
3337  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
3338  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
3339  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
3340  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
3341  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
3342  case Intrinsic::nvvm_tex_2d_v4f32_s32:
3343  case Intrinsic::nvvm_tex_2d_v4f32_f32:
3344  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
3345  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
3346  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
3347  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
3348  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
3349  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
3350  case Intrinsic::nvvm_tex_3d_v4f32_s32:
3351  case Intrinsic::nvvm_tex_3d_v4f32_f32:
3352  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
3353  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
3354  case Intrinsic::nvvm_tex_cube_v4f32_f32:
3355  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
3356  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
3357  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
3358  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
3359  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
3360  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
3361  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
3362  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
3363  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
3364  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
3365  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
3366  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
3367  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
3368  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
3369  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
3370  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
3371  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
3372  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
3373  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
3374  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
3375  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
3376  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
3377  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
3378  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
3379  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
3380  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
3381  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
3382  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
3383  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
3384  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
3385  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
3386  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
3387  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
3388  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
3389  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
3390  Info.opc = getOpcForTextureInstr(Intrinsic);
3391  Info.memVT = MVT::v4f32;
3392  Info.ptrVal = nullptr;
3393  Info.offset = 0;
3394  Info.vol = 0;
3395  Info.readMem = true;
3396  Info.writeMem = false;
3397  Info.align = 16;
3398  return true;
3399  }
3400  case Intrinsic::nvvm_tex_1d_v4s32_s32:
3401  case Intrinsic::nvvm_tex_1d_v4s32_f32:
3402  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
3403  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
3404  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
3405  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
3406  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
3407  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
3408  case Intrinsic::nvvm_tex_2d_v4s32_s32:
3409  case Intrinsic::nvvm_tex_2d_v4s32_f32:
3410  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
3411  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
3412  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
3413  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
3414  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
3415  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
3416  case Intrinsic::nvvm_tex_3d_v4s32_s32:
3417  case Intrinsic::nvvm_tex_3d_v4s32_f32:
3418  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
3419  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
3420  case Intrinsic::nvvm_tex_cube_v4s32_f32:
3421  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
3422  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
3423  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
3424  case Intrinsic::nvvm_tex_cube_v4u32_f32:
3425  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
3426  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
3427  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
3428  case Intrinsic::nvvm_tex_1d_v4u32_s32:
3429  case Intrinsic::nvvm_tex_1d_v4u32_f32:
3430  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
3431  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
3432  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
3433  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
3434  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
3435  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
3436  case Intrinsic::nvvm_tex_2d_v4u32_s32:
3437  case Intrinsic::nvvm_tex_2d_v4u32_f32:
3438  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
3439  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
3440  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
3441  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
3442  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
3443  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
3444  case Intrinsic::nvvm_tex_3d_v4u32_s32:
3445  case Intrinsic::nvvm_tex_3d_v4u32_f32:
3446  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
3447  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
3448  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
3449  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
3450  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
3451  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
3452  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
3453  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
3454  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
3455  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
3456  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
3457  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
3458  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
3459  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
3460  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
3461  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
3462  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
3463  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
3464  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
3465  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
3466  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
3467  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
3468  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
3469  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
3470  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
3471  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
3472  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
3473  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
3474  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
3475  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
3476  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
3477  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
3478  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
3479  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
3480  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
3481  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
3482  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
3483  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
3484  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
3485  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
3486  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
3487  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
3488  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
3489  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
3490  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
3491  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
3492  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
3493  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
3494  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
3495  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
3496  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
3497  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
3498  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
3499  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
3500  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
3501  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
3502  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
3503  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
3504  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
3505  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
3506  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
3507  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
3508  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
3509  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
3510  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
3511  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
3512  Info.opc = getOpcForTextureInstr(Intrinsic);
3513  Info.memVT = MVT::v4i32;
3514  Info.ptrVal = nullptr;
3515  Info.offset = 0;
3516  Info.vol = 0;
3517  Info.readMem = true;
3518  Info.writeMem = false;
3519  Info.align = 16;
3520  return true;
3521  }
3522  case Intrinsic::nvvm_suld_1d_i8_clamp:
3523  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
3524  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
3525  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
3526  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
3527  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
3528  case Intrinsic::nvvm_suld_2d_i8_clamp:
3529  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
3530  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
3531  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
3532  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
3533  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
3534  case Intrinsic::nvvm_suld_3d_i8_clamp:
3535  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
3536  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
3537  case Intrinsic::nvvm_suld_1d_i8_trap:
3538  case Intrinsic::nvvm_suld_1d_v2i8_trap:
3539  case Intrinsic::nvvm_suld_1d_v4i8_trap:
3540  case Intrinsic::nvvm_suld_1d_array_i8_trap:
3541  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
3542  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
3543  case Intrinsic::nvvm_suld_2d_i8_trap:
3544  case Intrinsic::nvvm_suld_2d_v2i8_trap:
3545  case Intrinsic::nvvm_suld_2d_v4i8_trap:
3546  case Intrinsic::nvvm_suld_2d_array_i8_trap:
3547  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
3548  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
3549  case Intrinsic::nvvm_suld_3d_i8_trap:
3550  case Intrinsic::nvvm_suld_3d_v2i8_trap:
3551  case Intrinsic::nvvm_suld_3d_v4i8_trap:
3552  case Intrinsic::nvvm_suld_1d_i8_zero:
3553  case Intrinsic::nvvm_suld_1d_v2i8_zero:
3554  case Intrinsic::nvvm_suld_1d_v4i8_zero:
3555  case Intrinsic::nvvm_suld_1d_array_i8_zero:
3556  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
3557  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
3558  case Intrinsic::nvvm_suld_2d_i8_zero:
3559  case Intrinsic::nvvm_suld_2d_v2i8_zero:
3560  case Intrinsic::nvvm_suld_2d_v4i8_zero:
3561  case Intrinsic::nvvm_suld_2d_array_i8_zero:
3562  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
3563  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
3564  case Intrinsic::nvvm_suld_3d_i8_zero:
3565  case Intrinsic::nvvm_suld_3d_v2i8_zero:
3566  case Intrinsic::nvvm_suld_3d_v4i8_zero: {
3567  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3568  Info.memVT = MVT::i8;
3569  Info.ptrVal = nullptr;
3570  Info.offset = 0;
3571  Info.vol = 0;
3572  Info.readMem = true;
3573  Info.writeMem = false;
3574  Info.align = 16;
3575  return true;
3576  }
3577  case Intrinsic::nvvm_suld_1d_i16_clamp:
3578  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
3579  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
3580  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
3581  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
3582  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
3583  case Intrinsic::nvvm_suld_2d_i16_clamp:
3584  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
3585  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
3586  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
3587  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
3588  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
3589  case Intrinsic::nvvm_suld_3d_i16_clamp:
3590  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
3591  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
3592  case Intrinsic::nvvm_suld_1d_i16_trap:
3593  case Intrinsic::nvvm_suld_1d_v2i16_trap:
3594  case Intrinsic::nvvm_suld_1d_v4i16_trap:
3595  case Intrinsic::nvvm_suld_1d_array_i16_trap:
3596  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
3597  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
3598  case Intrinsic::nvvm_suld_2d_i16_trap:
3599  case Intrinsic::nvvm_suld_2d_v2i16_trap:
3600  case Intrinsic::nvvm_suld_2d_v4i16_trap:
3601  case Intrinsic::nvvm_suld_2d_array_i16_trap:
3602  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
3603  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
3604  case Intrinsic::nvvm_suld_3d_i16_trap:
3605  case Intrinsic::nvvm_suld_3d_v2i16_trap:
3606  case Intrinsic::nvvm_suld_3d_v4i16_trap:
3607  case Intrinsic::nvvm_suld_1d_i16_zero:
3608  case Intrinsic::nvvm_suld_1d_v2i16_zero:
3609  case Intrinsic::nvvm_suld_1d_v4i16_zero:
3610  case Intrinsic::nvvm_suld_1d_array_i16_zero:
3611  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
3612  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
3613  case Intrinsic::nvvm_suld_2d_i16_zero:
3614  case Intrinsic::nvvm_suld_2d_v2i16_zero:
3615  case Intrinsic::nvvm_suld_2d_v4i16_zero:
3616  case Intrinsic::nvvm_suld_2d_array_i16_zero:
3617  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
3618  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
3619  case Intrinsic::nvvm_suld_3d_i16_zero:
3620  case Intrinsic::nvvm_suld_3d_v2i16_zero:
3621  case Intrinsic::nvvm_suld_3d_v4i16_zero: {
3622  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3623  Info.memVT = MVT::i16;
3624  Info.ptrVal = nullptr;
3625  Info.offset = 0;
3626  Info.vol = 0;
3627  Info.readMem = true;
3628  Info.writeMem = false;
3629  Info.align = 16;
3630  return true;
3631  }
3632  case Intrinsic::nvvm_suld_1d_i32_clamp:
3633  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
3634  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
3635  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
3636  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
3637  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
3638  case Intrinsic::nvvm_suld_2d_i32_clamp:
3639  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
3640  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
3641  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
3642  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
3643  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
3644  case Intrinsic::nvvm_suld_3d_i32_clamp:
3645  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
3646  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
3647  case Intrinsic::nvvm_suld_1d_i32_trap:
3648  case Intrinsic::nvvm_suld_1d_v2i32_trap:
3649  case Intrinsic::nvvm_suld_1d_v4i32_trap:
3650  case Intrinsic::nvvm_suld_1d_array_i32_trap:
3651  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
3652  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
3653  case Intrinsic::nvvm_suld_2d_i32_trap:
3654  case Intrinsic::nvvm_suld_2d_v2i32_trap:
3655  case Intrinsic::nvvm_suld_2d_v4i32_trap:
3656  case Intrinsic::nvvm_suld_2d_array_i32_trap:
3657  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
3658  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
3659  case Intrinsic::nvvm_suld_3d_i32_trap:
3660  case Intrinsic::nvvm_suld_3d_v2i32_trap:
3661  case Intrinsic::nvvm_suld_3d_v4i32_trap:
3662  case Intrinsic::nvvm_suld_1d_i32_zero:
3663  case Intrinsic::nvvm_suld_1d_v2i32_zero:
3664  case Intrinsic::nvvm_suld_1d_v4i32_zero:
3665  case Intrinsic::nvvm_suld_1d_array_i32_zero:
3666  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
3667  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
3668  case Intrinsic::nvvm_suld_2d_i32_zero:
3669  case Intrinsic::nvvm_suld_2d_v2i32_zero:
3670  case Intrinsic::nvvm_suld_2d_v4i32_zero:
3671  case Intrinsic::nvvm_suld_2d_array_i32_zero:
3672  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
3673  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
3674  case Intrinsic::nvvm_suld_3d_i32_zero:
3675  case Intrinsic::nvvm_suld_3d_v2i32_zero:
3676  case Intrinsic::nvvm_suld_3d_v4i32_zero: {
3677  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3678  Info.memVT = MVT::i32;
3679  Info.ptrVal = nullptr;
3680  Info.offset = 0;
3681  Info.vol = 0;
3682  Info.readMem = true;
3683  Info.writeMem = false;
3684  Info.align = 16;
3685  return true;
3686  }
3687  case Intrinsic::nvvm_suld_1d_i64_clamp:
3688  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
3689  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
3690  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
3691  case Intrinsic::nvvm_suld_2d_i64_clamp:
3692  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
3693  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
3694  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
3695  case Intrinsic::nvvm_suld_3d_i64_clamp:
3696  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
3697  case Intrinsic::nvvm_suld_1d_i64_trap:
3698  case Intrinsic::nvvm_suld_1d_v2i64_trap:
3699  case Intrinsic::nvvm_suld_1d_array_i64_trap:
3700  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
3701  case Intrinsic::nvvm_suld_2d_i64_trap:
3702  case Intrinsic::nvvm_suld_2d_v2i64_trap:
3703  case Intrinsic::nvvm_suld_2d_array_i64_trap:
3704  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
3705  case Intrinsic::nvvm_suld_3d_i64_trap:
3706  case Intrinsic::nvvm_suld_3d_v2i64_trap:
3707  case Intrinsic::nvvm_suld_1d_i64_zero:
3708  case Intrinsic::nvvm_suld_1d_v2i64_zero:
3709  case Intrinsic::nvvm_suld_1d_array_i64_zero:
3710  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
3711  case Intrinsic::nvvm_suld_2d_i64_zero:
3712  case Intrinsic::nvvm_suld_2d_v2i64_zero:
3713  case Intrinsic::nvvm_suld_2d_array_i64_zero:
3714  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
3715  case Intrinsic::nvvm_suld_3d_i64_zero:
3716  case Intrinsic::nvvm_suld_3d_v2i64_zero: {
3717  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3718  Info.memVT = MVT::i64;
3719  Info.ptrVal = nullptr;
3720  Info.offset = 0;
3721  Info.vol = 0;
3722  Info.readMem = true;
3723  Info.writeMem = false;
3724  Info.align = 16;
3725  return true;
3726  }
3727  }
3728  return false;
3729 }
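
Every nvvm_suld_* case above fills IntrinsicInfo the same way and differs only in memVT, so the shared tail of the surface cases can be pictured as one helper. A minimal sketch (the helper itself is hypothetical and not part of this file; getOpcForSurfaceInstr and the fields are the ones used above):

  // Sketch only: the tail common to every nvvm_suld_* case above.
  static bool setSurfaceLoadInfo(TargetLowering::IntrinsicInfo &Info,
                                 unsigned IntrinsicID, MVT MemVT) {
    Info.opc = getOpcForSurfaceInstr(IntrinsicID); // target surface-load opcode
    Info.memVT = MemVT;     // element type read: i8, i16, i32, or i64
    Info.ptrVal = nullptr;  // surfaces are not addressed through an IR pointer
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;    // a surface load reads memory
    Info.writeMem = false;  // and never writes it
    Info.align = 16;
    return true;
  }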
3730 
3731 /// isLegalAddressingMode - Return true if the addressing mode represented
3732 /// by AM is legal for this target, for a load/store of the specified type.
3733 /// Used to guide target specific optimizations, like loop strength reduction
3734 /// (LoopStrengthReduce.cpp) and memory optimization for address mode
3735 /// (CodeGenPrepare.cpp)
3736 bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
3737  const AddrMode &AM, Type *Ty,
3738  unsigned AS) const {
3739 
3740  // AddrMode - This represents an addressing mode of:
3741  // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
3742  //
3743  // The legal address modes are
3744  // - [avar]
3745  // - [areg]
3746  // - [areg+immoff]
3747  // - [immAddr]
3748 
3749  if (AM.BaseGV) {
3750  if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
3751  return false;
3752  return true;
3753  }
3754 
3755  switch (AM.Scale) {
3756  case 0: // "r", "r+i" or "i" is allowed
3757  break;
3758  case 1:
3759  if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
3760  return false;
3761  // Otherwise we have r+i.
3762  break;
3763  default:
3764  // No scale > 1 is allowed
3765  return false;
3766  }
3767  return true;
3768 }
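
To make the four legal forms concrete, here is a minimal exercise of the rules above, assuming an LLVM unit-test-style harness (the function and its setup are hypothetical; the AddrMode fields and the isLegalAddressingMode signature are the real ones):

  #include <cassert>

  // Sketch only: expected results of the legality rules above.
  void exerciseNVPTXAddrModes(const NVPTXTargetLowering &TLI,
                              const DataLayout &DL, Type *Ty, GlobalValue *GV) {
    TargetLowering::AddrMode AM;  // fields default to zero/null
    AM.BaseGV = GV;                                   // [avar]
    assert(TLI.isLegalAddressingMode(DL, AM, Ty, 0));
    AM.BaseOffs = 4;                                  // global + offset: rejected
    assert(!TLI.isLegalAddressingMode(DL, AM, Ty, 0));
    AM = TargetLowering::AddrMode();
    AM.HasBaseReg = true;                             // [areg]
    assert(TLI.isLegalAddressingMode(DL, AM, Ty, 0));
    AM.BaseOffs = 8;                                  // [areg+immoff]
    assert(TLI.isLegalAddressingMode(DL, AM, Ty, 0));
    AM.Scale = 1;                                     // reg+reg: rejected
    assert(!TLI.isLegalAddressingMode(DL, AM, Ty, 0));
  }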
3769 
3770 //===----------------------------------------------------------------------===//
3771 // NVPTX Inline Assembly Support
3772 //===----------------------------------------------------------------------===//
3773 
3774 /// getConstraintType - Given a constraint letter, return the type of
3775 /// constraint it is for this target.
3776 NVPTXTargetLowering::ConstraintType
3777 NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
3778  if (Constraint.size() == 1) {
3779  switch (Constraint[0]) {
3780  default:
3781  break;
3782  case 'b':
3783  case 'r':
3784  case 'h':
3785  case 'c':
3786  case 'l':
3787  case 'f':
3788  case 'd':
3789  case '0':
3790  case 'N':
3791  return C_RegisterClass;
3792  }
3793  }
3794  return TargetLowering::getConstraintType(Constraint);
3795 }
3796 
3797 std::pair<unsigned, const TargetRegisterClass *>
3798 NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3799  StringRef Constraint,
3800  MVT VT) const {
3801  if (Constraint.size() == 1) {
3802  switch (Constraint[0]) {
3803  case 'b':
3804  return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
3805  case 'c':
3806  return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
3807  case 'h':
3808  return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
3809  case 'r':
3810  return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
3811  case 'l':
3812  case 'N':
3813  return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
3814  case 'f':
3815  return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
3816  case 'd':
3817  return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
3818  }
3819  }
3820  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3821 }
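
For orientation, one of these letters as it appears in GNU-style inline PTX when compiling C++ for this target (an illustrative assumption about typical use, not from this file); the "=r" output constraint is what routes a result through the Int32Regs class handled above:

  // Sketch only: device-side C++ compiled for nvptx64.
  unsigned laneId() {
    unsigned id;
    asm("mov.u32 %0, %%laneid;" : "=r"(id)); // 'r' selects Int32RegsRegClass
    return id;
  }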
3822 
3823 /// getFunctionAlignment - Return the Log2 alignment of this function.
3824 unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *F) const {
3825  return 4;
3826 }
3827 
3828 //===----------------------------------------------------------------------===//
3829 // NVPTX DAG Combining
3830 //===----------------------------------------------------------------------===//
3831 
3832 bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
3833  CodeGenOpt::Level OptLevel) const {
3834  const Function *F = MF.getFunction();
3835  const TargetOptions &TO = MF.getTarget().Options;
3836 
3837  // Always honor command-line argument
3838  if (FMAContractLevelOpt.getNumOccurrences() > 0) {
3839  return FMAContractLevelOpt > 0;
3840  } else if (OptLevel == 0) {
3841  // Do not contract if we're not optimizing the code
3842  return false;
3843  } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
3844  // Honor TargetOptions flags that explicitly say fusion is okay
3845  return true;
3846  } else if (F->hasFnAttribute("unsafe-fp-math")) {
3847  // Check for unsafe-fp-math=true coming from Clang
3848  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
3849  StringRef Val = Attr.getValueAsString();
3850  if (Val == "true")
3851  return true;
3852  }
3853 
3854  // We did not have a clear indication that fusion is allowed, so assume not
3855  return false;
3856 }
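
Because the command-line option wins outright, contraction can be forced independently of the optimization level. Illustrative invocations (the nvptx-fma-level flag is the cl::opt declared at the top of this file; other details may vary by setup):

  llc -march=nvptx64 -nvptx-fma-level=0 input.ll   (never contract to FMA)
  llc -march=nvptx64 -nvptx-fma-level=2 input.ll   (contract aggressively)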
3857 
3858 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
3859 /// operands N0 and N1. This is a helper for PerformADDCombine that is
3860 /// called with the default operands, and if that fails, with commuted
3861 /// operands.
3862 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
3863  TargetLowering::DAGCombinerInfo &DCI,
3864  const NVPTXSubtarget &Subtarget,
3865  CodeGenOpt::Level OptLevel) {
3866  SelectionDAG &DAG = DCI.DAG;
3867  // Skip the non-scalar (vector) case
3868  EVT VT = N0.getValueType();
3869  if (VT.isVector())
3870  return SDValue();
3871 
3872  // fold (add (mul a, b), c) -> (mad a, b, c)
3873  //
3874  if (N0.getOpcode() == ISD::MUL) {
3875  assert (VT.isInteger());
3876  // For integer:
3877  // Since integer multiply-add costs the same as integer multiply
3878  // but is more costly than integer add, do the fusion only when
3879  // the mul is only used in the add.
3880  if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
3881  !N0.getNode()->hasOneUse())
3882  return SDValue();
3883 
3884  // Do the folding
3885  return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
3886  N0.getOperand(0), N0.getOperand(1), N1);
3887  }
3888  else if (N0.getOpcode() == ISD::FMUL) {
3889  if (VT == MVT::f32 || VT == MVT::f64) {
3890  const auto *TLI = static_cast<const NVPTXTargetLowering *>(
3891  &DAG.getTargetLoweringInfo());
3892  if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
3893  return SDValue();
3894 
3895  // For floating point:
3896  // Do the fusion only when the mul has fewer than 5 uses and all
3897  // of them are adds.
3898  // The heuristic is that if a use is not an add, then that use
3899  // cannot be fused into an fma, so the mul is still needed anyway.
3900  // If there are more than 4 uses, even if they are all adds, fusing
3901  // them will increase register pressure.
3902  //
3903  int numUses = 0;
3904  int nonAddCount = 0;
3905  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
3906  UE = N0.getNode()->use_end();
3907  UI != UE; ++UI) {
3908  numUses++;
3909  SDNode *User = *UI;
3910  if (User->getOpcode() != ISD::FADD)
3911  ++nonAddCount;
3912  }
3913  if (numUses >= 5)
3914  return SDValue();
3915  if (nonAddCount) {
3916  int orderNo = N->getIROrder();
3917  int orderNo2 = N0.getNode()->getIROrder();
3918  // Simple heuristic here for considering potential register
3919  // pressure: the difference in IR order is used to measure the
3920  // distance between def and use; the longer the distance, the more
3921  // likely it is to cause register pressure.
3922  if (orderNo - orderNo2 < 500)
3923  return SDValue();
3924 
3925  // Now, check if at least one of the FMUL's operands is live beyond the node N,
3926  // which guarantees that the FMA will not increase register pressure at node N.
3927  bool opIsLive = false;
3928  const SDNode *left = N0.getOperand(0).getNode();
3929  const SDNode *right = N0.getOperand(1).getNode();
3930 
3931  if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
3932  opIsLive = true;
3933 
3934  if (!opIsLive)
3935  for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) {
3936  SDNode *User = *UI;
3937  int orderNo3 = User->getIROrder();
3938  if (orderNo3 > orderNo) {
3939  opIsLive = true;
3940  break;
3941  }
3942  }
3943 
3944  if (!opIsLive)
3945  for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) {
3946  SDNode *User = *UI;
3947  int orderNo3 = User->getIROrder();
3948  if (orderNo3 > orderNo) {
3949  opIsLive = true;
3950  break;
3951  }
3952  }
3953 
3954  if (!opIsLive)
3955  return SDValue();
3956  }
3957 
3958  return DAG.getNode(ISD::FMA, SDLoc(N), VT,
3959  N0.getOperand(0), N0.getOperand(1), N1);
3960  }
3961  }
3962 
3963  return SDValue();
3964 }
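
What the allowFMA gate above protects is rounding behavior: turning a mul+add pair into ISD::FMA changes numeric results, because a fused multiply-add rounds once where the separate operations round twice. A self-contained C++ illustration (not part of this file):

  #include <cmath>
  #include <cstdio>

  // Standalone illustration: fma(a, b, c) rounds once, a * b + c rounds
  // twice. Build with -ffp-contract=off so the compiler does not itself
  // fuse the first form.
  int main() {
    double eps = std::ldexp(1.0, -27);   // 2^-27, exactly representable
    double a = 1.0 + eps, b = 1.0 - eps; // a * b == 1 - 2^-54 exactly
    double separate = a * b + (-1.0);    // product rounds to 1.0, result 0.0
    double fused = std::fma(a, b, -1.0); // one rounding, exactly -2^-54
    std::printf("separate = %g, fused = %g\n", separate, fused);
    return 0;
  }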
3965 
3966 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
3967 ///
3968 static SDValue PerformADDCombine(SDNode *N,
3969  TargetLowering::DAGCombinerInfo &DCI,
3970  const NVPTXSubtarget &Subtarget,
3971  CodeGenOpt::Level OptLevel) {
3972  SDValue N0 = N->getOperand(0);
3973  SDValue N1 = N->getOperand(1);
3974 
3975  // First try with the default operand order.
3976  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget,
3977  OptLevel);
3978  if (Result.getNode())
3979  return Result;
3980 
3981  // If that didn't work, try again with the operands commuted.
3982  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
3983 }
3984 
3985 static SDValue PerformANDCombine(SDNode *N,
3986  TargetLowering::DAGCombinerInfo &DCI) {
3987  // The type legalizer turns a vector load of i8 values into a zextload to i16
3988  // registers, optionally ANY_EXTENDs it (if target type is integer),
3989  // and ANDs off the high 8 bits. Since we turn this load into a
3990  // target-specific DAG node, the DAG combiner fails to eliminate these AND
3991  // nodes. Do that here.
3992  SDValue Val = N->getOperand(0);
3993  SDValue Mask = N->getOperand(1);
3994 
3995  if (isa<ConstantSDNode>(Val)) {
3996  std::swap(Val, Mask);
3997  }
3998 
3999  SDValue AExt;
4000  // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
4001  if (Val.getOpcode() == ISD::ANY_EXTEND) {
4002  AExt = Val;
4003  Val = Val->getOperand(0);
4004  }
4005 
4006  if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
4007  Val = Val->getOperand(0);
4008  }
4009 
4010  if (Val->getOpcode() == NVPTXISD::LoadV2 ||
4011  Val->getOpcode() == NVPTXISD::LoadV4) {
4012  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
4013  if (!MaskCnst) {
4014  // Not an AND with a constant
4015  return SDValue();
4016  }
4017 
4018  uint64_t MaskVal = MaskCnst->getZExtValue();
4019  if (MaskVal != 0xff) {
4020  // Not an AND that chops off top 8 bits
4021  return SDValue();
4022  }
4023 
4024  MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
4025  if (!Mem) {
4026  // Not a MemSDNode?!?
4027  return SDValue();
4028  }
4029 
4030  EVT MemVT = Mem->getMemoryVT();
4031  if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
4032  // We only handle the i8 case
4033  return SDValue();
4034  }
4035 
4036  unsigned ExtType =
4037  cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
4038  getZExtValue();
4039  if (ExtType == ISD::SEXTLOAD) {
4040  // If for some reason the load is a sextload, the and is needed to zero
4041  // out the high 8 bits
4042  return SDValue();
4043  }
4044 
4045  bool AddTo = false;
4046  if (AExt.getNode() != 0) {
4047  // Re-insert the ext as a zext.
4048  Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4049  AExt.getValueType(), Val);
4050  AddTo = true;
4051  }
4052 
4053  // If we get here, the AND is unnecessary. Just replace it with the load
4054  DCI.CombineTo(N, Val, AddTo);
4055  }
4056 
4057  return SDValue();
4058 }
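
The invariant the combine above exploits, as a standalone C++ model (not part of this file): a zero-extending byte load already has its high bits clear, so the masking AND left behind by type legalization is redundant.

  #include <cassert>
  #include <cstdint>

  // Standalone model of the redundancy removed above.
  int main() {
    uint8_t mem = 0xAB;
    uint16_t widened = mem;              // models zextload i8 -> i16
    assert((widened & 0xff) == widened); // the AND changes nothing
    return 0;
  }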
4059 
4060 enum OperandSignedness {
4061  Signed = 0,
4062  Unsigned,
4063  Unknown
4064 };
4065 
4066 /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
4067 /// that can be demoted to \p OptSize bits without loss of information. The
4068 /// signedness of the operand, if determinable, is placed in \p S.
4069 static bool IsMulWideOperandDemotable(SDValue Op,
4070  unsigned OptSize,
4071  OperandSignedness &S) {
4072  S = Unknown;
4073 
4074  if (Op.getOpcode() == ISD::SIGN_EXTEND ||
4075  Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4076  EVT OrigVT = Op.getOperand(0).getValueType();
4077  if (OrigVT.getSizeInBits() <= OptSize) {
4078  S = Signed;
4079  return true;
4080  }
4081  } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
4082  EVT OrigVT = Op.getOperand(0).getValueType();
4083  if (OrigVT.getSizeInBits() <= OptSize) {
4084  S = Unsigned;
4085  return true;
4086  }
4087  }
4088 
4089  return false;
4090 }
4091 
4092 /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
4093 /// be demoted to \p OptSize bits without loss of information. If the operands
4094 /// contain a constant, it should appear as the RHS operand. The signedness of
4095 /// the operands is placed in \p IsSigned.
4096 static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
4097  unsigned OptSize,
4098  bool &IsSigned) {
4099 
4100  OperandSignedness LHSSign;
4101 
4102  // The LHS operand must be a demotable op
4103  if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
4104  return false;
4105 
4106  // We should have been able to determine the signedness from the LHS
4107  if (LHSSign == Unknown)
4108  return false;
4109 
4110  IsSigned = (LHSSign == Signed);
4111 
4112  // The RHS can be a demotable op or a constant
4113  if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
4114  APInt Val = CI->getAPIntValue();
4115  if (LHSSign == Unsigned) {
4116  if (Val.isIntN(OptSize)) {
4117  return true;
4118  }
4119  return false;
4120  } else {
4121  if (Val.isSignedIntN(OptSize)) {
4122  return true;
4123  }
4124  return false;
4125  }
4126  } else {
4127  OperandSignedness RHSSign;
4128  if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
4129  return false;
4130 
4131  if (LHSSign != RHSSign)
4132  return false;
4133 
4134  return true;
4135  }
4136 }
4137 
4138 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
4139 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
4140 /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
4141 /// amount.
4142 static SDValue TryMULWIDECombine(SDNode *N,
4143  TargetLowering::DAGCombinerInfo &DCI) {
4144  EVT MulType = N->getValueType(0);
4145  if (MulType != MVT::i32 && MulType != MVT::i64) {
4146  return SDValue();
4147  }
4148 
4149  SDLoc DL(N);
4150  unsigned OptSize = MulType.getSizeInBits() >> 1;
4151  SDValue LHS = N->getOperand(0);
4152  SDValue RHS = N->getOperand(1);
4153 
4154  // Canonicalize the multiply so the constant (if any) is on the right
4155  if (N->getOpcode() == ISD::MUL) {
4156  if (isa<ConstantSDNode>(LHS)) {
4157  std::swap(LHS, RHS);
4158  }
4159  }
4160 
4161  // If we have a SHL, determine the actual multiply amount
4162  if (N->getOpcode() == ISD::SHL) {
4163  ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
4164  if (!ShlRHS) {
4165  return SDValue();
4166  }
4167 
4168  APInt ShiftAmt = ShlRHS->getAPIntValue();
4169  unsigned BitWidth = MulType.getSizeInBits();
4170  if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
4171  APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
4172  RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
4173  } else {
4174  return SDValue();
4175  }
4176  }
4177 
4178  bool Signed;
4179  // Verify that our operands are demotable
4180  if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
4181  return SDValue();
4182  }
4183 
4184  EVT DemotedVT;
4185  if (MulType == MVT::i32) {
4186  DemotedVT = MVT::i16;
4187  } else {
4188  DemotedVT = MVT::i32;
4189  }
4190 
4191  // Truncate the operands to the correct size. Note that these are just for
4192  // type consistency and will (likely) be eliminated in later phases.
4193  SDValue TruncLHS =
4194  DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
4195  SDValue TruncRHS =
4196  DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
4197 
4198  unsigned Opc;
4199  if (Signed) {
4200  Opc = NVPTXISD::MUL_WIDE_SIGNED;
4201  } else {
4202  Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
4203  }
4204 
4205  return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
4206 }
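
The correctness argument behind the demotion above, as a standalone C++ model (illustrative, not part of this file): when both operands of an M-bit multiply carry only M/2 significant bits, an M/2 x M/2 to M widening multiply produces the identical value.

  #include <cassert>
  #include <cstdint>

  // Standalone model: 64-bit multiply of sign-extended 32-bit values
  // equals a widening 32x32->64 multiply (what mul.wide.s32 computes).
  int64_t mulWideSigned(int32_t lhs, int32_t rhs) {
    return static_cast<int64_t>(lhs) * static_cast<int64_t>(rhs);
  }

  int main() {
    int32_t x = -12345, y = 67890;
    int64_t wide = static_cast<int64_t>(x) * static_cast<int64_t>(y);
    assert(wide == mulWideSigned(x, y));
    return 0;
  }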
4207 
4208 /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
4209 static SDValue PerformMULCombine(SDNode *N,
4210  TargetLowering::DAGCombinerInfo &DCI,
4211  CodeGenOpt::Level OptLevel) {
4212  if (OptLevel > 0) {
4213  // Try mul.wide combining at OptLevel > 0
4214  SDValue Ret = TryMULWIDECombine(N, DCI);
4215  if (Ret.getNode())
4216  return Ret;
4217  }
4218 
4219  return SDValue();
4220 }
4221 
4222 /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
4223 static SDValue PerformSHLCombine(SDNode *N,
4224  TargetLowering::DAGCombinerInfo &DCI,
4225  CodeGenOpt::Level OptLevel) {
4226  if (OptLevel > 0) {
4227  // Try mul.wide combining at OptLevel > 0
4228  SDValue Ret = TryMULWIDECombine(N, DCI);
4229  if (Ret.getNode())
4230  return Ret;
4231  }
4232 
4233  return SDValue();
4234 }
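
A short reminder of why the SHL combine can reuse the machinery above (a standalone illustration, not part of this file): shifting left by a constant c is multiplication by 2^c, which is exactly the rewrite TryMULWIDECombine applies before checking demotability.

  #include <cassert>
  #include <cstdint>

  // Standalone model: shl-by-constant is multiply by a power of two.
  int main() {
    int64_t x = 123456; // fits in 32 bits, so mul.wide applies after rewrite
    unsigned c = 3;
    assert((x << c) == x * (INT64_C(1) << c));
    return 0;
  }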
4235 
4236 SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
4237  DAGCombinerInfo &DCI) const {
4238  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
4239  switch (N->getOpcode()) {
4240  default: break;
4241  case ISD::ADD:
4242  case ISD::FADD:
4243  return PerformADDCombine(N, DCI, STI, OptLevel);
4244  case ISD::MUL:
4245  return PerformMULCombine(N, DCI, OptLevel);
4246  case ISD::SHL:
4247  return PerformSHLCombine(N, DCI, OptLevel);
4248  case ISD::AND:
4249  return PerformANDCombine(N, DCI);
4250  }
4251  return SDValue();
4252 }
4253 
4254 /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
4255 static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
4256  SmallVectorImpl<SDValue> &Results) {
4257  EVT ResVT = N->getValueType(0);
4258  SDLoc DL(N);
4259 
4260  assert(ResVT.isVector() && "Vector load must have vector type");
4261 
4262  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
4263  // legal. We can (and should) split that into 2 loads of <2 x double> here
4264  // but I'm leaving that as a TODO for now.
4265  assert(ResVT.isSimple() && "Can only handle simple types");
4266  switch (ResVT.getSimpleVT().SimpleTy) {
4267  default:
4268  return;
4269  case MVT::v2i8:
4270  case MVT::v2i16:
4271  case MVT::v2i32:
4272  case MVT::v2i64:
4273  case MVT::v2f32:
4274  case MVT::v2f64:
4275  case MVT::v4i8:
4276  case MVT::v4i16:
4277  case MVT::v4i32:
4278  case MVT::v4f32:
4279  // This is a "native" vector type
4280  break;
4281  }
4282 
4283  LoadSDNode *LD = cast<LoadSDNode>(N);
4284 
4285  unsigned Align = LD->getAlignment();
4286  auto &TD = DAG.getDataLayout();
4287  unsigned PrefAlign =
4288  TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
4289  if (Align < PrefAlign) {
4290  // This load is not sufficiently aligned, so bail out and let this vector
4291  // load be scalarized. Note that we may still be able to emit smaller
4292  // vector loads. For example, if we are loading a <4 x float> with an
4293  // alignment of 8, this check will fail but the legalizer will try again
4294  // with 2 x <2 x float>, which will succeed with an alignment of 8.
4295  return;
4296  }
4297 
4298  EVT EltVT = ResVT.getVectorElementType();
4299  unsigned NumElts = ResVT.getVectorNumElements();
4300 
4301  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
4302  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
4303  // loaded type to i16 and propagate the "real" type as the memory type.
4304  bool NeedTrunc = false;
4305  if (EltVT.getSizeInBits() < 16) {
4306  EltVT = MVT::i16;
4307  NeedTrunc = true;
4308  }
4309 
4310  unsigned Opcode = 0;
4311  SDVTList LdResVTs;
4312 
4313  switch (NumElts) {
4314  default:
4315  return;
4316  case 2:
4317  Opcode = NVPTXISD::LoadV2;
4318  LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
4319  break;
4320  case 4: {
4321  Opcode = NVPTXISD::LoadV4;
4322  EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
4323  LdResVTs = DAG.getVTList(ListVTs);
4324  break;
4325  }
4326  }
4327 
4328  // Copy regular operands
4329  SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
4330 
4331  // The select routine does not have access to the LoadSDNode instance, so
4332  // pass along the extension information
4333  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
4334 
4335  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
4336  LD->getMemoryVT(),
4337  LD->getMemOperand());
4338 
4339  SmallVector<SDValue, 4> ScalarRes;
4340 
4341  for (unsigned i = 0; i < NumElts; ++i) {
4342  SDValue Res = NewLD.getValue(i);
4343  if (NeedTrunc)
4344  Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
4345  ScalarRes.push_back(Res);
4346  }
4347 
4348  SDValue LoadChain = NewLD.getValue(NumElts);
4349 
4350  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
4351 
4352  Results.push_back(BuildVec);
4353  Results.push_back(LoadChain);
4354 }
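
The net effect of ReplaceLoadVector, shown as before/after DAG shapes (an illustrative rendering, not from this file): one vector-typed load becomes a single multi-result target node plus a BUILD_VECTOR that reassembles the scalars, with the chain threaded through as the last result.

  before:  v4f32,ch = load %ptr
  after:   f32,f32,f32,f32,ch = NVPTXISD::LoadV4 %ptr, <ext-type>
           v4f32 = BUILD_VECTOR f32#0, f32#1, f32#2, f32#3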
4355 
4356 static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
4357  SmallVectorImpl<SDValue> &Results) {
4358  SDValue Chain = N->getOperand(0);
4359  SDValue Intrin = N->getOperand(1);
4360  SDLoc DL(N);
4361 
4362  // Get the intrinsic ID
4363  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
4364  switch (IntrinNo) {
4365  default:
4366  return;
4367  case Intrinsic::nvvm_ldg_global_i:
4368  case Intrinsic::nvvm_ldg_global_f:
4369  case Intrinsic::nvvm_ldg_global_p:
4370  case Intrinsic::nvvm_ldu_global_i:
4371  case Intrinsic::nvvm_ldu_global_f:
4372  case Intrinsic::nvvm_ldu_global_p: {
4373  EVT ResVT = N->getValueType(0);
4374 
4375  if (ResVT.isVector()) {
4376  // Vector LDG/LDU
4377 
4378  unsigned NumElts = ResVT.getVectorNumElements();
4379  EVT EltVT = ResVT.getVectorElementType();
4380 
4381  // Since LDU/LDG are target nodes, we cannot rely on DAG type
4382  // legalization.
4383  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
4384  // loaded type to i16 and propagate the "real" type as the memory type.
4385  bool NeedTrunc = false;
4386  if (EltVT.getSizeInBits() < 16) {
4387  EltVT = MVT::i16;
4388  NeedTrunc = true;
4389  }
4390 
4391  unsigned Opcode = 0;
4392  SDVTList LdResVTs;
4393 
4394  switch (NumElts) {
4395  default:
4396  return;
4397  case 2:
4398  switch (IntrinNo) {
4399  default:
4400  return;
4401  case Intrinsic::nvvm_ldg_global_i:
4402  case Intrinsic::nvvm_ldg_global_f:
4403  case Intrinsic::nvvm_ldg_global_p:
4404  Opcode = NVPTXISD::LDGV2;
4405  break;
4406  case Intrinsic::nvvm_ldu_global_i:
4407  case Intrinsic::nvvm_ldu_global_f:
4408  case Intrinsic::nvvm_ldu_global_p:
4409  Opcode = NVPTXISD::LDUV2;
4410  break;
4411  }
4412  LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
4413  break;
4414  case 4: {
4415  switch (IntrinNo) {
4416  default:
4417  return;
4418  case Intrinsic::nvvm_ldg_global_i:
4419  case Intrinsic::nvvm_ldg_global_f:
4420  case Intrinsic::nvvm_ldg_global_p:
4421  Opcode = NVPTXISD::LDGV4;
4422  break;
4423  case Intrinsic::nvvm_ldu_global_i:
4424  case Intrinsic::nvvm_ldu_global_f:
4425  case Intrinsic::nvvm_ldu_global_p:
4426  Opcode = NVPTXISD::LDUV4;
4427  break;
4428  }
4429  EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
4430  LdResVTs = DAG.getVTList(ListVTs);
4431  break;
4432  }
4433  }
4434 
4435  SmallVector<SDValue, 8> OtherOps;
4436 
4437  // Copy regular operands
4438 
4439  OtherOps.push_back(Chain); // Chain
4440  // Skip operand 1 (intrinsic ID)
4441  // Others
4442  OtherOps.append(N->op_begin() + 2, N->op_end());
4443 
4444  MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4445 
4446  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
4447  MemSD->getMemoryVT(),
4448  MemSD->getMemOperand());
4449 
4450  SmallVector<SDValue, 4> ScalarRes;
4451 
4452  for (unsigned i = 0; i < NumElts; ++i) {
4453  SDValue Res = NewLD.getValue(i);
4454  if (NeedTrunc)
4455  Res =
4456  DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
4457  ScalarRes.push_back(Res);
4458  }
4459 
4460  SDValue LoadChain = NewLD.getValue(NumElts);
4461 
4462  SDValue BuildVec =
4463  DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
4464 
4465  Results.push_back(BuildVec);
4466  Results.push_back(LoadChain);
4467  } else {
4468  // i8 LDG/LDU
4469  assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
4470  "Custom handling of non-i8 ldu/ldg?");
4471 
4472  // Just copy all operands as-is
4473  SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
4474 
4475  // Force output to i16
4476  SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
4477 
4478  MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4479 
4480  // We make sure the memory type is i8, which will be used during isel
4481  // to select the proper instruction.
4482  SDValue NewLD =
4483  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
4484  MVT::i8, MemSD->getMemOperand());
4485 
4486  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
4487  NewLD.getValue(0)));
4488  Results.push_back(NewLD.getValue(1));
4489  }
4490  }
4491  }
4492 }
4493 
4494 void NVPTXTargetLowering::ReplaceNodeResults(
4495  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4496  switch (N->getOpcode()) {
4497  default:
4498  report_fatal_error("Unhandled custom legalization");
4499  case ISD::LOAD:
4500  ReplaceLoadVector(N, DAG, Results);
4501  return;
4502  case ISD::INTRINSIC_W_CHAIN:
4503  ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
4504  return;
4505  }
4506 }
4507 
4508 // Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
4509 void NVPTXSection::anchor() {}
4510 
4511 NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
4512  delete TextSection;
4513  delete DataSection;
4514  delete BSSSection;
4515  delete ReadOnlySection;
4516 
4517  delete StaticCtorSection;
4518  delete StaticDtorSection;
4519  delete LSDASection;
4520  delete EHFrameSection;
4521  delete DwarfAbbrevSection;
4522  delete DwarfInfoSection;
4523  delete DwarfLineSection;
4524  delete DwarfFrameSection;
4525  delete DwarfPubTypesSection;
4526  delete DwarfDebugInlineSection;
4527  delete DwarfStrSection;
4528  delete DwarfLocSection;
4529  delete DwarfARangesSection;
4530  delete DwarfRangesSection;
4531 }
4532 
4533 MCSection *
4534 NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
4535  SectionKind Kind, Mangler &Mang,
4536  const TargetMachine &TM) const {
4537  return getDataSection();
4538 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type (if unknown returns 0).
SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, bool isNonTemporal, bool isVolatile, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Instances of this class represent a uniqued identifier for a section in the current translation unit...
Definition: MCSection.h:48
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:104
SDValue getValue(unsigned R) const
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
LLVMContext * getContext() const
Definition: SelectionDAG.h:289
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, SDLoc DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:646
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B)
GreatestCommonDivisor64 - Return the greatest common divisor of the two values using Euclid's algorit...
Definition: MathExtras.h:493
size_t size() const
size - Get the string size.
Definition: StringRef.h:113
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:292
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:554
unsigned getNumRegisters(LLVMContext &Context, EVT VT) const
Return the number of registers that this ValueType will eventually require.
MCSection * DwarfPubTypesSection
bool hasOneUse() const
Return true if there is exactly one use of this node.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:114
bool hasROT32() const
const TargetMachine & getTargetMachine() const
InstrTy * getInstruction() const
Definition: CallSite.h:82
static cl::opt< unsigned > FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it"" 1: do it 2: do it aggressively"), cl::init(2))
SDValue getMergeValues(ArrayRef< SDValue > Ops, SDLoc dl)
Create a MERGE_VALUES node from the given operands.
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:210
MCSection * TextSection
Section directive for standard text.
CallInst - This class represents a function call, abstracting a target machine's calling convention...
bool isImageOrSamplerVal(const Value *, const Module *)
static PointerType * get(Type *ElementType, unsigned AddressSpace)
PointerType::get - This constructs a pointer to an object of the specified type in a numbered address...
Definition: Type.cpp:738
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl< ISD::OutputArg > &, unsigned retAlignment, const ImmutableCallSite *CS) const
static bool IsMulWideOperandDemotable(SDValue Op, unsigned OptSize, OperandSignedness &S)
IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptS...
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:378
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
unsigned getNumOperands() const
Return the number of values used by this operation.
Type * getReturnType() const
Definition: Function.cpp:233
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
MCSection * getDataSection() const
unsigned getNumOperands() const
12: Structures
Definition: Type.h:71
bool isKernelFunction(const llvm::Function &)
const SDValue & getOperand(unsigned Num) const
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target...
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:225
F(f)
FunTy * getCaller() const
getCaller - Return the caller function for this call site
Definition: CallSite.h:170
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:344
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return true if the attribute exists at the given index.
Definition: Attributes.cpp:956
const SDValue & getBasePtr() const
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:357
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
Reports a serious error, calling any installed error handler.
uint64_t getTypeAllocSizeInBits(Type *Ty) const
Returns the offset in bits between successive objects of the specified type, including alignment padd...
Definition: DataLayout.h:398
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:150
static Constant * getNullValue(Type *Ty)
Definition: Constants.cpp:178
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:371
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:115
static unsigned int uniqueCallSite
static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, unsigned OptSize, bool &IsSigned)
AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits...
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
bool hasROT64() const
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
Shift and rotation operations.
Definition: ISDOpcodes.h:332
static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputePTXValueVTs - For the given Type Ty, returns the set of primitive EVTs that compose it...
unsigned getFunctionAlignment(const Function *F) const
getFunctionAlignment - Return the Log2 alignment of this function.
StructType - Class to represent struct types.
Definition: DerivedTypes.h:191
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:283
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
SDValue getTargetGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:467
bool isLiteral() const
isLiteral - Return true if this type is uniqued by structural equivalence, false if it is a struct de...
Definition: DerivedTypes.h:246
MCSection * StaticDtorSection
This section contains the static destructor pointer list.
SimpleValueType SimpleTy
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:210
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:372
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool isTypeSupportedInIntrinsic(MVT VT) const
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:110
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< ISD::InputArg, 32 > Ins
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:216
SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, SDLoc DL)
Return a new CALLSEQ_START node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:637
ConstantExpr - a constant value that is initialized with an expression using other constant values...
Definition: Constants.h:852
unsigned getIROrder() const
Return the node ordering.
const MCSection * DwarfDebugInlineSection
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
unsigned int getSmVersion() const
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
SmallVector< ISD::OutputArg, 32 > Outs
TypeID getTypeID() const
getTypeID - Return the type id for the type.
Definition: Type.h:134
bool isFloatingPointTy() const
isFloatingPointTy - Return true if this is one of the six floating point types
Definition: Type.h:159
Pass structure by value.
Definition: Attributes.h:73
const SDValue & getBasePtr() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:659
const APInt & getAPIntValue() const
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
EVT getMemoryVT() const
Return the type of the in-memory value.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &Subtarget, CodeGenOpt::Level OptLevel)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
void setIROrder(unsigned Order)
Set the node ordering.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:142
Type * getElementType() const
Definition: DerivedTypes.h:323
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:284
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
MCSection * DataSection
Section directive for standard data.
PointerType - Class to represent pointers.
Definition: DerivedTypes.h:449
UNDEF - An undefined node.
Definition: ISDOpcodes.h:169
This class is used to represent ISD::STORE nodes.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &Subtarget, CodeGenOpt::Level OptLevel)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1...
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:262
LLVM_CONSTEXPR size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:247
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:245
0: type with no size
Definition: Type.h:56
#define P(N)
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned getStoreSizeInBits() const
getStoreSizeInBits - Return the number of bits overwritten by a store of the specified value type...
Definition: ValueTypes.h:251
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
MVT - Machine Value Type.
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
static cl::opt< bool > sched4reg("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false))
Simple binary floating point operators.
Definition: ISDOpcodes.h:237
FunTy * getCalledFunction() const
getCalledFunction - Return the function being called if this is a direct call, otherwise return null ...
Definition: CallSite.h:99
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
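A representative sketch of how a TargetLowering constructor registers for these callbacks, in the style this backend uses for its add/mul/shift combines; the exact opcode list here is illustrative, not a verbatim quote.

// Ask the generic combiner to hand these nodes to PerformDAGCombine.
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::AND);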
bool isNonTemporal() const
bool isVectorTy() const
isVectorTy - True if this is an instance of VectorType.
Definition: Type.h:226
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition: APInt.h:1153
bool isVector() const
isVector - Return true if this is a vector value type.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:780
const NVPTXTargetLowering * getTargetLowering() const override
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
static unsigned getOpcForSurfaceInstr(unsigned Intrinsic)
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
This class provides iterator support for SDUse operands that use a specific SDNode.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
unsigned getOpcode() const
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:644
ManagedStringPool * getManagedStrPool() const
SectionKind - This is a simple POD value that classifies the properties of a section.
Definition: SectionKind.h:28
static mvt_range vector_valuetypes()
Class to represent integer types.
Definition: DerivedTypes.h:37
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isVolatile() const
const SDValue & getValue() const
unsigned MaxStoresPerMemmove
Specify the maximum number of store instructions per memmove call.
SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
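A hedged example of a call matching the signature quoted above; DAG, dl, Chain, and Ptr are assumed to be in scope.

// Load an i8 from memory and zero-extend it to i32 in one node.
SDValue Ld = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, Ptr,
                            MachinePointerInfo(), MVT::i8,
                            /*isVolatile=*/false, /*isNonTemporal=*/false,
                            /*isInvariant=*/false, /*Alignment=*/1);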
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:338
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:416
EVT - Extended Value Type.
Definition: ValueTypes.h:31
bool isPointerTy() const
isPointerTy - True if this is an instance of PointerType.
Definition: Type.h:217
std::vector< ArgListEntry > ArgListTy
static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
This structure contains all information that is necessary for lowering calls.
MCSection * StaticCtorSection
This section contains the static constructor pointer list.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
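For instance, with an LLVMContext Ctx in scope:

// Builds the vector type <4 x float>, which in the simple case is
// the same value type as MVT::v4f32.
EVT V4F32 = EVT::getVectorVT(Ctx, MVT::f32, 4);
assert(V4F32.getSimpleVT() == MVT::v4f32);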
MachinePointerInfo - This class contains a discriminated union of information about pointers in memor...
static bool IsPTXVectorType(MVT VT)
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:335
MCSection * DwarfAbbrevSection
const MachinePointerInfo & getPointerInfo() const
static SDValue TryMULWIDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces...
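In DAG terms, the shape of the rewrite this helper targets is roughly the following sketch; NVPTXISD::MUL_WIDE_SIGNED is the target node this backend defines for PTX's mul.wide.

// Before: a 64-bit multiply whose operands are sign-extended i32s.
//   (mul:i64 (sext:i64 a:i32), (sext:i64 b:i32))
// After: a half-width "wide" multiply that maps to PTX mul.wide.s32.
//   (NVPTXISD::MUL_WIDE_SIGNED:i64 a:i32, b:i32)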
MCSection * EHFrameSection
EH frame section.
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:674
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override
Return the preferred vector type legalization action.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.cpp:552
std::string * getManagedString(const char *S)
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
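A sketch of the contract: the override fills in the IntrinsicInfo it is handed and returns true when the intrinsic touches memory. Field names follow TargetLowering::IntrinsicInfo in this LLVM version; the v2f32 load shape is an illustrative assumption.

// Describe a load-like intrinsic so SelectionDAG builds a
// MemIntrinsicNode (and a MachineMemOperand) for it.
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::v2f32;           // illustrative in-memory type
Info.ptrVal = I.getArgOperand(0);  // the pointer argument
Info.offset = 0;
Info.vol = false;
Info.readMem = true;
Info.writeMem = false;
Info.align = 8;
return true;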
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:273
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:388
bool isInvariant() const
MCSection * SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const override
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:179
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:57
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
NVPTXTargetLowering(const NVPTXTargetMachine &TM, const NVPTXSubtarget &STI)
MCSection * DwarfRangesSection
bool getAlign(const llvm::Function &, unsigned index, unsigned &)
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:335
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
This is an abstract virtual class for memory operations.
MCSection * DwarfARangesSection
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
static mvt_range integer_valuetypes()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
VectorType - Class to represent vector types.
Definition: DerivedTypes.h:362
Class for arbitrary precision integers.
Definition: APInt.h:73
bool isCast() const
Return true if this is a cast (conversion) constant expression.
Definition: Constants.cpp:1177
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:378
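For example:

// -5 is representable as a signed 32-bit value, so the check holds.
APInt X(64, (uint64_t)-5, /*isSigned=*/true);
assert(X.isSignedIntN(32));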
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:342
op_iterator op_begin() const
bool isIntegerTy() const
isIntegerTy - True if this is an instance of IntegerType.
Definition: Type.h:193
StringRef getName() const
getName - Return the name for this struct type if it has an identity.
Definition: Type.cpp:583
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:383
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
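Typical use against an SDNode *N assumed in scope, as this file does throughout its combines:

// dyn_cast returns null on a type mismatch, so one branch both
// tests the dynamic type and performs the downcast.
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
  dbgs() << "load of " << LD->getMemoryVT().getEVTString() << "\n";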
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:386
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const
Return the preferred vector type legalization action.
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:542
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, SDLoc dl, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array...
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:372
NVPTXTargetMachine.
SmallVector< SDValue, 32 > OutVals
MCSection * LSDASection
If exception handling is supported by the target, this is the section the Language Specific Data Area...
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:321
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:217
bool isAggregateType() const
isAggregateType - Return true if the type is an aggregate type.
Definition: Type.h:260
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:401
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:518
ImmutableCallSite - establish a view to a call site for examination.
Definition: CallSite.h:418
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:233
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
op_iterator op_end() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
unsigned MaxStoresPerMemcpy
Specify the maximum number of store instructions per memcpy call.
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget=false, bool isOpaque=false)
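For example, with DAG and dl assumed in scope:

// Materialize the 32-bit constant 0; constant nodes are uniqued,
// so repeated calls yield the same SDNode.
SDValue Zero = DAG.getConstant(0, dl, MVT::i32);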
OperandSignedness
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:279
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:140
const ARM::ArchExtKind Kind
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:94
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:465
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
getPrimitiveSizeInBits - Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:121
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:365
LLVM Value Representation.
Definition: Value.h:69
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:240
static const char * name
static const Function * getParent(const Value *V)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:287
Primary interface to the complete machine description for the target machine.
MCSection * BSSSection
Section that is default initialized to zero.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
MCSection * ReadOnlySection
Section that is readonly and can contain arbitrary initialized data.
unsigned MaxStoresPerMemset
Specify the maximum number of store instructions per memset call.
SDValue getSetCC(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:726
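A one-line example, assuming LHS and RHS are i32 SDValues in scope:

// i1 result of a signed less-than comparison.
SDValue Lt = DAG.getSetCC(dl, MVT::i1, LHS, RHS, ISD::SETLT);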
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MVT getVectorElementType() const
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:380
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:389
unsigned getAlignment() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:203
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array...
SDValue getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget=false)
SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
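A sketch of a call matching the signature quoted above; NVPTXISD::LoadV2, the operand list, and the v2i32 memory type are illustrative assumptions.

// A two-result vector load plus its output chain.
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue Ops[] = { Chain, Ptr };
SDValue NewLD = DAG.getMemIntrinsicNode(NVPTXISD::LoadV2, dl, VTs, Ops,
                                        MVT::v2i32, MachinePointerInfo(),
                                        /*Align=*/8);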
static unsigned getOpcForTextureInstr(unsigned Intrinsic)
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const NVPTXRegisterInfo * getRegisterInfo() const override
uint64_t getZExtValue() const
BRIND - Indirect branch.
Definition: ISDOpcodes.h:538
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:225
This class is used to represent ISD::LOAD nodes.
const NVPTXTargetMachine * nvTM