1 //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "NVPTX.h"
17 #include "NVPTXISelLowering.h"
18 #include "NVPTXSection.h"
19 #include "NVPTXSubtarget.h"
20 #include "NVPTXTargetMachine.h"
21 #include "NVPTXTargetObjectFile.h"
22 #include "NVPTXUtilities.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/CodeGen/Analysis.h"
33 #include "llvm/IR/Argument.h"
34 #include "llvm/IR/Attributes.h"
35 #include "llvm/IR/CallSite.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalValue.h"
41 #include "llvm/IR/Instruction.h"
42 #include "llvm/IR/Instructions.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/Value.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/CodeGen.h"
56 #include <algorithm>
57 #include <cassert>
58 #include <cstdint>
59 #include <iterator>
60 #include <sstream>
61 #include <string>
62 #include <utility>
63 #include <vector>
64 
65 #undef DEBUG_TYPE
66 #define DEBUG_TYPE "nvptx-lower"
67 
68 using namespace llvm;
69 
70 static unsigned int uniqueCallSite = 0;
71 
73  "nvptx-sched4reg",
74  cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
75 
 76 static cl::opt<unsigned>
 77 FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
 78  cl::desc("NVPTX Specific: FMA contraction (0: don't do it,"
 79  " 1: do it, 2: do it aggressively)"),
 80  cl::init(2));
81 
82 static bool IsPTXVectorType(MVT VT) {
83  switch (VT.SimpleTy) {
84  default:
85  return false;
86  case MVT::v2i1:
87  case MVT::v4i1:
88  case MVT::v2i8:
89  case MVT::v4i8:
90  case MVT::v2i16:
91  case MVT::v4i16:
92  case MVT::v2i32:
93  case MVT::v4i32:
94  case MVT::v2i64:
95  case MVT::v2f32:
96  case MVT::v4f32:
97  case MVT::v2f64:
98  return true;
99  }
100 }
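// For illustration: these are exactly the vector shapes PTX vector accesses
// can express directly, e.g. v2f32 -> ld.v2.f32 / st.v2.f32 and v4i32 ->
// ld.v4.u32 / st.v4.u32. Odd-sized vectors such as <3 x float> are absent
// because they are widened to the next power of two before reaching here.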
101 
102 /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
103 /// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
104 /// into their primitive components.
105 /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
106 /// same number of types as the Ins/Outs arrays in LowerFormalArguments,
107 /// LowerCall, and LowerReturn.
108 static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
109  Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
 110  SmallVectorImpl<uint64_t> *Offsets = nullptr,
 111  uint64_t StartingOffset = 0) {
112  SmallVector<EVT, 16> TempVTs;
113  SmallVector<uint64_t, 16> TempOffsets;
114 
115  ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
116  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
117  EVT VT = TempVTs[i];
118  uint64_t Off = TempOffsets[i];
119  if (VT.isVector())
120  for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
121  ValueVTs.push_back(VT.getVectorElementType());
122  if (Offsets)
123  Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize());
124  }
125  else {
126  ValueVTs.push_back(VT);
127  if (Offsets)
128  Offsets->push_back(Off);
129  }
130  }
131 }
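// For illustration, a minimal sketch of the difference (assuming TLI, DL and
// a Type *Ty in scope):
//
//   SmallVector<EVT, 16> VTs;
//   SmallVector<uint64_t, 16> Offsets;
//   // For Ty = <4 x float>, ComputeValueVTs would yield just { v4f32 };
//   // the PTX variant flattens the vector into scalars:
//   ComputePTXValueVTs(TLI, DL, Ty, VTs, &Offsets);
//   // VTs == { f32, f32, f32, f32 }, Offsets == { 0, 4, 8, 12 }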
132 
133 // NVPTXTargetLowering Constructor.
 134 NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
 135  const NVPTXSubtarget &STI)
136  : TargetLowering(TM), nvTM(&TM), STI(STI) {
 137  // Always lower memset, memcpy, and memmove intrinsics to load/store
 138  // instructions, rather than
 139  // generating calls to memset, memcpy, or memmove.
140  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
141  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
142  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
143 
146 
147  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
148  // condition branches.
149  setJumpIsExpensive(true);
150 
151  // Wide divides are _very_ slow. Try to reduce the width of the divide if
152  // possible.
153  addBypassSlowDiv(64, 32);
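 // For illustration, the bypass rewrites a 64-bit udiv into (roughly):
 //
 //   if (((a | b) >> 32) == 0)
 //     q = (uint64_t)((uint32_t)a / (uint32_t)b); // cheap 32-bit divide
 //   else
 //     q = a / b;                                 // full 64-bit divide
 //
 // paying a compare+branch to avoid the wide divide in the common case.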
154 
155  // By default, use the Source scheduling
 156  if (sched4reg)
 157  setSchedulingPreference(Sched::RegPressure);
 158  else
 159  setSchedulingPreference(Sched::Source);
 160 
161  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
162  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
163  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
164  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
165  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
166  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
167 
168  // Operations not directly supported by NVPTX.
 184  // Some SIGN_EXTEND_INREG can be done using the cvt instruction.
 185  // For others we will expand to a SHL/SRA pair.
 186  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
 187  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
 188  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
 189  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
 190  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
197 
 198  if (STI.hasROT64()) {
 199  setOperationAction(ISD::ROTL, MVT::i64, Legal);
 200  setOperationAction(ISD::ROTR, MVT::i64, Legal);
 201  } else {
 202  setOperationAction(ISD::ROTL, MVT::i64, Expand);
 203  setOperationAction(ISD::ROTR, MVT::i64, Expand);
 204  }
 205  if (STI.hasROT32()) {
 206  setOperationAction(ISD::ROTL, MVT::i32, Legal);
 207  setOperationAction(ISD::ROTR, MVT::i32, Legal);
 208  } else {
 209  setOperationAction(ISD::ROTL, MVT::i32, Expand);
 210  setOperationAction(ISD::ROTR, MVT::i32, Expand);
 211  }
212 
220 
 221  // Indirect branch is not supported.
 222  // This also disables Jump Table creation.
 223  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 224  setOperationAction(ISD::BRIND, MVT::Other, Expand);
 225 
228 
 229  // We want to legalize constant related memmove and memcpy
 230  // intrinsics.
 231  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
 232 
233  // Turn FP extload into load/fpextend
243  // Turn FP truncstore into trunc + store.
244  // FIXME: vector types should also be expanded
248 
 249  // PTX does not support load / store predicate registers
 250  setOperationAction(ISD::LOAD, MVT::i1, Custom);
 251  setOperationAction(ISD::STORE, MVT::i1, Custom);
 252 
 253  for (MVT VT : MVT::integer_valuetypes()) {
 254  setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 255  setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
 256  setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
 257  }
258 
 259  // This is legal in NVPTX
 260  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
 261  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 262 
 263  // TRAP can be lowered to PTX trap
 264  setOperationAction(ISD::TRAP, MVT::Other, Legal);
 265 
268 
269  // Register custom handling for vector loads/stores
 270  for (MVT VT : MVT::vector_valuetypes()) {
 271  if (IsPTXVectorType(VT)) {
 272  setOperationAction(ISD::LOAD, VT, Custom);
 273  setOperationAction(ISD::STORE, VT, Custom);
 274  setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
 275  }
 276  }
277 
 278  // Custom handling for i8 intrinsics
 279  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
 280 
290 
 291  // PTX does not directly support SELP of i1, so promote to i32 first
 292  setOperationAction(ISD::SELECT, MVT::i1, Custom);
 293 
 294  // PTX cannot multiply two i64s in a single instruction.
 295  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 296  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 297 
298  // We have some custom DAG combine patterns for these nodes
 299  setTargetDAGCombine(ISD::ADD);
 300  setTargetDAGCombine(ISD::AND);
 301  setTargetDAGCombine(ISD::FADD);
 302  setTargetDAGCombine(ISD::MUL);
 303  setTargetDAGCombine(ISD::SHL);
 304  setTargetDAGCombine(ISD::SELECT);
 305  setTargetDAGCombine(ISD::SREM);
 306  setTargetDAGCombine(ISD::UREM);
 307 
308  // Library functions. These default to Expand, but we have instructions
309  // for them.
326 
327  // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
328  // No FPOW or FREM in PTX.
329 
 330  // Now deduce the information based on the above-mentioned
 331  // actions
 332  computeRegisterProperties(STI.getRegisterInfo());
 333 }
334 
335 const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
336  switch ((NVPTXISD::NodeType)Opcode) {
 337  case NVPTXISD::FIRST_NUMBER:
 338  break;
339  case NVPTXISD::CALL:
340  return "NVPTXISD::CALL";
341  case NVPTXISD::RET_FLAG:
342  return "NVPTXISD::RET_FLAG";
344  return "NVPTXISD::LOAD_PARAM";
345  case NVPTXISD::Wrapper:
346  return "NVPTXISD::Wrapper";
348  return "NVPTXISD::DeclareParam";
350  return "NVPTXISD::DeclareScalarParam";
352  return "NVPTXISD::DeclareRet";
354  return "NVPTXISD::DeclareScalarRet";
356  return "NVPTXISD::DeclareRetParam";
357  case NVPTXISD::PrintCall:
358  return "NVPTXISD::PrintCall";
360  return "NVPTXISD::PrintConvergentCall";
362  return "NVPTXISD::PrintCallUni";
364  return "NVPTXISD::PrintConvergentCallUni";
365  case NVPTXISD::LoadParam:
366  return "NVPTXISD::LoadParam";
368  return "NVPTXISD::LoadParamV2";
370  return "NVPTXISD::LoadParamV4";
372  return "NVPTXISD::StoreParam";
374  return "NVPTXISD::StoreParamV2";
376  return "NVPTXISD::StoreParamV4";
378  return "NVPTXISD::StoreParamS32";
380  return "NVPTXISD::StoreParamU32";
382  return "NVPTXISD::CallArgBegin";
383  case NVPTXISD::CallArg:
384  return "NVPTXISD::CallArg";
386  return "NVPTXISD::LastCallArg";
388  return "NVPTXISD::CallArgEnd";
389  case NVPTXISD::CallVoid:
390  return "NVPTXISD::CallVoid";
391  case NVPTXISD::CallVal:
392  return "NVPTXISD::CallVal";
394  return "NVPTXISD::CallSymbol";
395  case NVPTXISD::Prototype:
396  return "NVPTXISD::Prototype";
397  case NVPTXISD::MoveParam:
398  return "NVPTXISD::MoveParam";
400  return "NVPTXISD::StoreRetval";
402  return "NVPTXISD::StoreRetvalV2";
404  return "NVPTXISD::StoreRetvalV4";
406  return "NVPTXISD::PseudoUseParam";
407  case NVPTXISD::RETURN:
408  return "NVPTXISD::RETURN";
410  return "NVPTXISD::CallSeqBegin";
412  return "NVPTXISD::CallSeqEnd";
414  return "NVPTXISD::CallPrototype";
415  case NVPTXISD::LoadV2:
416  return "NVPTXISD::LoadV2";
417  case NVPTXISD::LoadV4:
418  return "NVPTXISD::LoadV4";
419  case NVPTXISD::LDGV2:
420  return "NVPTXISD::LDGV2";
421  case NVPTXISD::LDGV4:
422  return "NVPTXISD::LDGV4";
423  case NVPTXISD::LDUV2:
424  return "NVPTXISD::LDUV2";
425  case NVPTXISD::LDUV4:
426  return "NVPTXISD::LDUV4";
427  case NVPTXISD::StoreV2:
428  return "NVPTXISD::StoreV2";
429  case NVPTXISD::StoreV4:
430  return "NVPTXISD::StoreV4";
432  return "NVPTXISD::FUN_SHFL_CLAMP";
434  return "NVPTXISD::FUN_SHFR_CLAMP";
435  case NVPTXISD::IMAD:
436  return "NVPTXISD::IMAD";
437  case NVPTXISD::Dummy:
438  return "NVPTXISD::Dummy";
440  return "NVPTXISD::MUL_WIDE_SIGNED";
442  return "NVPTXISD::MUL_WIDE_UNSIGNED";
443  case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32";
444  case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
446  return "NVPTXISD::Tex1DFloatFloatLevel";
448  return "NVPTXISD::Tex1DFloatFloatGrad";
449  case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32";
450  case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float";
452  return "NVPTXISD::Tex1DS32FloatLevel";
454  return "NVPTXISD::Tex1DS32FloatGrad";
455  case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32";
456  case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float";
458  return "NVPTXISD::Tex1DU32FloatLevel";
460  return "NVPTXISD::Tex1DU32FloatGrad";
461  case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32";
462  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
464  return "NVPTXISD::Tex1DArrayFloatFloatLevel";
466  return "NVPTXISD::Tex1DArrayFloatFloatGrad";
467  case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32";
468  case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float";
470  return "NVPTXISD::Tex1DArrayS32FloatLevel";
472  return "NVPTXISD::Tex1DArrayS32FloatGrad";
473  case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32";
474  case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float";
476  return "NVPTXISD::Tex1DArrayU32FloatLevel";
478  return "NVPTXISD::Tex1DArrayU32FloatGrad";
479  case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32";
480  case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
482  return "NVPTXISD::Tex2DFloatFloatLevel";
484  return "NVPTXISD::Tex2DFloatFloatGrad";
485  case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32";
486  case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float";
488  return "NVPTXISD::Tex2DS32FloatLevel";
490  return "NVPTXISD::Tex2DS32FloatGrad";
491  case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32";
492  case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float";
494  return "NVPTXISD::Tex2DU32FloatLevel";
496  return "NVPTXISD::Tex2DU32FloatGrad";
497  case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32";
498  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
500  return "NVPTXISD::Tex2DArrayFloatFloatLevel";
502  return "NVPTXISD::Tex2DArrayFloatFloatGrad";
503  case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32";
504  case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float";
506  return "NVPTXISD::Tex2DArrayS32FloatLevel";
508  return "NVPTXISD::Tex2DArrayS32FloatGrad";
509  case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32";
510  case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float";
512  return "NVPTXISD::Tex2DArrayU32FloatLevel";
514  return "NVPTXISD::Tex2DArrayU32FloatGrad";
515  case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32";
516  case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
518  return "NVPTXISD::Tex3DFloatFloatLevel";
520  return "NVPTXISD::Tex3DFloatFloatGrad";
521  case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32";
522  case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float";
524  return "NVPTXISD::Tex3DS32FloatLevel";
526  return "NVPTXISD::Tex3DS32FloatGrad";
527  case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32";
528  case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float";
530  return "NVPTXISD::Tex3DU32FloatLevel";
532  return "NVPTXISD::Tex3DU32FloatGrad";
533  case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat";
535  return "NVPTXISD::TexCubeFloatFloatLevel";
536  case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float";
538  return "NVPTXISD::TexCubeS32FloatLevel";
539  case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float";
541  return "NVPTXISD::TexCubeU32FloatLevel";
543  return "NVPTXISD::TexCubeArrayFloatFloat";
545  return "NVPTXISD::TexCubeArrayFloatFloatLevel";
547  return "NVPTXISD::TexCubeArrayS32Float";
549  return "NVPTXISD::TexCubeArrayS32FloatLevel";
551  return "NVPTXISD::TexCubeArrayU32Float";
553  return "NVPTXISD::TexCubeArrayU32FloatLevel";
555  return "NVPTXISD::Tld4R2DFloatFloat";
557  return "NVPTXISD::Tld4G2DFloatFloat";
559  return "NVPTXISD::Tld4B2DFloatFloat";
561  return "NVPTXISD::Tld4A2DFloatFloat";
563  return "NVPTXISD::Tld4R2DS64Float";
565  return "NVPTXISD::Tld4G2DS64Float";
567  return "NVPTXISD::Tld4B2DS64Float";
569  return "NVPTXISD::Tld4A2DS64Float";
571  return "NVPTXISD::Tld4R2DU64Float";
573  return "NVPTXISD::Tld4G2DU64Float";
575  return "NVPTXISD::Tld4B2DU64Float";
577  return "NVPTXISD::Tld4A2DU64Float";
578 
580  return "NVPTXISD::TexUnified1DFloatS32";
582  return "NVPTXISD::TexUnified1DFloatFloat";
584  return "NVPTXISD::TexUnified1DFloatFloatLevel";
586  return "NVPTXISD::TexUnified1DFloatFloatGrad";
588  return "NVPTXISD::TexUnified1DS32S32";
590  return "NVPTXISD::TexUnified1DS32Float";
592  return "NVPTXISD::TexUnified1DS32FloatLevel";
594  return "NVPTXISD::TexUnified1DS32FloatGrad";
596  return "NVPTXISD::TexUnified1DU32S32";
598  return "NVPTXISD::TexUnified1DU32Float";
600  return "NVPTXISD::TexUnified1DU32FloatLevel";
602  return "NVPTXISD::TexUnified1DU32FloatGrad";
604  return "NVPTXISD::TexUnified1DArrayFloatS32";
606  return "NVPTXISD::TexUnified1DArrayFloatFloat";
608  return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
610  return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
612  return "NVPTXISD::TexUnified1DArrayS32S32";
614  return "NVPTXISD::TexUnified1DArrayS32Float";
616  return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
618  return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
620  return "NVPTXISD::TexUnified1DArrayU32S32";
622  return "NVPTXISD::TexUnified1DArrayU32Float";
624  return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
626  return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
628  return "NVPTXISD::TexUnified2DFloatS32";
630  return "NVPTXISD::TexUnified2DFloatFloat";
632  return "NVPTXISD::TexUnified2DFloatFloatLevel";
634  return "NVPTXISD::TexUnified2DFloatFloatGrad";
636  return "NVPTXISD::TexUnified2DS32S32";
638  return "NVPTXISD::TexUnified2DS32Float";
640  return "NVPTXISD::TexUnified2DS32FloatLevel";
642  return "NVPTXISD::TexUnified2DS32FloatGrad";
644  return "NVPTXISD::TexUnified2DU32S32";
646  return "NVPTXISD::TexUnified2DU32Float";
648  return "NVPTXISD::TexUnified2DU32FloatLevel";
650  return "NVPTXISD::TexUnified2DU32FloatGrad";
652  return "NVPTXISD::TexUnified2DArrayFloatS32";
654  return "NVPTXISD::TexUnified2DArrayFloatFloat";
656  return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
658  return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
660  return "NVPTXISD::TexUnified2DArrayS32S32";
662  return "NVPTXISD::TexUnified2DArrayS32Float";
664  return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
666  return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
668  return "NVPTXISD::TexUnified2DArrayU32S32";
670  return "NVPTXISD::TexUnified2DArrayU32Float";
672  return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
674  return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
676  return "NVPTXISD::TexUnified3DFloatS32";
678  return "NVPTXISD::TexUnified3DFloatFloat";
680  return "NVPTXISD::TexUnified3DFloatFloatLevel";
682  return "NVPTXISD::TexUnified3DFloatFloatGrad";
684  return "NVPTXISD::TexUnified3DS32S32";
686  return "NVPTXISD::TexUnified3DS32Float";
688  return "NVPTXISD::TexUnified3DS32FloatLevel";
690  return "NVPTXISD::TexUnified3DS32FloatGrad";
692  return "NVPTXISD::TexUnified3DU32S32";
694  return "NVPTXISD::TexUnified3DU32Float";
696  return "NVPTXISD::TexUnified3DU32FloatLevel";
698  return "NVPTXISD::TexUnified3DU32FloatGrad";
700  return "NVPTXISD::TexUnifiedCubeFloatFloat";
702  return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
704  return "NVPTXISD::TexUnifiedCubeS32Float";
706  return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
708  return "NVPTXISD::TexUnifiedCubeU32Float";
710  return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
712  return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
714  return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
716  return "NVPTXISD::TexUnifiedCubeArrayS32Float";
718  return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
720  return "NVPTXISD::TexUnifiedCubeArrayU32Float";
722  return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
724  return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
726  return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
728  return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
730  return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
732  return "NVPTXISD::Tld4UnifiedR2DS64Float";
734  return "NVPTXISD::Tld4UnifiedG2DS64Float";
736  return "NVPTXISD::Tld4UnifiedB2DS64Float";
738  return "NVPTXISD::Tld4UnifiedA2DS64Float";
740  return "NVPTXISD::Tld4UnifiedR2DU64Float";
742  return "NVPTXISD::Tld4UnifiedG2DU64Float";
744  return "NVPTXISD::Tld4UnifiedB2DU64Float";
746  return "NVPTXISD::Tld4UnifiedA2DU64Float";
747 
748  case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp";
749  case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp";
750  case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp";
751  case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp";
752  case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp";
753  case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp";
754  case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp";
755  case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp";
756  case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp";
757  case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp";
758  case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp";
759 
760  case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp";
761  case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp";
762  case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp";
763  case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp";
764  case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
765  case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
766  case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
767  case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
768  case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
769  case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
770  case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";
771 
772  case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp";
773  case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp";
774  case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp";
775  case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp";
776  case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp";
777  case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp";
778  case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp";
779  case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp";
780  case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp";
781  case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp";
782  case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp";
783 
784  case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp";
785  case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp";
786  case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp";
787  case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp";
788  case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
789  case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
790  case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
791  case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
792  case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
793  case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
794  case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";
795 
796  case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp";
797  case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp";
798  case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp";
799  case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp";
800  case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp";
801  case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp";
802  case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp";
803  case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp";
804  case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp";
805  case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp";
806  case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp";
807 
808  case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
809  case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
810  case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
811  case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap";
812  case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
813  case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
814  case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
815  case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap";
816  case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
817  case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
818  case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";
819 
820  case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
821  case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
822  case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
823  case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap";
824  case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
825  case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
826  case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
827  case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap";
828  case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
829  case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
830  case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";
831 
832  case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
833  case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
834  case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
835  case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap";
836  case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
837  case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
838  case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
839  case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap";
840  case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
841  case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
842  case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";
843 
844  case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
845  case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
846  case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
847  case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap";
848  case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
849  case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
850  case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
851  case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap";
852  case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
853  case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
854  case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";
855 
856  case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
857  case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
858  case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
859  case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap";
860  case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
861  case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
862  case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
863  case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap";
864  case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
865  case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
866  case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";
867 
868  case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero";
869  case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero";
870  case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero";
871  case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero";
872  case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero";
873  case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero";
874  case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero";
875  case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero";
876  case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero";
877  case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero";
878  case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero";
879 
880  case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero";
881  case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero";
882  case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero";
883  case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero";
884  case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero";
885  case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero";
886  case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero";
887  case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero";
888  case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero";
889  case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero";
890  case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero";
891 
892  case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero";
893  case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero";
894  case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero";
895  case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero";
896  case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero";
897  case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero";
898  case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero";
899  case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero";
900  case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero";
901  case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero";
902  case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero";
903 
904  case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero";
905  case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero";
906  case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero";
907  case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero";
908  case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero";
909  case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero";
910  case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero";
911  case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero";
912  case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero";
913  case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero";
914  case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero";
915 
916  case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero";
917  case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero";
918  case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero";
919  case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero";
920  case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero";
921  case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero";
922  case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero";
923  case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero";
924  case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero";
925  case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
926  case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
927  }
928  return nullptr;
929 }
930 
 931 TargetLoweringBase::LegalizeTypeAction
 932 NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
 933  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
 934  return TypeSplitVector;
 935 
 936  return TargetLoweringBase::getPreferredVectorAction(VT);
 937 }
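// E.g. a v4i1 value is split into scalar i1 operations rather than widened,
// since PTX has no vector form of predicate registers.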
938 
939 SDValue
 940 NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
 941  SDLoc dl(Op);
942  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
943  auto PtrVT = getPointerTy(DAG.getDataLayout());
944  Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
945  return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
946 }
947 
 948 std::string NVPTXTargetLowering::getPrototype(
 949  const DataLayout &DL, Type *retTy, const ArgListTy &Args,
950  const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
951  const ImmutableCallSite *CS) const {
952  auto PtrVT = getPointerTy(DL);
953 
954  bool isABI = (STI.getSmVersion() >= 20);
955  assert(isABI && "Non-ABI compilation is not supported");
956  if (!isABI)
957  return "";
958 
959  std::stringstream O;
960  O << "prototype_" << uniqueCallSite << " : .callprototype ";
961 
962  if (retTy->getTypeID() == Type::VoidTyID) {
963  O << "()";
964  } else {
965  O << "(";
966  if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
967  unsigned size = 0;
968  if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
969  size = ITy->getBitWidth();
970  if (size < 32)
971  size = 32;
972  } else {
973  assert(retTy->isFloatingPointTy() &&
974  "Floating point type expected here");
975  size = retTy->getPrimitiveSizeInBits();
976  }
977 
978  O << ".param .b" << size << " _";
979  } else if (isa<PointerType>(retTy)) {
980  O << ".param .b" << PtrVT.getSizeInBits() << " _";
981  } else if ((retTy->getTypeID() == Type::StructTyID) ||
982  isa<VectorType>(retTy)) {
983  auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
984  O << ".param .align " << retAlignment << " .b8 _["
985  << DL.getTypeAllocSize(retTy) << "]";
986  } else {
987  llvm_unreachable("Unknown return type");
988  }
989  O << ") ";
990  }
991  O << "_ (";
992 
993  bool first = true;
994 
995  unsigned OIdx = 0;
996  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
997  Type *Ty = Args[i].Ty;
998  if (!first) {
999  O << ", ";
1000  }
1001  first = false;
1002 
1003  if (!Outs[OIdx].Flags.isByVal()) {
1004  if (Ty->isAggregateType() || Ty->isVectorTy()) {
1005  unsigned align = 0;
1006  const CallInst *CallI = cast<CallInst>(CS->getInstruction());
1007  // +1 because index 0 is reserved for return type alignment
1008  if (!getAlign(*CallI, i + 1, align))
1009  align = DL.getABITypeAlignment(Ty);
1010  unsigned sz = DL.getTypeAllocSize(Ty);
1011  O << ".param .align " << align << " .b8 ";
1012  O << "_";
1013  O << "[" << sz << "]";
1014  // update the index for Outs
1015  SmallVector<EVT, 16> vtparts;
1016  ComputeValueVTs(*this, DL, Ty, vtparts);
1017  if (unsigned len = vtparts.size())
1018  OIdx += len - 1;
1019  continue;
1020  }
1021  // i8 types in IR will be i16 types in SDAG
1022  assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
1023  (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
1024  "type mismatch between callee prototype and arguments");
1025  // scalar type
1026  unsigned sz = 0;
1027  if (isa<IntegerType>(Ty)) {
1028  sz = cast<IntegerType>(Ty)->getBitWidth();
1029  if (sz < 32)
1030  sz = 32;
1031  } else if (isa<PointerType>(Ty))
1032  sz = PtrVT.getSizeInBits();
1033  else
1034  sz = Ty->getPrimitiveSizeInBits();
1035  O << ".param .b" << sz << " ";
1036  O << "_";
1037  continue;
1038  }
1039  auto *PTy = dyn_cast<PointerType>(Ty);
1040  assert(PTy && "Param with byval attribute should be a pointer type");
1041  Type *ETy = PTy->getElementType();
1042 
1043  unsigned align = Outs[OIdx].Flags.getByValAlign();
1044  unsigned sz = DL.getTypeAllocSize(ETy);
1045  O << ".param .align " << align << " .b8 ";
1046  O << "_";
1047  O << "[" << sz << "]";
1048  }
1049  O << ");";
1050  return O.str();
1051 }
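// For illustration, given a hypothetical indirect callee of type
// "float (i32, float*)" on a 64-bit target, the string built above would be
// roughly:
//
//   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b64 _);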
1052 
1053 unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
1054  const ImmutableCallSite *CS,
1055  Type *Ty, unsigned Idx,
1056  const DataLayout &DL) const {
1057  if (!CS) {
 1058  // CallSite is null; fall back to the ABI type alignment.
1059  return DL.getABITypeAlignment(Ty);
1060  }
1061 
1062  unsigned Align = 0;
1063  const Value *DirectCallee = CS->getCalledFunction();
1064 
1065  if (!DirectCallee) {
1066  // We don't have a direct function symbol, but that may be because of
1067  // constant cast instructions in the call.
1068  const Instruction *CalleeI = CS->getInstruction();
1069  assert(CalleeI && "Call target is not a function or derived value?");
1070 
1071  // With bitcast'd call targets, the instruction will be the call
1072  if (isa<CallInst>(CalleeI)) {
1073  // Check if we have call alignment metadata
1074  if (getAlign(*cast<CallInst>(CalleeI), Idx, Align))
1075  return Align;
1076 
1077  const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
1078  // Ignore any bitcast instructions
1079  while (isa<ConstantExpr>(CalleeV)) {
1080  const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
1081  if (!CE->isCast())
1082  break;
1083  // Look through the bitcast
1084  CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
1085  }
1086 
1087  // We have now looked past all of the bitcasts. Do we finally have a
1088  // Function?
1089  if (isa<Function>(CalleeV))
1090  DirectCallee = CalleeV;
1091  }
1092  }
1093 
1094  // Check for function alignment information if we found that the
1095  // ultimate target is a Function
1096  if (DirectCallee)
1097  if (getAlign(*cast<Function>(DirectCallee), Idx, Align))
1098  return Align;
1099 
1100  // Call is indirect or alignment information is not available, fall back to
1101  // the ABI type alignment
1102  return DL.getABITypeAlignment(Ty);
1103 }
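// For illustration, with an IR call through a constant bitcast such as
//
//   %v = call float bitcast (i32 (i32)* @f to float (i32)*)(i32 %x)
//
// getCalledFunction() is null, but the loop above strips the ConstantExpr
// cast and recovers @f, whose alignment annotation (if any) is then used.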
1104 
 1105 SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 1106  SmallVectorImpl<SDValue> &InVals) const {
1107  SelectionDAG &DAG = CLI.DAG;
1108  SDLoc dl = CLI.DL;
 1109  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
 1110  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
 1111  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1112  SDValue Chain = CLI.Chain;
1113  SDValue Callee = CLI.Callee;
1114  bool &isTailCall = CLI.IsTailCall;
1115  ArgListTy &Args = CLI.getArgs();
1116  Type *retTy = CLI.RetTy;
1117  ImmutableCallSite *CS = CLI.CS;
1118 
1119  bool isABI = (STI.getSmVersion() >= 20);
1120  assert(isABI && "Non-ABI compilation is not supported");
1121  if (!isABI)
1122  return Chain;
1123  MachineFunction &MF = DAG.getMachineFunction();
1124  const Function *F = MF.getFunction();
1125  auto &DL = MF.getDataLayout();
1126 
1127  SDValue tempChain = Chain;
1128  Chain = DAG.getCALLSEQ_START(Chain,
1129  DAG.getIntPtrConstant(uniqueCallSite, dl, true),
1130  dl);
1131  SDValue InFlag = Chain.getValue(1);
1132 
1133  unsigned paramCount = 0;
1134  // Args.size() and Outs.size() need not match.
1135  // Outs.size() will be larger
1136  // * if there is an aggregate argument with multiple fields (each field
1137  // showing up separately in Outs)
1138  // * if there is a vector argument with more than typical vector-length
1139  // elements (generally if more than 4) where each vector element is
1140  // individually present in Outs.
1141  // So a different index should be used for indexing into Outs/OutVals.
1142  // See similar issue in LowerFormalArguments.
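 // For example, an argument of IR type {i32, i32} occupies a single slot in
 // Args but two consecutive slots in Outs/OutVals (one per field), so i and
 // OIdx advance at different rates below.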
1143  unsigned OIdx = 0;
 1144  // Declare the .param or .reg needed to pass values
 1145  // to the function
1146  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
1147  EVT VT = Outs[OIdx].VT;
1148  Type *Ty = Args[i].Ty;
1149 
1150  if (!Outs[OIdx].Flags.isByVal()) {
1151  if (Ty->isAggregateType()) {
1152  // aggregate
1153  SmallVector<EVT, 16> vtparts;
 1154  SmallVector<uint64_t, 16> Offsets;
 1155  ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
1156  0);
1157 
1158  unsigned align =
1159  getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
1160  // declare .param .align <align> .b8 .param<n>[<size>];
1161  unsigned sz = DL.getTypeAllocSize(Ty);
1162  SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1163  SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
1164  MVT::i32),
1165  DAG.getConstant(paramCount, dl, MVT::i32),
1166  DAG.getConstant(sz, dl, MVT::i32),
1167  InFlag };
1168  Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
1169  DeclareParamOps);
1170  InFlag = Chain.getValue(1);
1171  for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
1172  EVT elemtype = vtparts[j];
1173  unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
1174  if (elemtype.isInteger() && (sz < 8))
1175  sz = 8;
1176  SDValue StVal = OutVals[OIdx];
1177  if (elemtype.getSizeInBits() < 16) {
1178  StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
1179  }
1180  SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1181  SDValue CopyParamOps[] = { Chain,
1182  DAG.getConstant(paramCount, dl, MVT::i32),
1183  DAG.getConstant(Offsets[j], dl, MVT::i32),
1184  StVal, InFlag };
1185  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
1186  CopyParamVTs, CopyParamOps,
1187  elemtype, MachinePointerInfo(),
1188  ArgAlign);
1189  InFlag = Chain.getValue(1);
1190  ++OIdx;
1191  }
1192  if (vtparts.size() > 0)
1193  --OIdx;
1194  ++paramCount;
1195  continue;
1196  }
1197  if (Ty->isVectorTy()) {
1198  EVT ObjectVT = getValueType(DL, Ty);
1199  unsigned align =
1200  getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
1201  // declare .param .align <align> .b8 .param<n>[<size>];
1202  unsigned sz = DL.getTypeAllocSize(Ty);
1203  SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1204  SDValue DeclareParamOps[] = { Chain,
1205  DAG.getConstant(align, dl, MVT::i32),
1206  DAG.getConstant(paramCount, dl, MVT::i32),
1207  DAG.getConstant(sz, dl, MVT::i32),
1208  InFlag };
1209  Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
1210  DeclareParamOps);
1211  InFlag = Chain.getValue(1);
1212  unsigned NumElts = ObjectVT.getVectorNumElements();
1213  EVT EltVT = ObjectVT.getVectorElementType();
1214  EVT MemVT = EltVT;
1215  bool NeedExtend = false;
1216  if (EltVT.getSizeInBits() < 16) {
1217  NeedExtend = true;
1218  EltVT = MVT::i16;
1219  }
1220 
1221  // V1 store
1222  if (NumElts == 1) {
1223  SDValue Elt = OutVals[OIdx++];
1224  if (NeedExtend)
1225  Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);
1226 
1227  SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1228  SDValue CopyParamOps[] = { Chain,
1229  DAG.getConstant(paramCount, dl, MVT::i32),
1230  DAG.getConstant(0, dl, MVT::i32), Elt,
1231  InFlag };
1232  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
1233  CopyParamVTs, CopyParamOps,
1234  MemVT, MachinePointerInfo());
1235  InFlag = Chain.getValue(1);
1236  } else if (NumElts == 2) {
1237  SDValue Elt0 = OutVals[OIdx++];
1238  SDValue Elt1 = OutVals[OIdx++];
1239  if (NeedExtend) {
1240  Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
1241  Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
1242  }
1243 
1244  SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1245  SDValue CopyParamOps[] = { Chain,
1246  DAG.getConstant(paramCount, dl, MVT::i32),
1247  DAG.getConstant(0, dl, MVT::i32), Elt0,
1248  Elt1, InFlag };
 1249  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
 1250  CopyParamVTs, CopyParamOps,
1251  MemVT, MachinePointerInfo());
1252  InFlag = Chain.getValue(1);
1253  } else {
1254  unsigned curOffset = 0;
1255  // V4 stores
 1256  // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
 1257  // vector will be expanded to a power of 2 elements, so we know we can
 1258  // always round up to the next multiple of 4 when creating the vector
1259  // always round up to the next multiple of 4 when creating the vector
1260  // stores.
1261  // e.g. 4 elem => 1 st.v4
1262  // 6 elem => 2 st.v4
1263  // 8 elem => 2 st.v4
1264  // 11 elem => 3 st.v4
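 // i.e. the number of vector stores is ceil(NumElts / VecSize).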
1265  unsigned VecSize = 4;
1266  if (EltVT.getSizeInBits() == 64)
1267  VecSize = 2;
1268 
1269  // This is potentially only part of a vector, so assume all elements
1270  // are packed together.
1271  unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;
1272 
1273  for (unsigned i = 0; i < NumElts; i += VecSize) {
1274  // Get values
1275  SDValue StoreVal;
 1276  SmallVector<SDValue, 8> Ops;
 1277  Ops.push_back(Chain);
1278  Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
1279  Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32));
1280 
1281  unsigned Opc = NVPTXISD::StoreParamV2;
1282 
1283  StoreVal = OutVals[OIdx++];
1284  if (NeedExtend)
1285  StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1286  Ops.push_back(StoreVal);
1287 
1288  if (i + 1 < NumElts) {
1289  StoreVal = OutVals[OIdx++];
1290  if (NeedExtend)
1291  StoreVal =
1292  DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1293  } else {
1294  StoreVal = DAG.getUNDEF(EltVT);
1295  }
1296  Ops.push_back(StoreVal);
1297 
1298  if (VecSize == 4) {
1299  Opc = NVPTXISD::StoreParamV4;
1300  if (i + 2 < NumElts) {
1301  StoreVal = OutVals[OIdx++];
1302  if (NeedExtend)
1303  StoreVal =
1304  DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1305  } else {
1306  StoreVal = DAG.getUNDEF(EltVT);
1307  }
1308  Ops.push_back(StoreVal);
1309 
1310  if (i + 3 < NumElts) {
1311  StoreVal = OutVals[OIdx++];
1312  if (NeedExtend)
1313  StoreVal =
1314  DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1315  } else {
1316  StoreVal = DAG.getUNDEF(EltVT);
1317  }
1318  Ops.push_back(StoreVal);
1319  }
1320 
1321  Ops.push_back(InFlag);
1322 
1323  SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1324  Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
1325  MemVT, MachinePointerInfo());
1326  InFlag = Chain.getValue(1);
1327  curOffset += PerStoreOffset;
1328  }
1329  }
1330  ++paramCount;
1331  --OIdx;
1332  continue;
1333  }
1334  // Plain scalar
1335  // for ABI, declare .param .b<size> .param<n>;
1336  unsigned sz = VT.getSizeInBits();
1337  bool needExtend = false;
1338  if (VT.isInteger()) {
1339  if (sz < 16)
1340  needExtend = true;
1341  if (sz < 32)
1342  sz = 32;
1343  }
1344  SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1345  SDValue DeclareParamOps[] = { Chain,
1346  DAG.getConstant(paramCount, dl, MVT::i32),
1347  DAG.getConstant(sz, dl, MVT::i32),
1348  DAG.getConstant(0, dl, MVT::i32), InFlag };
1349  Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
1350  DeclareParamOps);
1351  InFlag = Chain.getValue(1);
1352  SDValue OutV = OutVals[OIdx];
1353  if (needExtend) {
1354  // zext/sext i1 to i16
1355  unsigned opc = ISD::ZERO_EXTEND;
1356  if (Outs[OIdx].Flags.isSExt())
1357  opc = ISD::SIGN_EXTEND;
1358  OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
1359  }
1360  SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1361  SDValue CopyParamOps[] = { Chain,
1362  DAG.getConstant(paramCount, dl, MVT::i32),
1363  DAG.getConstant(0, dl, MVT::i32), OutV,
1364  InFlag };
1365 
1366  unsigned opcode = NVPTXISD::StoreParam;
1367  if (Outs[OIdx].Flags.isZExt() && VT.getSizeInBits() < 32)
1368  opcode = NVPTXISD::StoreParamU32;
1369  else if (Outs[OIdx].Flags.isSExt() && VT.getSizeInBits() < 32)
1370  opcode = NVPTXISD::StoreParamS32;
1371  Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
1372  VT, MachinePointerInfo());
1373 
1374  InFlag = Chain.getValue(1);
1375  ++paramCount;
1376  continue;
1377  }
1378  // struct or vector
1379  SmallVector<EVT, 16> vtparts;
 1380  SmallVector<uint64_t, 16> Offsets;
 1381  auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
1382  assert(PTy && "Type of a byval parameter should be pointer");
1383  ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
1384  vtparts, &Offsets, 0);
1385 
1386  // declare .param .align <align> .b8 .param<n>[<size>];
1387  unsigned sz = Outs[OIdx].Flags.getByValSize();
1388  SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1389  unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
 1390  // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
1391  // so we don't need to worry about natural alignment or not.
1392  // See TargetLowering::LowerCallTo().
1393 
 1394  // Enforce minimum alignment of 4 to work around ptxas miscompile
1395  // for sm_50+. See corresponding alignment adjustment in
1396  // emitFunctionParamList() for details.
1397  if (ArgAlign < 4)
1398  ArgAlign = 4;
1399  SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
1400  DAG.getConstant(paramCount, dl, MVT::i32),
1401  DAG.getConstant(sz, dl, MVT::i32), InFlag};
1402  Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
1403  DeclareParamOps);
1404  InFlag = Chain.getValue(1);
1405  for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
1406  EVT elemtype = vtparts[j];
1407  int curOffset = Offsets[j];
1408  unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
1409  auto PtrVT = getPointerTy(DAG.getDataLayout());
1410  SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
1411  DAG.getConstant(curOffset, dl, PtrVT));
1412  SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
1413  MachinePointerInfo(), PartAlign);
1414  if (elemtype.getSizeInBits() < 16) {
1415  theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
1416  }
1417  SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1418  SDValue CopyParamOps[] = { Chain,
1419  DAG.getConstant(paramCount, dl, MVT::i32),
1420  DAG.getConstant(curOffset, dl, MVT::i32),
1421  theVal, InFlag };
1422  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
1423  CopyParamOps, elemtype,
1424  MachinePointerInfo());
1425 
1426  InFlag = Chain.getValue(1);
1427  }
1428  ++paramCount;
1429  }
1430 
 1431  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
 1432  unsigned retAlignment = 0;
1433 
1434  // Handle Result
1435  if (Ins.size() > 0) {
1436  SmallVector<EVT, 16> resvtparts;
1437  ComputeValueVTs(*this, DL, retTy, resvtparts);
1438 
1439  // Declare
1440  // .param .align 16 .b8 retval0[<size-in-bytes>], or
1441  // .param .b<size-in-bits> retval0
1442  unsigned resultsz = DL.getTypeAllocSizeInBits(retTy);
1443  // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
1444  // these three types to match the logic in
1445  // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
1446  // Plus, this behavior is consistent with nvcc's.
1447  if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
1448  retTy->isPointerTy()) {
1449  // Scalar needs to be at least 32bit wide
1450  if (resultsz < 32)
1451  resultsz = 32;
1452  SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1453  SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
1454  DAG.getConstant(resultsz, dl, MVT::i32),
1455  DAG.getConstant(0, dl, MVT::i32), InFlag };
1456  Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
1457  DeclareRetOps);
1458  InFlag = Chain.getValue(1);
1459  } else {
1460  retAlignment = getArgumentAlignment(Callee, CS, retTy, 0, DL);
1461  SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1462  SDValue DeclareRetOps[] = { Chain,
1463  DAG.getConstant(retAlignment, dl, MVT::i32),
1464  DAG.getConstant(resultsz / 8, dl, MVT::i32),
1465  DAG.getConstant(0, dl, MVT::i32), InFlag };
1466  Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
1467  DeclareRetOps);
1468  InFlag = Chain.getValue(1);
1469  }
1470  }
1471 
1472  if (!Func) {
 1473  // This is the indirect function call case: PTX requires a prototype of
 1474  // the form
 1475  // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
 1476  // to be emitted, and the label has to be used as the last arg of the call
 1477  // instruction.
1478  // The prototype is embedded in a string and put as the operand for a
1479  // CallPrototype SDNode which will print out to the value of the string.
1480  SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1481  std::string Proto =
1482  getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS);
1483  const char *ProtoStr =
1484  nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
1485  SDValue ProtoOps[] = {
1486  Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
1487  };
1488  Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
1489  InFlag = Chain.getValue(1);
1490  }
1491  // Op to just print "call"
1492  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1493  SDValue PrintCallOps[] = {
1494  Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
1495  };
1496  // We model convergent calls as separate opcodes.
1497  unsigned Opcode = Func ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall;
 1498  if (CLI.IsConvergent)
 1499  Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
 1500  : NVPTXISD::PrintConvergentCall;
 1501  Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
1502  InFlag = Chain.getValue(1);
1503 
1504  // Ops to print out the function name
1505  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1506  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
1507  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
1508  InFlag = Chain.getValue(1);
1509 
1510  // Ops to print out the param list
1511  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1512  SDValue CallArgBeginOps[] = { Chain, InFlag };
1513  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
1514  CallArgBeginOps);
1515  InFlag = Chain.getValue(1);
1516 
1517  for (unsigned i = 0, e = paramCount; i != e; ++i) {
1518  unsigned opcode;
1519  if (i == (e - 1))
1520  opcode = NVPTXISD::LastCallArg;
1521  else
1522  opcode = NVPTXISD::CallArg;
1523  SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1524  SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
1525  DAG.getConstant(i, dl, MVT::i32), InFlag };
1526  Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
1527  InFlag = Chain.getValue(1);
1528  }
1529  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1530  SDValue CallArgEndOps[] = { Chain,
1531  DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
1532  InFlag };
1533  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
1534  InFlag = Chain.getValue(1);
1535 
1536  if (!Func) {
1537  SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1538  SDValue PrototypeOps[] = { Chain,
 1539  DAG.getConstant(uniqueCallSite, dl, MVT::i32),
 1540  InFlag };
1541  Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
1542  InFlag = Chain.getValue(1);
1543  }
1544 
1545  // Generate loads from param memory/moves from registers for result
1546  if (Ins.size() > 0) {
1547  if (retTy && retTy->isVectorTy()) {
1548  EVT ObjectVT = getValueType(DL, retTy);
1549  unsigned NumElts = ObjectVT.getVectorNumElements();
1550  EVT EltVT = ObjectVT.getVectorElementType();
 1551  assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
 1552  ObjectVT) == NumElts &&
1553  "Vector was not scalarized");
1554  unsigned sz = EltVT.getSizeInBits();
1555  bool needTruncate = sz < 8;
1556 
1557  if (NumElts == 1) {
1558  // Just a simple load
1559  SmallVector<EVT, 4> LoadRetVTs;
1560  if (EltVT == MVT::i1 || EltVT == MVT::i8) {
1561  // If loading i1/i8 result, generate
1562  // load.b8 i16
1563  // if i1
1564  // trunc i16 to i1
1565  LoadRetVTs.push_back(MVT::i16);
1566  } else
1567  LoadRetVTs.push_back(EltVT);
1568  LoadRetVTs.push_back(MVT::Other);
1569  LoadRetVTs.push_back(MVT::Glue);
1570  SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
1571  DAG.getConstant(0, dl, MVT::i32), InFlag};
1572  SDValue retval = DAG.getMemIntrinsicNode(
1573  NVPTXISD::LoadParam, dl,
1574  DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
1575  Chain = retval.getValue(1);
1576  InFlag = retval.getValue(2);
1577  SDValue Ret0 = retval;
1578  if (needTruncate)
1579  Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
1580  InVals.push_back(Ret0);
1581  } else if (NumElts == 2) {
1582  // LoadV2
1583  SmallVector<EVT, 4> LoadRetVTs;
1584  if (EltVT == MVT::i1 || EltVT == MVT::i8) {
1585  // If loading i1/i8 result, generate
1586  // load.b8 i16
1587  // if i1
1588  // trunc i16 to i1
1589  LoadRetVTs.push_back(MVT::i16);
1590  LoadRetVTs.push_back(MVT::i16);
1591  } else {
1592  LoadRetVTs.push_back(EltVT);
1593  LoadRetVTs.push_back(EltVT);
1594  }
1595  LoadRetVTs.push_back(MVT::Other);
1596  LoadRetVTs.push_back(MVT::Glue);
1597  SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
1598  DAG.getConstant(0, dl, MVT::i32), InFlag};
1599  SDValue retval = DAG.getMemIntrinsicNode(
 1600  NVPTXISD::LoadParamV2, dl,
 1601  DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
1602  Chain = retval.getValue(2);
1603  InFlag = retval.getValue(3);
1604  SDValue Ret0 = retval.getValue(0);
1605  SDValue Ret1 = retval.getValue(1);
1606  if (needTruncate) {
1607  Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
1608  InVals.push_back(Ret0);
1609  Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
1610  InVals.push_back(Ret1);
1611  } else {
1612  InVals.push_back(Ret0);
1613  InVals.push_back(Ret1);
1614  }
1615  } else {
1616  // Split into N LoadV4
1617  unsigned Ofst = 0;
1618  unsigned VecSize = 4;
1619  unsigned Opc = NVPTXISD::LoadParamV4;
1620  if (EltVT.getSizeInBits() == 64) {
1621  VecSize = 2;
1622  Opc = NVPTXISD::LoadParamV2;
1623  }
1624  EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
1625  for (unsigned i = 0; i < NumElts; i += VecSize) {
1626  SmallVector<EVT, 8> LoadRetVTs;
1627  if (EltVT == MVT::i1 || EltVT == MVT::i8) {
1628  // If loading i1/i8 result, generate
1629  // load.b8 i16
1630  // if i1
1631  // trunc i16 to i1
1632  for (unsigned j = 0; j < VecSize; ++j)
1633  LoadRetVTs.push_back(MVT::i16);
1634  } else {
1635  for (unsigned j = 0; j < VecSize; ++j)
1636  LoadRetVTs.push_back(EltVT);
1637  }
1638  LoadRetVTs.push_back(MVT::Other);
1639  LoadRetVTs.push_back(MVT::Glue);
1640  SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
1641  DAG.getConstant(Ofst, dl, MVT::i32), InFlag};
1642  SDValue retval = DAG.getMemIntrinsicNode(
1643  Opc, dl, DAG.getVTList(LoadRetVTs),
1644  LoadRetOps, EltVT, MachinePointerInfo());
1645  if (VecSize == 2) {
1646  Chain = retval.getValue(2);
1647  InFlag = retval.getValue(3);
1648  } else {
1649  Chain = retval.getValue(4);
1650  InFlag = retval.getValue(5);
1651  }
1652 
1653  for (unsigned j = 0; j < VecSize; ++j) {
1654  if (i + j >= NumElts)
1655  break;
1656  SDValue Elt = retval.getValue(j);
1657  if (needTruncate)
1658  Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
1659  InVals.push_back(Elt);
1660  }
1661  Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
1662  }
1663  }
1664  } else {
 1665  SmallVector<EVT, 16> VTs;
 1666  SmallVector<uint64_t, 16> Offsets;
 1667  auto &DL = DAG.getDataLayout();
1668  ComputePTXValueVTs(*this, DL, retTy, VTs, &Offsets, 0);
1669  assert(VTs.size() == Ins.size() && "Bad value decomposition");
1670  unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0, DL);
1671  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
1672  unsigned sz = VTs[i].getSizeInBits();
1673  unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
1674  bool needTruncate = false;
1675  if (VTs[i].isInteger() && sz < 8) {
1676  sz = 8;
1677  needTruncate = true;
1678  }
1679 
1680  SmallVector<EVT, 4> LoadRetVTs;
1681  EVT TheLoadType = VTs[i];
1682  if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) {
1683  // This is for integer types only, and specifically not for
1684  // aggregates.
1685  LoadRetVTs.push_back(MVT::i32);
1686  TheLoadType = MVT::i32;
1687  needTruncate = true;
1688  } else if (sz < 16) {
1689  // If loading i1/i8 result, generate
1690  // load i8 (-> i16)
1691  // trunc i16 to i1/i8
1692 
1693  // FIXME: Do we need to set needTruncate to true here, too? We could
1694  // not figure out what this branch is for in D17872, so we left it
1695  // alone. The comment above about loading i1/i8 may be wrong, as the
1696  // branch above seems to cover integers of size < 32.
1697  LoadRetVTs.push_back(MVT::i16);
1698  } else
1699  LoadRetVTs.push_back(Ins[i].VT);
1700  LoadRetVTs.push_back(MVT::Other);
1701  LoadRetVTs.push_back(MVT::Glue);
1702 
1703  SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
1704  DAG.getConstant(Offsets[i], dl, MVT::i32),
1705  InFlag};
1706  SDValue retval = DAG.getMemIntrinsicNode(
1707  NVPTXISD::LoadParam, dl,
1708  DAG.getVTList(LoadRetVTs), LoadRetOps,
1709  TheLoadType, MachinePointerInfo(), AlignI);
1710  Chain = retval.getValue(1);
1711  InFlag = retval.getValue(2);
1712  SDValue Ret0 = retval.getValue(0);
1713  if (needTruncate)
1714  Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
1715  InVals.push_back(Ret0);
1716  }
1717  }
1718  }
1719 
1720  Chain = DAG.getCALLSEQ_END(Chain,
1721  DAG.getIntPtrConstant(uniqueCallSite, dl, true),
1722  DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
1723  true),
1724  InFlag, dl);
1725  uniqueCallSite++;
1726 
1727  // set isTailCall to false for now, until we figure out how to express
1728  // tail call optimization in PTX
1729  isTailCall = false;
1730  return Chain;
1731 }
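// Illustrative sketch (editorial addition, not in the original source): for a
// call whose return type is a small integer such as i8, the retTy branch above
// widens the return slot to 32 bits, so the emitted PTX is roughly
//   .param .b32 retval0;
//   call.uni (retval0), callee, (param0);
//   ld.param.b32 %r1, [retval0+0];
// and the needTruncate path then inserts an ISD::TRUNCATE to recover the i8.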
1732 
1733 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
1734 // (see LegalizeDAG.cpp). This is slow and uses local memory.
1735 // We use extract/insert/build vector just as LegalizeOp() did in LLVM 2.5.
1736 SDValue
1737 NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
1738  SDNode *Node = Op.getNode();
1739  SDLoc dl(Node);
 1740  SmallVector<SDValue, 8> Ops;
 1741  unsigned NumOperands = Node->getNumOperands();
1742  for (unsigned i = 0; i < NumOperands; ++i) {
1743  SDValue SubOp = Node->getOperand(i);
1744  EVT VVT = SubOp.getNode()->getValueType(0);
1745  EVT EltVT = VVT.getVectorElementType();
1746  unsigned NumSubElem = VVT.getVectorNumElements();
1747  for (unsigned j = 0; j < NumSubElem; ++j) {
1748  Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
1749  DAG.getIntPtrConstant(j, dl)));
1750  }
1751  }
1752  return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
1753 }
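// For example (illustrative): concat_vectors(<2 x float> %a, <2 x float> %b)
// becomes build_vector(extract(%a, 0), extract(%a, 1), extract(%b, 0),
// extract(%b, 1)), which keeps the values in registers instead of spilling
// them through a local-memory stack slot as the default expansion would.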
1754 
1755 /// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which
1756 /// 1) return two i32 values and take a 2 x i32 value to shift plus a shift
1757 /// amount, or
1758 /// 2) return two i64 values and take a 2 x i64 value to shift plus a shift
1759 /// amount.
1760 SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
1761  SelectionDAG &DAG) const {
1762  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
 1763  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
 1764 
1765  EVT VT = Op.getValueType();
1766  unsigned VTBits = VT.getSizeInBits();
1767  SDLoc dl(Op);
1768  SDValue ShOpLo = Op.getOperand(0);
1769  SDValue ShOpHi = Op.getOperand(1);
1770  SDValue ShAmt = Op.getOperand(2);
1771  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
1772 
1773  if (VTBits == 32 && STI.getSmVersion() >= 35) {
 1774  // For 32-bit parts on sm_35 and later, we can use the funnel shift 'shf' instruction.
1775  // {dHi, dLo} = {aHi, aLo} >> Amt
1776  // dHi = aHi >> Amt
1777  // dLo = shf.r.clamp aLo, aHi, Amt
1778 
1779  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
1780  SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
1781  ShAmt);
1782 
1783  SDValue Ops[2] = { Lo, Hi };
1784  return DAG.getMergeValues(Ops, dl);
1785  }
1786  else {
1787  // {dHi, dLo} = {aHi, aLo} >> Amt
1788  // - if (Amt>=size) then
1789  // dLo = aHi >> (Amt-size)
1790  // dHi = aHi >> Amt (this is either all 0 or all 1)
1791  // else
1792  // dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
1793  // dHi = aHi >> Amt
1794 
1795  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1796  DAG.getConstant(VTBits, dl, MVT::i32),
1797  ShAmt);
1798  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
1799  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1800  DAG.getConstant(VTBits, dl, MVT::i32));
1801  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
1802  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
1803  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
1804 
1805  SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
1806  DAG.getConstant(VTBits, dl, MVT::i32),
1807  ISD::SETGE);
1808  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
1809  SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
1810 
1811  SDValue Ops[2] = { Lo, Hi };
1812  return DAG.getMergeValues(Ops, dl);
1813  }
1814 }
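// Worked example (illustrative): an i64 arithmetic shift right by 40 on a
// pre-sm_35 target, with {aHi, aLo} the two i32 halves and size = 32. Since
// 40 >= 32, the TrueVal path is selected:
//   dLo = aHi >>s (40 - 32) = aHi >>s 8
//   dHi = aHi >>s 40   (clamped shift; every bit is a copy of the sign bit)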
1815 
1816 /// LowerShiftLeftParts - Lower SHL_PARTS, which
1817 /// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
1818 /// amount, or
1819 /// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
1820 /// amount.
1821 SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
1822  SelectionDAG &DAG) const {
1823  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
1824  assert(Op.getOpcode() == ISD::SHL_PARTS);
1825 
1826  EVT VT = Op.getValueType();
1827  unsigned VTBits = VT.getSizeInBits();
1828  SDLoc dl(Op);
1829  SDValue ShOpLo = Op.getOperand(0);
1830  SDValue ShOpHi = Op.getOperand(1);
1831  SDValue ShAmt = Op.getOperand(2);
1832 
1833  if (VTBits == 32 && STI.getSmVersion() >= 35) {
 1834  // For 32-bit parts on sm_35 and later, we can use the funnel shift 'shf' instruction.
1835  // {dHi, dLo} = {aHi, aLo} << Amt
1836  // dHi = shf.l.clamp aLo, aHi, Amt
1837  // dLo = aLo << Amt
1838 
1839  SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
1840  ShAmt);
1841  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
1842 
1843  SDValue Ops[2] = { Lo, Hi };
1844  return DAG.getMergeValues(Ops, dl);
1845  }
1846  else {
1847  // {dHi, dLo} = {aHi, aLo} << Amt
1848  // - if (Amt>=size) then
1849  // dLo = aLo << Amt (all 0)
 1850  // dHi = aLo << (Amt-size)
1851  // else
1852  // dLo = aLo << Amt
1853  // dHi = (aHi << Amt) | (aLo >> (size-Amt))
1854 
1855  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1856  DAG.getConstant(VTBits, dl, MVT::i32),
1857  ShAmt);
1858  SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
1859  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1860  DAG.getConstant(VTBits, dl, MVT::i32));
1861  SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
1862  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
1863  SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
1864 
1865  SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
1866  DAG.getConstant(VTBits, dl, MVT::i32),
1867  ISD::SETGE);
1868  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
1869  SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
1870 
1871  SDValue Ops[2] = { Lo, Hi };
1872  return DAG.getMergeValues(Ops, dl);
1873  }
1874 }
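// Illustrative PTX for the sm_35+ funnel-shift path above (i64 shifted left,
// halves in %lo/%hi, amount in %amt); a sketch, not the exact emitted text:
//   shf.l.clamp.b32  %hi_out, %lo, %hi, %amt;
//   shl.b32          %lo_out, %lo, %amt;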
1875 
1876 SDValue
1877 NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 1878  switch (Op.getOpcode()) {
1879  case ISD::RETURNADDR:
1880  return SDValue();
1881  case ISD::FRAMEADDR:
1882  return SDValue();
1883  case ISD::GlobalAddress:
1884  return LowerGlobalAddress(Op, DAG);
 1885  case ISD::INTRINSIC_W_CHAIN:
 1886  return Op;
1887  case ISD::BUILD_VECTOR:
 1888  case ISD::EXTRACT_SUBVECTOR:
 1889  return Op;
1890  case ISD::CONCAT_VECTORS:
1891  return LowerCONCAT_VECTORS(Op, DAG);
1892  case ISD::STORE:
1893  return LowerSTORE(Op, DAG);
1894  case ISD::LOAD:
1895  return LowerLOAD(Op, DAG);
1896  case ISD::SHL_PARTS:
1897  return LowerShiftLeftParts(Op, DAG);
1898  case ISD::SRA_PARTS:
1899  case ISD::SRL_PARTS:
1900  return LowerShiftRightParts(Op, DAG);
1901  case ISD::SELECT:
1902  return LowerSelect(Op, DAG);
1903  default:
1904  llvm_unreachable("Custom lowering not defined for operation");
1905  }
1906 }
1907 
1908 SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
1909  SDValue Op0 = Op->getOperand(0);
1910  SDValue Op1 = Op->getOperand(1);
1911  SDValue Op2 = Op->getOperand(2);
1912  SDLoc DL(Op.getNode());
1913 
1914  assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
1915 
1916  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
1917  Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
1918  SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
1919  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
1920 
1921  return Trunc;
1922 }
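// For example (illustrative): (select i1 %c, i1 %a, i1 %b) is emitted as an
// any_extend of %a and %b to i32, a 32-bit select, and a truncate back to i1,
// since the PTX selp instruction does not operate on predicate registers.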
1923 
1924 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1925  if (Op.getValueType() == MVT::i1)
1926  return LowerLOADi1(Op, DAG);
1927  else
1928  return SDValue();
1929 }
1930 
1931 // v = ld i1* addr
1932 // =>
1933 // v1 = ld i8* addr (-> i16)
1934 // v = trunc i16 to i1
1935 SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
1936  SDNode *Node = Op.getNode();
1937  LoadSDNode *LD = cast<LoadSDNode>(Node);
1938  SDLoc dl(Node);
 1939  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
 1940  assert(Node->getValueType(0) == MVT::i1 &&
1941  "Custom lowering for i1 load only");
1942  SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
1943  LD->getPointerInfo(), LD->getAlignment(),
1944  LD->getMemOperand()->getFlags());
1945  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
1946  // The legalizer (the caller) is expecting two values from the legalized
1947  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
1948  // in LegalizeDAG.cpp which also uses MergeValues.
1949  SDValue Ops[] = { result, LD->getChain() };
1950  return DAG.getMergeValues(Ops, dl);
1951 }
1952 
1953 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1954  EVT ValVT = Op.getOperand(1).getValueType();
1955  if (ValVT == MVT::i1)
1956  return LowerSTOREi1(Op, DAG);
1957  else if (ValVT.isVector())
1958  return LowerSTOREVector(Op, DAG);
1959  else
1960  return SDValue();
1961 }
1962 
1963 SDValue
1964 NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
1965  SDNode *N = Op.getNode();
1966  SDValue Val = N->getOperand(1);
1967  SDLoc DL(N);
1968  EVT ValVT = Val.getValueType();
1969 
1970  if (ValVT.isVector()) {
1971  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
1972  // legal. We can (and should) split that into 2 stores of <2 x double> here
1973  // but I'm leaving that as a TODO for now.
1974  if (!ValVT.isSimple())
1975  return SDValue();
1976  switch (ValVT.getSimpleVT().SimpleTy) {
1977  default:
1978  return SDValue();
1979  case MVT::v2i8:
1980  case MVT::v2i16:
1981  case MVT::v2i32:
1982  case MVT::v2i64:
1983  case MVT::v2f32:
1984  case MVT::v2f64:
1985  case MVT::v4i8:
1986  case MVT::v4i16:
1987  case MVT::v4i32:
1988  case MVT::v4f32:
1989  // This is a "native" vector type
1990  break;
1991  }
1992 
1993  MemSDNode *MemSD = cast<MemSDNode>(N);
1994  const DataLayout &TD = DAG.getDataLayout();
1995 
1996  unsigned Align = MemSD->getAlignment();
1997  unsigned PrefAlign =
1998  TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
1999  if (Align < PrefAlign) {
2000  // This store is not sufficiently aligned, so bail out and let this vector
2001  // store be scalarized. Note that we may still be able to emit smaller
2002  // vector stores. For example, if we are storing a <4 x float> with an
2003  // alignment of 8, this check will fail but the legalizer will try again
2004  // with 2 x <2 x float>, which will succeed with an alignment of 8.
2005  return SDValue();
2006  }
2007 
2008  unsigned Opcode = 0;
2009  EVT EltVT = ValVT.getVectorElementType();
2010  unsigned NumElts = ValVT.getVectorNumElements();
2011 
2012  // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
2013  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
2014  // stored type to i16 and propagate the "real" type as the memory type.
2015  bool NeedExt = false;
2016  if (EltVT.getSizeInBits() < 16)
2017  NeedExt = true;
2018 
2019  switch (NumElts) {
2020  default:
2021  return SDValue();
2022  case 2:
2023  Opcode = NVPTXISD::StoreV2;
2024  break;
2025  case 4:
2026  Opcode = NVPTXISD::StoreV4;
2027  break;
2028  }
2029 
 2030  SmallVector<SDValue, 8> Ops;
 2031 
2032  // First is the chain
2033  Ops.push_back(N->getOperand(0));
2034 
2035  // Then the split values
2036  for (unsigned i = 0; i < NumElts; ++i) {
2037  SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
2038  DAG.getIntPtrConstant(i, DL));
2039  if (NeedExt)
2040  ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
2041  Ops.push_back(ExtVal);
2042  }
2043 
2044  // Then any remaining arguments
2045  Ops.append(N->op_begin() + 2, N->op_end());
2046 
2047  SDValue NewSt = DAG.getMemIntrinsicNode(
2048  Opcode, DL, DAG.getVTList(MVT::Other), Ops,
2049  MemSD->getMemoryVT(), MemSD->getMemOperand());
2050 
2051  //return DCI.CombineTo(N, NewSt, true);
2052  return NewSt;
2053  }
2054 
2055  return SDValue();
2056 }
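// For example (illustrative): a sufficiently aligned store of <4 x i8> takes
// the NumElts == 4 path above; each lane is any-extended to i16 and a single
// StoreV4 node is built whose memory VT remains the original v4i8.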
2057 
2058 // st i1 v, addr
2059 // =>
2060 // v1 = zxt v to i16
2061 // st.u8 i16, addr
2062 SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
2063  SDNode *Node = Op.getNode();
2064  SDLoc dl(Node);
2065  StoreSDNode *ST = cast<StoreSDNode>(Node);
2066  SDValue Tmp1 = ST->getChain();
2067  SDValue Tmp2 = ST->getBasePtr();
2068  SDValue Tmp3 = ST->getValue();
2069  assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
2070  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
2071  SDValue Result =
2072  DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
2073  ST->getAlignment(), ST->getMemOperand()->getFlags());
2074  return Result;
2075 }
2076 
2077 SDValue
2078 NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
2079  std::string ParamSym;
2080  raw_string_ostream ParamStr(ParamSym);
2081 
2082  ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx;
2083  ParamStr.flush();
2084 
2085  std::string *SavedStr =
2086  nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str());
2087  return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
2088 }
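// For example (illustrative): for a function named foo, index 1 yields the
// symbol "foo_param_1", matching the ".param" name the AsmPrinter emits for
// that argument.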
2089 
2090 // Check to see if the kernel argument is image*_t or sampler_t
2091 
2092 static bool isImageOrSamplerVal(const Value *arg, const Module *context) {
2093  static const char *const specialTypes[] = { "struct._image2d_t",
2094  "struct._image3d_t",
2095  "struct._sampler_t" };
2096 
2097  Type *Ty = arg->getType();
2098  auto *PTy = dyn_cast<PointerType>(Ty);
2099 
2100  if (!PTy)
2101  return false;
2102 
2103  if (!context)
2104  return false;
2105 
2106  auto *STy = dyn_cast<StructType>(PTy->getElementType());
2107  if (!STy || STy->isLiteral())
2108  return false;
2109 
2110  return std::find(std::begin(specialTypes), std::end(specialTypes),
2111  STy->getName()) != std::end(specialTypes);
2112 }
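// For example (illustrative): an argument of type %struct._image2d_t* matches
// the table above and is treated as an image handle, whereas a pointer to an
// unnamed (literal) struct can never match because STy->isLiteral() is
// rejected first.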
2113 
2114 SDValue NVPTXTargetLowering::LowerFormalArguments(
 2115  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2116  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2117  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2118  MachineFunction &MF = DAG.getMachineFunction();
2119  const DataLayout &DL = DAG.getDataLayout();
2120  auto PtrVT = getPointerTy(DAG.getDataLayout());
2121 
2122  const Function *F = MF.getFunction();
2123  const AttributeSet &PAL = F->getAttributes();
2124  const TargetLowering *TLI = STI.getTargetLowering();
2125 
2126  SDValue Root = DAG.getRoot();
2127  std::vector<SDValue> OutChains;
2128 
2129  bool isABI = (STI.getSmVersion() >= 20);
2130  assert(isABI && "Non-ABI compilation is not supported");
2131  if (!isABI)
2132  return Chain;
2133 
2134  std::vector<Type *> argTypes;
2135  std::vector<const Argument *> theArgs;
2136  for (const Argument &I : F->args()) {
2137  theArgs.push_back(&I);
2138  argTypes.push_back(I.getType());
2139  }
2140  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
2141  // Ins.size() will be larger
2142  // * if there is an aggregate argument with multiple fields (each field
2143  // showing up separately in Ins)
2144  // * if there is a vector argument with more than typical vector-length
2145  // elements (generally if more than 4) where each vector element is
2146  // individually present in Ins.
2147  // So a different index should be used for indexing into Ins.
2148  // See similar issue in LowerCall.
2149  unsigned InsIdx = 0;
2150 
2151  int idx = 0;
2152  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
2153  Type *Ty = argTypes[i];
2154 
 2155  // If the kernel argument is image*_t or sampler_t, convert it to
 2156  // an i32 constant holding the parameter position. This can later be
 2157  // matched in the AsmPrinter to output the correct mangled name.
2158  if (isImageOrSamplerVal(
2159  theArgs[i],
2160  (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
2161  : nullptr))) {
2162  assert(isKernelFunction(*F) &&
2163  "Only kernels can have image/sampler params");
2164  InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
2165  continue;
2166  }
2167 
2168  if (theArgs[i]->use_empty()) {
2169  // argument is dead
2170  if (Ty->isAggregateType()) {
2171  SmallVector<EVT, 16> vtparts;
2172 
2173  ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
2174  assert(vtparts.size() > 0 && "empty aggregate type not expected");
2175  for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
2176  ++parti) {
2177  InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2178  ++InsIdx;
2179  }
2180  if (vtparts.size() > 0)
2181  --InsIdx;
2182  continue;
2183  }
2184  if (Ty->isVectorTy()) {
2185  EVT ObjectVT = getValueType(DL, Ty);
2186  unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
2187  for (unsigned parti = 0; parti < NumRegs; ++parti) {
2188  InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2189  ++InsIdx;
2190  }
2191  if (NumRegs > 0)
2192  --InsIdx;
2193  continue;
2194  }
2195  InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2196  continue;
2197  }
2198 
2199  // In the following cases, assign a node order of "idx+1"
2200  // to newly created nodes. The SDNodes for params have to
2201  // appear in the same order as their order of appearance
2202  // in the original function. "idx+1" holds that order.
2203  if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
2204  if (Ty->isAggregateType()) {
2205  SmallVector<EVT, 16> vtparts;
2206  SmallVector<uint64_t, 16> offsets;
2207 
2208  // NOTE: Here, we lose the ability to issue vector loads for vectors
2209  // that are a part of a struct. This should be investigated in the
2210  // future.
2211  ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets,
2212  0);
2213  assert(vtparts.size() > 0 && "empty aggregate type not expected");
2214  bool aggregateIsPacked = false;
2215  if (StructType *STy = dyn_cast<StructType>(Ty))
2216  aggregateIsPacked = STy->isPacked();
2217 
2218  SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2219  for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
2220  ++parti) {
2221  EVT partVT = vtparts[parti];
2222  Value *srcValue = Constant::getNullValue(
2223  PointerType::get(partVT.getTypeForEVT(F->getContext()),
 2224  ADDRESS_SPACE_PARAM));
 2225  SDValue srcAddr =
2226  DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
2227  DAG.getConstant(offsets[parti], dl, PtrVT));
2228  unsigned partAlign = aggregateIsPacked
2229  ? 1
2230  : DL.getABITypeAlignment(
2231  partVT.getTypeForEVT(F->getContext()));
2232  SDValue p;
2233  if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
2234  ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
 2235  ISD::SEXTLOAD : ISD::ZEXTLOAD;
 2236  p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
2237  MachinePointerInfo(srcValue), partVT, partAlign);
2238  } else {
2239  p = DAG.getLoad(partVT, dl, Root, srcAddr,
2240  MachinePointerInfo(srcValue), partAlign);
2241  }
2242  if (p.getNode())
2243  p.getNode()->setIROrder(idx + 1);
2244  InVals.push_back(p);
2245  ++InsIdx;
2246  }
2247  if (vtparts.size() > 0)
2248  --InsIdx;
2249  continue;
2250  }
2251  if (Ty->isVectorTy()) {
2252  EVT ObjectVT = getValueType(DL, Ty);
2253  SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2254  unsigned NumElts = ObjectVT.getVectorNumElements();
2255  assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
2256  "Vector was not scalarized");
2257  EVT EltVT = ObjectVT.getVectorElementType();
2258 
2259  // V1 load
2260  // f32 = load ...
2261  if (NumElts == 1) {
2262  // We only have one element, so just directly load it
 2263  Value *SrcValue = Constant::getNullValue(PointerType::get(
 2264  EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
2265  SDValue P = DAG.getLoad(
2266  EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue),
2267  DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())),
 2268  MachineMemOperand::MODereferenceable |
 2269  MachineMemOperand::MOInvariant);
 2270  if (P.getNode())
2271  P.getNode()->setIROrder(idx + 1);
2272 
2273  if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
2274  P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
2275  InVals.push_back(P);
2276  ++InsIdx;
2277  } else if (NumElts == 2) {
2278  // V2 load
2279  // f32,f32 = load ...
2280  EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
 2281  Value *SrcValue = Constant::getNullValue(PointerType::get(
 2282  VecVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
2283  SDValue P = DAG.getLoad(
2284  VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue),
2285  DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())),
 2286  MachineMemOperand::MODereferenceable |
 2287  MachineMemOperand::MOInvariant);
 2288  if (P.getNode())
2289  P.getNode()->setIROrder(idx + 1);
2290 
2291  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2292  DAG.getIntPtrConstant(0, dl));
2293  SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2294  DAG.getIntPtrConstant(1, dl));
2295 
2296  if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
2297  Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
2298  Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
2299  }
2300 
2301  InVals.push_back(Elt0);
2302  InVals.push_back(Elt1);
2303  InsIdx += 2;
2304  } else {
2305  // V4 loads
2306  // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
2307  // the vector will be expanded to a power of 2 elements, so we know we
2308  // can always round up to the next multiple of 4 when creating the
2309  // vector loads.
2310  // e.g. 4 elem => 1 ld.v4
2311  // 6 elem => 2 ld.v4
2312  // 8 elem => 2 ld.v4
2313  // 11 elem => 3 ld.v4
2314  unsigned VecSize = 4;
2315  if (EltVT.getSizeInBits() == 64) {
2316  VecSize = 2;
2317  }
2318  EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
2319  unsigned Ofst = 0;
2320  for (unsigned i = 0; i < NumElts; i += VecSize) {
2321  Value *SrcValue = Constant::getNullValue(
2322  PointerType::get(VecVT.getTypeForEVT(F->getContext()),
 2323  ADDRESS_SPACE_PARAM));
 2324  SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
2325  DAG.getConstant(Ofst, dl, PtrVT));
2326  SDValue P = DAG.getLoad(
2327  VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue),
2328  DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())),
 2329  MachineMemOperand::MODereferenceable |
 2330  MachineMemOperand::MOInvariant);
 2331  if (P.getNode())
2332  P.getNode()->setIROrder(idx + 1);
2333 
2334  for (unsigned j = 0; j < VecSize; ++j) {
2335  if (i + j >= NumElts)
2336  break;
2337  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2338  DAG.getIntPtrConstant(j, dl));
2339  if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
2340  Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
2341  InVals.push_back(Elt);
2342  }
2343  Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
2344  }
2345  InsIdx += NumElts;
2346  }
2347 
2348  if (NumElts > 0)
2349  --InsIdx;
2350  continue;
2351  }
2352  // A plain scalar.
2353  EVT ObjectVT = getValueType(DL, Ty);
2354  // If ABI, load from the param symbol
2355  SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
 2356  Value *srcValue = Constant::getNullValue(PointerType::get(
 2357  ObjectVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
2358  SDValue p;
2359  if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
2360  ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
 2361  ISD::SEXTLOAD : ISD::ZEXTLOAD;
 2362  p = DAG.getExtLoad(
2363  ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue),
2364  ObjectVT,
2365  DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
2366  } else {
2367  p = DAG.getLoad(
2368  Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue),
2369  DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
2370  }
2371  if (p.getNode())
2372  p.getNode()->setIROrder(idx + 1);
2373  InVals.push_back(p);
2374  continue;
2375  }
2376 
2377  // Param has ByVal attribute
2378  // Return MoveParam(param symbol).
 2379  // Ideally, the param symbol could be returned directly, but when the
 2380  // SDNode builder decides to use it in a CopyToReg(), the machine
 2381  // instruction fails because TargetExternalSymbol (not lowered) is
 2382  // target dependent, and CopyToReg assumes the source is already
 2383  // lowered.
2384  EVT ObjectVT = getValueType(DL, Ty);
2385  assert(ObjectVT == Ins[InsIdx].VT &&
2386  "Ins type did not match function type");
2387  SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2388  SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
2389  if (p.getNode())
2390  p.getNode()->setIROrder(idx + 1);
2391  InVals.push_back(p);
2392  }
2393 
2394 // Clang will check for explicit varargs and issue an error if any are
2395 // present. However, Clang will let code with an implicit vararg
2396 // declaration, such as f(), pass. See bug 617733.
2397  // We treat this case as if the arg list is empty.
2398  // if (F.isVarArg()) {
2399  // assert(0 && "VarArg not supported yet!");
2400  //}
2401 
2402  if (!OutChains.empty())
2403  DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
2404 
2405  return Chain;
2406 }
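// Worked example (illustrative): a <3 x float> kernel parameter is scalarized
// to three entries in Ins; the NumElts > 2 path above emits a single v4f32
// load from the param symbol, pushes elements 0..2 into InVals, and skips the
// pad lane via the (i + j >= NumElts) check.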
2407 
2408 SDValue
2409 NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
 2410  bool isVarArg,
2411  const SmallVectorImpl<ISD::OutputArg> &Outs,
2412  const SmallVectorImpl<SDValue> &OutVals,
2413  const SDLoc &dl, SelectionDAG &DAG) const {
2414  MachineFunction &MF = DAG.getMachineFunction();
2415  const Function *F = MF.getFunction();
2416  Type *RetTy = F->getReturnType();
2417  const DataLayout &TD = DAG.getDataLayout();
2418 
2419  bool isABI = (STI.getSmVersion() >= 20);
2420  assert(isABI && "Non-ABI compilation is not supported");
2421  if (!isABI)
2422  return Chain;
2423 
2424  if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
 2425  // If we have a vector type, the OutVals array will be the scalarized
 2426  // components, and we have to combine them into 1 or more vector stores.
2427  unsigned NumElts = VTy->getNumElements();
2428  assert(NumElts == Outs.size() && "Bad scalarization of return value");
2429 
2430  // const_cast can be removed in later LLVM versions
2431  EVT EltVT = getValueType(TD, RetTy).getVectorElementType();
2432  bool NeedExtend = false;
2433  if (EltVT.getSizeInBits() < 16)
2434  NeedExtend = true;
2435 
2436  // V1 store
2437  if (NumElts == 1) {
2438  SDValue StoreVal = OutVals[0];
2439  // We only have one element, so just directly store it
2440  if (NeedExtend)
2441  StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
2442  SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal };
 2443  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
 2444  DAG.getVTList(MVT::Other), Ops,
2445  EltVT, MachinePointerInfo());
2446  } else if (NumElts == 2) {
2447  // V2 store
2448  SDValue StoreVal0 = OutVals[0];
2449  SDValue StoreVal1 = OutVals[1];
2450 
2451  if (NeedExtend) {
2452  StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
2453  StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
2454  }
2455 
2456  SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0,
2457  StoreVal1 };
 2458  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
 2459  DAG.getVTList(MVT::Other), Ops,
2460  EltVT, MachinePointerInfo());
2461  } else {
2462  // V4 stores
2463  // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
2464  // vector will be expanded to a power of 2 elements, so we know we can
2465  // always round up to the next multiple of 4 when creating the vector
2466  // stores.
2467  // e.g. 4 elem => 1 st.v4
2468  // 6 elem => 2 st.v4
2469  // 8 elem => 2 st.v4
2470  // 11 elem => 3 st.v4
2471 
2472  unsigned VecSize = 4;
2473  if (OutVals[0].getValueSizeInBits() == 64)
2474  VecSize = 2;
2475 
2476  unsigned Offset = 0;
2477 
2478  EVT VecVT =
2479  EVT::getVectorVT(F->getContext(), EltVT, VecSize);
2480  unsigned PerStoreOffset =
2481  TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
2482 
2483  for (unsigned i = 0; i < NumElts; i += VecSize) {
2484  // Get values
2485  SDValue StoreVal;
 2486  SmallVector<SDValue, 8> Ops;
 2487  Ops.push_back(Chain);
2488  Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32));
2489  unsigned Opc = NVPTXISD::StoreRetvalV2;
2490  EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();
2491 
2492  StoreVal = OutVals[i];
2493  if (NeedExtend)
2494  StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2495  Ops.push_back(StoreVal);
2496 
2497  if (i + 1 < NumElts) {
2498  StoreVal = OutVals[i + 1];
2499  if (NeedExtend)
2500  StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2501  } else {
2502  StoreVal = DAG.getUNDEF(ExtendedVT);
2503  }
2504  Ops.push_back(StoreVal);
2505 
2506  if (VecSize == 4) {
 2507  Opc = NVPTXISD::StoreRetvalV4;
 2508  if (i + 2 < NumElts) {
2509  StoreVal = OutVals[i + 2];
2510  if (NeedExtend)
2511  StoreVal =
2512  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2513  } else {
2514  StoreVal = DAG.getUNDEF(ExtendedVT);
2515  }
2516  Ops.push_back(StoreVal);
2517 
2518  if (i + 3 < NumElts) {
2519  StoreVal = OutVals[i + 3];
2520  if (NeedExtend)
2521  StoreVal =
2522  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2523  } else {
2524  StoreVal = DAG.getUNDEF(ExtendedVT);
2525  }
2526  Ops.push_back(StoreVal);
2527  }
2528 
2529  // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
2530  Chain =
2531  DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
2532  EltVT, MachinePointerInfo());
2533  Offset += PerStoreOffset;
2534  }
2535  }
2536  } else {
2537  SmallVector<EVT, 16> ValVTs;
 2538  SmallVector<uint64_t, 16> Offsets;
 2539  ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0);
2540  assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
2541 
2542  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2543  SDValue theVal = OutVals[i];
2544  EVT TheValType = theVal.getValueType();
2545  unsigned numElems = 1;
2546  if (TheValType.isVector())
2547  numElems = TheValType.getVectorNumElements();
2548  for (unsigned j = 0, je = numElems; j != je; ++j) {
2549  SDValue TmpVal = theVal;
2550  if (TheValType.isVector())
2551  TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
2552  TheValType.getVectorElementType(), TmpVal,
2553  DAG.getIntPtrConstant(j, dl));
2554  EVT TheStoreType = ValVTs[i];
2555  if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) {
2556  // The following zero-extension is for integer types only, and
2557  // specifically not for aggregates.
2558  TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
2559  TheStoreType = MVT::i32;
2560  }
2561  else if (TmpVal.getValueSizeInBits() < 16)
2562  TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
2563 
2564  SDValue Ops[] = {
2565  Chain,
2566  DAG.getConstant(Offsets[i], dl, MVT::i32),
2567  TmpVal };
 2568  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
 2569  DAG.getVTList(MVT::Other), Ops,
2570  TheStoreType,
2571  MachinePointerInfo());
2572  }
2573  }
2574  }
2575 
2576  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
2577 }
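// For example (illustrative): returning an i8 takes the scalar path above;
// the value is zero-extended to i32 and stored with a single StoreRetval
// whose memory VT is i32, mirroring the widened ld.param on the caller side
// in LowerCall.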
2578 
2579 void NVPTXTargetLowering::LowerAsmOperandForConstraint(
 2580  SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
2581  SelectionDAG &DAG) const {
2582  if (Constraint.length() > 1)
2583  return;
2584  else
2585  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
2586 }
2587 
2588 static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
2589  switch (Intrinsic) {
2590  default:
2591  return 0;
2592 
2593  case Intrinsic::nvvm_tex_1d_v4f32_s32:
2594  return NVPTXISD::Tex1DFloatS32;
 2595  case Intrinsic::nvvm_tex_1d_v4f32_f32:
 2596  return NVPTXISD::Tex1DFloatFloat;
 2597  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
 2598  return NVPTXISD::Tex1DFloatFloatLevel;
 2599  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
 2600  return NVPTXISD::Tex1DFloatFloatGrad;
 2601  case Intrinsic::nvvm_tex_1d_v4s32_s32:
 2602  return NVPTXISD::Tex1DS32S32;
 2603  case Intrinsic::nvvm_tex_1d_v4s32_f32:
 2604  return NVPTXISD::Tex1DS32Float;
 2605  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
 2606  return NVPTXISD::Tex1DS32FloatLevel;
 2607  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
 2608  return NVPTXISD::Tex1DS32FloatGrad;
 2609  case Intrinsic::nvvm_tex_1d_v4u32_s32:
 2610  return NVPTXISD::Tex1DU32S32;
 2611  case Intrinsic::nvvm_tex_1d_v4u32_f32:
 2612  return NVPTXISD::Tex1DU32Float;
 2613  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
 2614  return NVPTXISD::Tex1DU32FloatLevel;
 2615  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
 2616  return NVPTXISD::Tex1DU32FloatGrad;
 2617 
 2618  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
 2619  return NVPTXISD::Tex1DArrayFloatS32;
 2620  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
 2621  return NVPTXISD::Tex1DArrayFloatFloat;
 2622  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
 2623  return NVPTXISD::Tex1DArrayFloatFloatLevel;
 2624  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
 2625  return NVPTXISD::Tex1DArrayFloatFloatGrad;
 2626  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
 2627  return NVPTXISD::Tex1DArrayS32S32;
 2628  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
 2629  return NVPTXISD::Tex1DArrayS32Float;
 2630  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
 2631  return NVPTXISD::Tex1DArrayS32FloatLevel;
 2632  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
 2633  return NVPTXISD::Tex1DArrayS32FloatGrad;
 2634  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
 2635  return NVPTXISD::Tex1DArrayU32S32;
 2636  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
 2637  return NVPTXISD::Tex1DArrayU32Float;
 2638  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
 2639  return NVPTXISD::Tex1DArrayU32FloatLevel;
 2640  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
 2641  return NVPTXISD::Tex1DArrayU32FloatGrad;
 2642 
2643  case Intrinsic::nvvm_tex_2d_v4f32_s32:
2644  return NVPTXISD::Tex2DFloatS32;
 2645  case Intrinsic::nvvm_tex_2d_v4f32_f32:
 2646  return NVPTXISD::Tex2DFloatFloat;
 2647  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
 2648  return NVPTXISD::Tex2DFloatFloatLevel;
 2649  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
 2650  return NVPTXISD::Tex2DFloatFloatGrad;
 2651  case Intrinsic::nvvm_tex_2d_v4s32_s32:
 2652  return NVPTXISD::Tex2DS32S32;
 2653  case Intrinsic::nvvm_tex_2d_v4s32_f32:
 2654  return NVPTXISD::Tex2DS32Float;
 2655  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
 2656  return NVPTXISD::Tex2DS32FloatLevel;
 2657  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
 2658  return NVPTXISD::Tex2DS32FloatGrad;
 2659  case Intrinsic::nvvm_tex_2d_v4u32_s32:
 2660  return NVPTXISD::Tex2DU32S32;
 2661  case Intrinsic::nvvm_tex_2d_v4u32_f32:
 2662  return NVPTXISD::Tex2DU32Float;
 2663  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
 2664  return NVPTXISD::Tex2DU32FloatLevel;
 2665  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
 2666  return NVPTXISD::Tex2DU32FloatGrad;
 2667 
 2668  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
 2669  return NVPTXISD::Tex2DArrayFloatS32;
 2670  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
 2671  return NVPTXISD::Tex2DArrayFloatFloat;
 2672  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
 2673  return NVPTXISD::Tex2DArrayFloatFloatLevel;
 2674  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
 2675  return NVPTXISD::Tex2DArrayFloatFloatGrad;
 2676  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
 2677  return NVPTXISD::Tex2DArrayS32S32;
 2678  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
 2679  return NVPTXISD::Tex2DArrayS32Float;
 2680  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
 2681  return NVPTXISD::Tex2DArrayS32FloatLevel;
 2682  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
 2683  return NVPTXISD::Tex2DArrayS32FloatGrad;
 2684  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
 2685  return NVPTXISD::Tex2DArrayU32S32;
 2686  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
 2687  return NVPTXISD::Tex2DArrayU32Float;
 2688  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
 2689  return NVPTXISD::Tex2DArrayU32FloatLevel;
 2690  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
 2691  return NVPTXISD::Tex2DArrayU32FloatGrad;
 2692 
2693  case Intrinsic::nvvm_tex_3d_v4f32_s32:
2694  return NVPTXISD::Tex3DFloatS32;
 2695  case Intrinsic::nvvm_tex_3d_v4f32_f32:
 2696  return NVPTXISD::Tex3DFloatFloat;
 2697  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
 2698  return NVPTXISD::Tex3DFloatFloatLevel;
 2699  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
 2700  return NVPTXISD::Tex3DFloatFloatGrad;
 2701  case Intrinsic::nvvm_tex_3d_v4s32_s32:
 2702  return NVPTXISD::Tex3DS32S32;
 2703  case Intrinsic::nvvm_tex_3d_v4s32_f32:
 2704  return NVPTXISD::Tex3DS32Float;
 2705  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
 2706  return NVPTXISD::Tex3DS32FloatLevel;
 2707  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
 2708  return NVPTXISD::Tex3DS32FloatGrad;
 2709  case Intrinsic::nvvm_tex_3d_v4u32_s32:
 2710  return NVPTXISD::Tex3DU32S32;
 2711  case Intrinsic::nvvm_tex_3d_v4u32_f32:
 2712  return NVPTXISD::Tex3DU32Float;
 2713  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
 2714  return NVPTXISD::Tex3DU32FloatLevel;
 2715  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
 2716  return NVPTXISD::Tex3DU32FloatGrad;
 2717 
 2718  case Intrinsic::nvvm_tex_cube_v4f32_f32:
 2719  return NVPTXISD::TexCubeFloatFloat;
 2720  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
 2721  return NVPTXISD::TexCubeFloatFloatLevel;
 2722  case Intrinsic::nvvm_tex_cube_v4s32_f32:
 2723  return NVPTXISD::TexCubeS32Float;
 2724  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
 2725  return NVPTXISD::TexCubeS32FloatLevel;
 2726  case Intrinsic::nvvm_tex_cube_v4u32_f32:
 2727  return NVPTXISD::TexCubeU32Float;
 2728  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
 2729  return NVPTXISD::TexCubeU32FloatLevel;
 2730 
 2731  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
 2732  return NVPTXISD::TexCubeArrayFloatFloat;
 2733  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
 2734  return NVPTXISD::TexCubeArrayFloatFloatLevel;
 2735  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
 2736  return NVPTXISD::TexCubeArrayS32Float;
 2737  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
 2738  return NVPTXISD::TexCubeArrayS32FloatLevel;
 2739  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
 2740  return NVPTXISD::TexCubeArrayU32Float;
 2741  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
 2742  return NVPTXISD::TexCubeArrayU32FloatLevel;
 2743 
 2744  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
 2745  return NVPTXISD::Tld4R2DFloatFloat;
 2746  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
 2747  return NVPTXISD::Tld4G2DFloatFloat;
 2748  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
 2749  return NVPTXISD::Tld4B2DFloatFloat;
 2750  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
 2751  return NVPTXISD::Tld4A2DFloatFloat;
 2752  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
 2753  return NVPTXISD::Tld4R2DS64Float;
 2754  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
 2755  return NVPTXISD::Tld4G2DS64Float;
 2756  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
 2757  return NVPTXISD::Tld4B2DS64Float;
 2758  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
 2759  return NVPTXISD::Tld4A2DS64Float;
 2760  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
 2761  return NVPTXISD::Tld4R2DU64Float;
 2762  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
 2763  return NVPTXISD::Tld4G2DU64Float;
 2764  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
 2765  return NVPTXISD::Tld4B2DU64Float;
 2766  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
 2767  return NVPTXISD::Tld4A2DU64Float;
 2768 
 2769  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
 2770  return NVPTXISD::TexUnified1DFloatS32;
 2771  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
 2772  return NVPTXISD::TexUnified1DFloatFloat;
 2773  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
 2774  return NVPTXISD::TexUnified1DFloatFloatLevel;
 2775  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
 2776  return NVPTXISD::TexUnified1DFloatFloatGrad;
 2777  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
 2778  return NVPTXISD::TexUnified1DS32S32;
 2779  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
 2780  return NVPTXISD::TexUnified1DS32Float;
 2781  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
 2782  return NVPTXISD::TexUnified1DS32FloatLevel;
 2783  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
 2784  return NVPTXISD::TexUnified1DS32FloatGrad;
 2785  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
 2786  return NVPTXISD::TexUnified1DU32S32;
 2787  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
 2788  return NVPTXISD::TexUnified1DU32Float;
 2789  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
 2790  return NVPTXISD::TexUnified1DU32FloatLevel;
 2791  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
 2792  return NVPTXISD::TexUnified1DU32FloatGrad;
 2793 
 2794  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
 2795  return NVPTXISD::TexUnified1DArrayFloatS32;
 2796  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
 2797  return NVPTXISD::TexUnified1DArrayFloatFloat;
 2798  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
 2799  return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
 2800  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
 2801  return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
 2802  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
 2803  return NVPTXISD::TexUnified1DArrayS32S32;
 2804  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
 2805  return NVPTXISD::TexUnified1DArrayS32Float;
 2806  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
 2807  return NVPTXISD::TexUnified1DArrayS32FloatLevel;
 2808  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
 2809  return NVPTXISD::TexUnified1DArrayS32FloatGrad;
 2810  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
 2811  return NVPTXISD::TexUnified1DArrayU32S32;
 2812  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
 2813  return NVPTXISD::TexUnified1DArrayU32Float;
 2814  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
 2815  return NVPTXISD::TexUnified1DArrayU32FloatLevel;
 2816  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
 2817  return NVPTXISD::TexUnified1DArrayU32FloatGrad;
 2818 
 2819  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
 2820  return NVPTXISD::TexUnified2DFloatS32;
 2821  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
 2822  return NVPTXISD::TexUnified2DFloatFloat;
 2823  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
 2824  return NVPTXISD::TexUnified2DFloatFloatLevel;
 2825  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
 2826  return NVPTXISD::TexUnified2DFloatFloatGrad;
 2827  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
 2828  return NVPTXISD::TexUnified2DS32S32;
 2829  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
 2830  return NVPTXISD::TexUnified2DS32Float;
 2831  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
 2832  return NVPTXISD::TexUnified2DS32FloatLevel;
 2833  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
 2834  return NVPTXISD::TexUnified2DS32FloatGrad;
 2835  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
 2836  return NVPTXISD::TexUnified2DU32S32;
 2837  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
 2838  return NVPTXISD::TexUnified2DU32Float;
 2839  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
 2840  return NVPTXISD::TexUnified2DU32FloatLevel;
 2841  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
 2842  return NVPTXISD::TexUnified2DU32FloatGrad;
 2843 
 2844  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
 2845  return NVPTXISD::TexUnified2DArrayFloatS32;
 2846  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
 2847  return NVPTXISD::TexUnified2DArrayFloatFloat;
 2848  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
 2849  return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
 2850  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
 2851  return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
 2852  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
 2853  return NVPTXISD::TexUnified2DArrayS32S32;
 2854  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
 2855  return NVPTXISD::TexUnified2DArrayS32Float;
 2856  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
 2857  return NVPTXISD::TexUnified2DArrayS32FloatLevel;
 2858  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
 2859  return NVPTXISD::TexUnified2DArrayS32FloatGrad;
 2860  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
 2861  return NVPTXISD::TexUnified2DArrayU32S32;
 2862  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
 2863  return NVPTXISD::TexUnified2DArrayU32Float;
 2864  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
 2865  return NVPTXISD::TexUnified2DArrayU32FloatLevel;
 2866  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
 2867  return NVPTXISD::TexUnified2DArrayU32FloatGrad;
 2868 
 2869  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
 2870  return NVPTXISD::TexUnified3DFloatS32;
 2871  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
 2872  return NVPTXISD::TexUnified3DFloatFloat;
 2873  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
 2874  return NVPTXISD::TexUnified3DFloatFloatLevel;
 2875  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
 2876  return NVPTXISD::TexUnified3DFloatFloatGrad;
 2877  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
 2878  return NVPTXISD::TexUnified3DS32S32;
 2879  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
 2880  return NVPTXISD::TexUnified3DS32Float;
 2881  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
 2882  return NVPTXISD::TexUnified3DS32FloatLevel;
 2883  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
 2884  return NVPTXISD::TexUnified3DS32FloatGrad;
 2885  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
 2886  return NVPTXISD::TexUnified3DU32S32;
 2887  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
 2888  return NVPTXISD::TexUnified3DU32Float;
 2889  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
 2890  return NVPTXISD::TexUnified3DU32FloatLevel;
 2891  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
 2892  return NVPTXISD::TexUnified3DU32FloatGrad;
 2893 
 2894  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
 2895  return NVPTXISD::TexUnifiedCubeFloatFloat;
 2896  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
 2897  return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
 2898  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
 2899  return NVPTXISD::TexUnifiedCubeS32Float;
 2900  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
 2901  return NVPTXISD::TexUnifiedCubeS32FloatLevel;
 2902  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
 2903  return NVPTXISD::TexUnifiedCubeU32Float;
 2904  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
 2905  return NVPTXISD::TexUnifiedCubeU32FloatLevel;
 2906 
 2907  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
 2908  return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
 2909  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
 2910  return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
 2911  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
 2912  return NVPTXISD::TexUnifiedCubeArrayS32Float;
 2913  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
 2914  return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
 2915  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
 2916  return NVPTXISD::TexUnifiedCubeArrayU32Float;
 2917  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
 2918  return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
 2919 
 2920  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
 2921  return NVPTXISD::Tld4UnifiedR2DFloatFloat;
 2922  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
 2923  return NVPTXISD::Tld4UnifiedG2DFloatFloat;
 2924  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
 2925  return NVPTXISD::Tld4UnifiedB2DFloatFloat;
 2926  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
 2927  return NVPTXISD::Tld4UnifiedA2DFloatFloat;
 2928  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
 2929  return NVPTXISD::Tld4UnifiedR2DS64Float;
 2930  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
 2931  return NVPTXISD::Tld4UnifiedG2DS64Float;
 2932  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
 2933  return NVPTXISD::Tld4UnifiedB2DS64Float;
 2934  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
 2935  return NVPTXISD::Tld4UnifiedA2DS64Float;
 2936  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
 2937  return NVPTXISD::Tld4UnifiedR2DU64Float;
 2938  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
 2939  return NVPTXISD::Tld4UnifiedG2DU64Float;
 2940  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
 2941  return NVPTXISD::Tld4UnifiedB2DU64Float;
 2942  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
 2943  return NVPTXISD::Tld4UnifiedA2DU64Float;
 2944  }
2945 }
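// The mapping above is mechanical (illustrative reading): an intrinsic named
// nvvm_tex_<geom>[_level|_grad]_v4<elem>_<coord> selects the NVPTXISD node
// Tex<Geom><Elem><Coord>[Level|Grad]; e.g. nvvm_tex_2d_array_level_v4u32_f32
// maps to NVPTXISD::Tex2DArrayU32FloatLevel.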
2946 
2947 static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
2948  switch (Intrinsic) {
2949  default:
2950  return 0;
2951  case Intrinsic::nvvm_suld_1d_i8_clamp:
2952  return NVPTXISD::Suld1DI8Clamp;
2953  case Intrinsic::nvvm_suld_1d_i16_clamp:
2954  return NVPTXISD::Suld1DI16Clamp;
2955  case Intrinsic::nvvm_suld_1d_i32_clamp:
2956  return NVPTXISD::Suld1DI32Clamp;
2957  case Intrinsic::nvvm_suld_1d_i64_clamp:
2958  return NVPTXISD::Suld1DI64Clamp;
 2959  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
 2960  return NVPTXISD::Suld1DV2I8Clamp;
 2961  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
 2962  return NVPTXISD::Suld1DV2I16Clamp;
 2963  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
 2964  return NVPTXISD::Suld1DV2I32Clamp;
 2965  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
 2966  return NVPTXISD::Suld1DV2I64Clamp;
 2967  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
 2968  return NVPTXISD::Suld1DV4I8Clamp;
 2969  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
 2970  return NVPTXISD::Suld1DV4I16Clamp;
 2971  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
 2972  return NVPTXISD::Suld1DV4I32Clamp;
 2973  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
 2974  return NVPTXISD::Suld1DArrayI8Clamp;
 2975  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
 2976  return NVPTXISD::Suld1DArrayI16Clamp;
 2977  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
 2978  return NVPTXISD::Suld1DArrayI32Clamp;
 2979  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
 2980  return NVPTXISD::Suld1DArrayI64Clamp;
 2981  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
 2982  return NVPTXISD::Suld1DArrayV2I8Clamp;
 2983  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
 2984  return NVPTXISD::Suld1DArrayV2I16Clamp;
 2985  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
 2986  return NVPTXISD::Suld1DArrayV2I32Clamp;
 2987  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
 2988  return NVPTXISD::Suld1DArrayV2I64Clamp;
 2989  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
 2990  return NVPTXISD::Suld1DArrayV4I8Clamp;
 2991  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
 2992  return NVPTXISD::Suld1DArrayV4I16Clamp;
 2993  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
 2994  return NVPTXISD::Suld1DArrayV4I32Clamp;
2995  case Intrinsic::nvvm_suld_2d_i8_clamp:
2996  return NVPTXISD::Suld2DI8Clamp;
2997  case Intrinsic::nvvm_suld_2d_i16_clamp:
2998  return NVPTXISD::Suld2DI16Clamp;
2999  case Intrinsic::nvvm_suld_2d_i32_clamp:
3000  return NVPTXISD::Suld2DI32Clamp;
3001  case Intrinsic::nvvm_suld_2d_i64_clamp:
3002  return NVPTXISD::Suld2DI64Clamp;
 3003  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
 3004  return NVPTXISD::Suld2DV2I8Clamp;
 3005  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
 3006  return NVPTXISD::Suld2DV2I16Clamp;
 3007  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
 3008  return NVPTXISD::Suld2DV2I32Clamp;
 3009  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
 3010  return NVPTXISD::Suld2DV2I64Clamp;
 3011  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
 3012  return NVPTXISD::Suld2DV4I8Clamp;
 3013  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
 3014  return NVPTXISD::Suld2DV4I16Clamp;
 3015  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
 3016  return NVPTXISD::Suld2DV4I32Clamp;
 3017  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
 3018  return NVPTXISD::Suld2DArrayI8Clamp;
 3019  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
 3020  return NVPTXISD::Suld2DArrayI16Clamp;
 3021  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
 3022  return NVPTXISD::Suld2DArrayI32Clamp;
 3023  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
 3024  return NVPTXISD::Suld2DArrayI64Clamp;
 3025  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
 3026  return NVPTXISD::Suld2DArrayV2I8Clamp;
 3027  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
 3028  return NVPTXISD::Suld2DArrayV2I16Clamp;
 3029  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
 3030  return NVPTXISD::Suld2DArrayV2I32Clamp;
 3031  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
 3032  return NVPTXISD::Suld2DArrayV2I64Clamp;
 3033  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
 3034  return NVPTXISD::Suld2DArrayV4I8Clamp;
 3035  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
 3036  return NVPTXISD::Suld2DArrayV4I16Clamp;
 3037  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
 3038  return NVPTXISD::Suld2DArrayV4I32Clamp;
3039  case Intrinsic::nvvm_suld_3d_i8_clamp:
3040  return NVPTXISD::Suld3DI8Clamp;
3041  case Intrinsic::nvvm_suld_3d_i16_clamp:
3042  return NVPTXISD::Suld3DI16Clamp;
3043  case Intrinsic::nvvm_suld_3d_i32_clamp:
3044  return NVPTXISD::Suld3DI32Clamp;
3045  case Intrinsic::nvvm_suld_3d_i64_clamp:
3046  return NVPTXISD::Suld3DI64Clamp;
 3047  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
 3048  return NVPTXISD::Suld3DV2I8Clamp;
 3049  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
 3050  return NVPTXISD::Suld3DV2I16Clamp;
 3051  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
 3052  return NVPTXISD::Suld3DV2I32Clamp;
 3053  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
 3054  return NVPTXISD::Suld3DV2I64Clamp;
 3055  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
 3056  return NVPTXISD::Suld3DV4I8Clamp;
 3057  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
 3058  return NVPTXISD::Suld3DV4I16Clamp;
 3059  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
 3060  return NVPTXISD::Suld3DV4I32Clamp;
3061  case Intrinsic::nvvm_suld_1d_i8_trap:
3062  return NVPTXISD::Suld1DI8Trap;
3063  case Intrinsic::nvvm_suld_1d_i16_trap:
3064  return NVPTXISD::Suld1DI16Trap;
3065  case Intrinsic::nvvm_suld_1d_i32_trap:
3066  return NVPTXISD::Suld1DI32Trap;
3067  case Intrinsic::nvvm_suld_1d_i64_trap:
3068  return NVPTXISD::Suld1DI64Trap;
3069  case Intrinsic::nvvm_suld_1d_v2i8_trap:
3070  return NVPTXISD::Suld1DV2I8Trap;
 3071  case Intrinsic::nvvm_suld_1d_v2i16_trap:
 3072  return NVPTXISD::Suld1DV2I16Trap;
 3073  case Intrinsic::nvvm_suld_1d_v2i32_trap:
 3074  return NVPTXISD::Suld1DV2I32Trap;
 3075  case Intrinsic::nvvm_suld_1d_v2i64_trap:
 3076  return NVPTXISD::Suld1DV2I64Trap;
 3077  case Intrinsic::nvvm_suld_1d_v4i8_trap:
 3078  return NVPTXISD::Suld1DV4I8Trap;
 3079  case Intrinsic::nvvm_suld_1d_v4i16_trap:
 3080  return NVPTXISD::Suld1DV4I16Trap;
 3081  case Intrinsic::nvvm_suld_1d_v4i32_trap:
 3082  return NVPTXISD::Suld1DV4I32Trap;
 3083  case Intrinsic::nvvm_suld_1d_array_i8_trap:
 3084  return NVPTXISD::Suld1DArrayI8Trap;
 3085  case Intrinsic::nvvm_suld_1d_array_i16_trap:
 3086  return NVPTXISD::Suld1DArrayI16Trap;
 3087  case Intrinsic::nvvm_suld_1d_array_i32_trap:
 3088  return NVPTXISD::Suld1DArrayI32Trap;
 3089  case Intrinsic::nvvm_suld_1d_array_i64_trap:
 3090  return NVPTXISD::Suld1DArrayI64Trap;
 3091  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
 3092  return NVPTXISD::Suld1DArrayV2I8Trap;
 3093  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
 3094  return NVPTXISD::Suld1DArrayV2I16Trap;
 3095  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
 3096  return NVPTXISD::Suld1DArrayV2I32Trap;
 3097  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
 3098  return NVPTXISD::Suld1DArrayV2I64Trap;
 3099  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
 3100  return NVPTXISD::Suld1DArrayV4I8Trap;
 3101  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
 3102  return NVPTXISD::Suld1DArrayV4I16Trap;
 3103  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
 3104  return NVPTXISD::Suld1DArrayV4I32Trap;
3105  case Intrinsic::nvvm_suld_2d_i8_trap:
3106  return NVPTXISD::Suld2DI8Trap;
3107  case Intrinsic::nvvm_suld_2d_i16_trap:
3108  return NVPTXISD::Suld2DI16Trap;
3109  case Intrinsic::nvvm_suld_2d_i32_trap:
3110  return NVPTXISD::Suld2DI32Trap;
3111  case Intrinsic::nvvm_suld_2d_i64_trap:
3112  return NVPTXISD::Suld2DI64Trap;
3113  case Intrinsic::nvvm_suld_2d_v2i8_trap:
3114  return NVPTXISD::Suld2DV2I8Trap;
 3115  case Intrinsic::nvvm_suld_2d_v2i16_trap:
 3116  return NVPTXISD::Suld2DV2I16Trap;
 3117  case Intrinsic::nvvm_suld_2d_v2i32_trap:
 3118  return NVPTXISD::Suld2DV2I32Trap;
 3119  case Intrinsic::nvvm_suld_2d_v2i64_trap:
 3120  return NVPTXISD::Suld2DV2I64Trap;
 3121  case Intrinsic::nvvm_suld_2d_v4i8_trap:
 3122  return NVPTXISD::Suld2DV4I8Trap;
 3123  case Intrinsic::nvvm_suld_2d_v4i16_trap:
 3124  return NVPTXISD::Suld2DV4I16Trap;
 3125  case Intrinsic::nvvm_suld_2d_v4i32_trap:
 3126  return NVPTXISD::Suld2DV4I32Trap;
 3127  case Intrinsic::nvvm_suld_2d_array_i8_trap:
 3128  return NVPTXISD::Suld2DArrayI8Trap;
 3129  case Intrinsic::nvvm_suld_2d_array_i16_trap:
 3130  return NVPTXISD::Suld2DArrayI16Trap;
 3131  case Intrinsic::nvvm_suld_2d_array_i32_trap:
 3132  return NVPTXISD::Suld2DArrayI32Trap;
 3133  case Intrinsic::nvvm_suld_2d_array_i64_trap:
 3134  return NVPTXISD::Suld2DArrayI64Trap;
 3135  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
 3136  return NVPTXISD::Suld2DArrayV2I8Trap;
 3137  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
 3138  return NVPTXISD::Suld2DArrayV2I16Trap;
 3139  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
 3140  return NVPTXISD::Suld2DArrayV2I32Trap;
 3141  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
 3142  return NVPTXISD::Suld2DArrayV2I64Trap;
 3143  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
 3144  return NVPTXISD::Suld2DArrayV4I8Trap;
 3145  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
 3146  return NVPTXISD::Suld2DArrayV4I16Trap;
 3147  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
 3148  return NVPTXISD::Suld2DArrayV4I32Trap;
3149  case Intrinsic::nvvm_suld_3d_i8_trap:
3150  return NVPTXISD::Suld3DI8Trap;
3151  case Intrinsic::nvvm_suld_3d_i16_trap:
3152  return NVPTXISD::Suld3DI16Trap;
3153  case Intrinsic::nvvm_suld_3d_i32_trap:
3154  return NVPTXISD::Suld3DI32Trap;
3155  case Intrinsic::nvvm_suld_3d_i64_trap:
3156  return NVPTXISD::Suld3DI64Trap;
3157  case Intrinsic::nvvm_suld_3d_v2i8_trap:
3158  return NVPTXISD::Suld3DV2I8Trap;
 3159  case Intrinsic::nvvm_suld_3d_v2i16_trap:
 3160  return NVPTXISD::Suld3DV2I16Trap;
 3161  case Intrinsic::nvvm_suld_3d_v2i32_trap:
 3162  return NVPTXISD::Suld3DV2I32Trap;
 3163  case Intrinsic::nvvm_suld_3d_v2i64_trap:
 3164  return NVPTXISD::Suld3DV2I64Trap;
 3165  case Intrinsic::nvvm_suld_3d_v4i8_trap:
 3166  return NVPTXISD::Suld3DV4I8Trap;
 3167  case Intrinsic::nvvm_suld_3d_v4i16_trap:
 3168  return NVPTXISD::Suld3DV4I16Trap;
 3169  case Intrinsic::nvvm_suld_3d_v4i32_trap:
 3170  return NVPTXISD::Suld3DV4I32Trap;
3171  case Intrinsic::nvvm_suld_1d_i8_zero:
3172  return NVPTXISD::Suld1DI8Zero;
3173  case Intrinsic::nvvm_suld_1d_i16_zero:
3174  return NVPTXISD::Suld1DI16Zero;
3175  case Intrinsic::nvvm_suld_1d_i32_zero:
3176  return NVPTXISD::Suld1DI32Zero;
3177  case Intrinsic::nvvm_suld_1d_i64_zero:
3178  return NVPTXISD::Suld1DI64Zero;
3179  case Intrinsic::nvvm_suld_1d_v2i8_zero:
3180  return NVPTXISD::Suld1DV2I8Zero;
 3181  case Intrinsic::nvvm_suld_1d_v2i16_zero:
 3182  return NVPTXISD::Suld1DV2I16Zero;
 3183  case Intrinsic::nvvm_suld_1d_v2i32_zero:
 3184  return NVPTXISD::Suld1DV2I32Zero;
 3185  case Intrinsic::nvvm_suld_1d_v2i64_zero:
 3186  return NVPTXISD::Suld1DV2I64Zero;
 3187  case Intrinsic::nvvm_suld_1d_v4i8_zero:
 3188  return NVPTXISD::Suld1DV4I8Zero;
 3189  case Intrinsic::nvvm_suld_1d_v4i16_zero:
 3190  return NVPTXISD::Suld1DV4I16Zero;
 3191  case Intrinsic::nvvm_suld_1d_v4i32_zero:
 3192  return NVPTXISD::Suld1DV4I32Zero;
 3193  case Intrinsic::nvvm_suld_1d_array_i8_zero:
 3194  return NVPTXISD::Suld1DArrayI8Zero;
 3195  case Intrinsic::nvvm_suld_1d_array_i16_zero:
 3196  return NVPTXISD::Suld1DArrayI16Zero;
 3197  case Intrinsic::nvvm_suld_1d_array_i32_zero:
 3198  return NVPTXISD::Suld1DArrayI32Zero;
 3199  case Intrinsic::nvvm_suld_1d_array_i64_zero:
 3200  return NVPTXISD::Suld1DArrayI64Zero;
 3201  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
 3202  return NVPTXISD::Suld1DArrayV2I8Zero;
 3203  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
 3204  return NVPTXISD::Suld1DArrayV2I16Zero;
 3205  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
 3206  return NVPTXISD::Suld1DArrayV2I32Zero;
 3207  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
 3208  return NVPTXISD::Suld1DArrayV2I64Zero;
 3209  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
 3210  return NVPTXISD::Suld1DArrayV4I8Zero;
 3211  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
 3212  return NVPTXISD::Suld1DArrayV4I16Zero;
 3213  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
 3214  return NVPTXISD::Suld1DArrayV4I32Zero;
3215  case Intrinsic::nvvm_suld_2d_i8_zero:
3216  return NVPTXISD::Suld2DI8Zero;
3217  case Intrinsic::nvvm_suld_2d_i16_zero:
3218  return NVPTXISD::Suld2DI16Zero;
3219  case Intrinsic::nvvm_suld_2d_i32_zero:
3220  return NVPTXISD::Suld2DI32Zero;
3221  case Intrinsic::nvvm_suld_2d_i64_zero:
3222  return NVPTXISD::Suld2DI64Zero;
3223  case Intrinsic::nvvm_suld_2d_v2i8_zero:
3224  return NVPTXISD::Suld2DV2I8Zero;
 3225  case Intrinsic::nvvm_suld_2d_v2i16_zero:
 3226  return NVPTXISD::Suld2DV2I16Zero;
 3227  case Intrinsic::nvvm_suld_2d_v2i32_zero:
 3228  return NVPTXISD::Suld2DV2I32Zero;
 3229  case Intrinsic::nvvm_suld_2d_v2i64_zero:
 3230  return NVPTXISD::Suld2DV2I64Zero;
 3231  case Intrinsic::nvvm_suld_2d_v4i8_zero:
 3232  return NVPTXISD::Suld2DV4I8Zero;
 3233  case Intrinsic::nvvm_suld_2d_v4i16_zero:
 3234  return NVPTXISD::Suld2DV4I16Zero;
 3235  case Intrinsic::nvvm_suld_2d_v4i32_zero:
 3236  return NVPTXISD::Suld2DV4I32Zero;
 3237  case Intrinsic::nvvm_suld_2d_array_i8_zero:
 3238  return NVPTXISD::Suld2DArrayI8Zero;
 3239  case Intrinsic::nvvm_suld_2d_array_i16_zero:
 3240  return NVPTXISD::Suld2DArrayI16Zero;
 3241  case Intrinsic::nvvm_suld_2d_array_i32_zero:
 3242  return NVPTXISD::Suld2DArrayI32Zero;
 3243  case Intrinsic::nvvm_suld_2d_array_i64_zero:
 3244  return NVPTXISD::Suld2DArrayI64Zero;
 3245  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
 3246  return NVPTXISD::Suld2DArrayV2I8Zero;
 3247  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
 3248  return NVPTXISD::Suld2DArrayV2I16Zero;
 3249  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
 3250  return NVPTXISD::Suld2DArrayV2I32Zero;
 3251  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
 3252  return NVPTXISD::Suld2DArrayV2I64Zero;
 3253  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
 3254  return NVPTXISD::Suld2DArrayV4I8Zero;
 3255  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
 3256  return NVPTXISD::Suld2DArrayV4I16Zero;
 3257  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
 3258  return NVPTXISD::Suld2DArrayV4I32Zero;
3259  case Intrinsic::nvvm_suld_3d_i8_zero:
3260  return NVPTXISD::Suld3DI8Zero;
3261  case Intrinsic::nvvm_suld_3d_i16_zero:
3262  return NVPTXISD::Suld3DI16Zero;
3263  case Intrinsic::nvvm_suld_3d_i32_zero:
3264  return NVPTXISD::Suld3DI32Zero;
3265  case Intrinsic::nvvm_suld_3d_i64_zero:
3266  return NVPTXISD::Suld3DI64Zero;
3267  case Intrinsic::nvvm_suld_3d_v2i8_zero:
3268  return NVPTXISD::Suld3DV2I8Zero;
 3269  case Intrinsic::nvvm_suld_3d_v2i16_zero:
 3270  return NVPTXISD::Suld3DV2I16Zero;
 3271  case Intrinsic::nvvm_suld_3d_v2i32_zero:
 3272  return NVPTXISD::Suld3DV2I32Zero;
 3273  case Intrinsic::nvvm_suld_3d_v2i64_zero:
 3274  return NVPTXISD::Suld3DV2I64Zero;
 3275  case Intrinsic::nvvm_suld_3d_v4i8_zero:
 3276  return NVPTXISD::Suld3DV4I8Zero;
 3277  case Intrinsic::nvvm_suld_3d_v4i16_zero:
 3278  return NVPTXISD::Suld3DV4I16Zero;
 3279  case Intrinsic::nvvm_suld_3d_v4i32_zero:
 3280  return NVPTXISD::Suld3DV4I32Zero;
3281  }
3282 }
3283 
3284 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
3285 // TgtMemIntrinsic because we need information that is only
3286 // available in the "Value" type of the destination pointer;
3287 // in particular, its address space.
3288 
3289 bool NVPTXTargetLowering::getTgtMemIntrinsic(
 3290  IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
3291  switch (Intrinsic) {
3292  default:
3293  return false;
3294 
3295  case Intrinsic::nvvm_atomic_load_add_f32:
3296  case Intrinsic::nvvm_atomic_load_inc_32:
3297  case Intrinsic::nvvm_atomic_load_dec_32:
3298 
3299  case Intrinsic::nvvm_atomic_add_gen_f_cta:
3300  case Intrinsic::nvvm_atomic_add_gen_f_sys:
3301  case Intrinsic::nvvm_atomic_add_gen_i_cta:
3302  case Intrinsic::nvvm_atomic_add_gen_i_sys:
3303  case Intrinsic::nvvm_atomic_and_gen_i_cta:
3304  case Intrinsic::nvvm_atomic_and_gen_i_sys:
3305  case Intrinsic::nvvm_atomic_cas_gen_i_cta:
3306  case Intrinsic::nvvm_atomic_cas_gen_i_sys:
3307  case Intrinsic::nvvm_atomic_dec_gen_i_cta:
3308  case Intrinsic::nvvm_atomic_dec_gen_i_sys:
3309  case Intrinsic::nvvm_atomic_inc_gen_i_cta:
3310  case Intrinsic::nvvm_atomic_inc_gen_i_sys:
3311  case Intrinsic::nvvm_atomic_max_gen_i_cta:
3312  case Intrinsic::nvvm_atomic_max_gen_i_sys:
3313  case Intrinsic::nvvm_atomic_min_gen_i_cta:
3314  case Intrinsic::nvvm_atomic_min_gen_i_sys:
3315  case Intrinsic::nvvm_atomic_or_gen_i_cta:
3316  case Intrinsic::nvvm_atomic_or_gen_i_sys:
3317  case Intrinsic::nvvm_atomic_exch_gen_i_cta:
3318  case Intrinsic::nvvm_atomic_exch_gen_i_sys:
3319  case Intrinsic::nvvm_atomic_xor_gen_i_cta:
3320  case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
3321  auto &DL = I.getModule()->getDataLayout();
3322  Info.opc = ISD::INTRINSIC_W_CHAIN;
3323  Info.memVT = getValueType(DL, I.getType());
3324  Info.ptrVal = I.getArgOperand(0);
3325  Info.offset = 0;
3326  Info.vol = false;
3327  Info.readMem = true;
3328  Info.writeMem = true;
3329  Info.align = 0;
3330  return true;
3331  }
3332 
3333  case Intrinsic::nvvm_ldu_global_i:
3334  case Intrinsic::nvvm_ldu_global_f:
3335  case Intrinsic::nvvm_ldu_global_p: {
3336  auto &DL = I.getModule()->getDataLayout();
3337  Info.opc = ISD::INTRINSIC_W_CHAIN;
3338  if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
3339  Info.memVT = getValueType(DL, I.getType());
3340  else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
3341  Info.memVT = getPointerTy(DL);
3342  else
3343  Info.memVT = getValueType(DL, I.getType());
3344  Info.ptrVal = I.getArgOperand(0);
3345  Info.offset = 0;
3346  Info.vol = false;
3347  Info.readMem = true;
3348  Info.writeMem = false;
3349  Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3350 
3351  return true;
3352  }
3353  case Intrinsic::nvvm_ldg_global_i:
3354  case Intrinsic::nvvm_ldg_global_f:
3355  case Intrinsic::nvvm_ldg_global_p: {
3356  auto &DL = I.getModule()->getDataLayout();
3357 
3358  Info.opc = ISD::INTRINSIC_W_CHAIN;
3359  if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
3360  Info.memVT = getValueType(DL, I.getType());
3361  else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
3362  Info.memVT = getPointerTy(DL);
3363  else
3364  Info.memVT = getValueType(DL, I.getType());
3365  Info.ptrVal = I.getArgOperand(0);
3366  Info.offset = 0;
3367  Info.vol = false;
3368  Info.readMem = true;
3369  Info.writeMem = false;
3370  Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3371 
3372  return true;
3373  }
3374 
3375  case Intrinsic::nvvm_tex_1d_v4f32_s32:
3376  case Intrinsic::nvvm_tex_1d_v4f32_f32:
3377  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
3378  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
3379  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
3380  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
3381  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
3382  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
3383  case Intrinsic::nvvm_tex_2d_v4f32_s32:
3384  case Intrinsic::nvvm_tex_2d_v4f32_f32:
3385  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
3386  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
3387  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
3388  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
3389  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
3390  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
3391  case Intrinsic::nvvm_tex_3d_v4f32_s32:
3392  case Intrinsic::nvvm_tex_3d_v4f32_f32:
3393  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
3394  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
3395  case Intrinsic::nvvm_tex_cube_v4f32_f32:
3396  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
3397  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
3398  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
3399  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
3400  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
3401  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
3402  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
3403  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
3404  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
3405  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
3406  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
3407  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
3408  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
3409  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
3410  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
3411  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
3412  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
3413  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
3414  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
3415  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
3416  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
3417  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
3418  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
3419  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
3420  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
3421  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
3422  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
3423  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
3424  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
3425  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
3426  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
3427  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
3428  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
3429  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
3430  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
3431  Info.opc = getOpcForTextureInstr(Intrinsic);
3432  Info.memVT = MVT::v4f32;
3433  Info.ptrVal = nullptr;
3434  Info.offset = 0;
3435  Info.vol = false;
3436  Info.readMem = true;
3437  Info.writeMem = false;
3438  Info.align = 16;
3439  return true;
3440 
3441  case Intrinsic::nvvm_tex_1d_v4s32_s32:
3442  case Intrinsic::nvvm_tex_1d_v4s32_f32:
3443  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
3444  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
3445  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
3446  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
3447  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
3448  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
3449  case Intrinsic::nvvm_tex_2d_v4s32_s32:
3450  case Intrinsic::nvvm_tex_2d_v4s32_f32:
3451  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
3452  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
3453  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
3454  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
3455  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
3456  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
3457  case Intrinsic::nvvm_tex_3d_v4s32_s32:
3458  case Intrinsic::nvvm_tex_3d_v4s32_f32:
3459  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
3460  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
3461  case Intrinsic::nvvm_tex_cube_v4s32_f32:
3462  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
3463  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
3464  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
3465  case Intrinsic::nvvm_tex_cube_v4u32_f32:
3466  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
3467  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
3468  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
3469  case Intrinsic::nvvm_tex_1d_v4u32_s32:
3470  case Intrinsic::nvvm_tex_1d_v4u32_f32:
3471  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
3472  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
3473  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
3474  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
3475  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
3476  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
3477  case Intrinsic::nvvm_tex_2d_v4u32_s32:
3478  case Intrinsic::nvvm_tex_2d_v4u32_f32:
3479  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
3480  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
3481  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
3482  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
3483  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
3484  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
3485  case Intrinsic::nvvm_tex_3d_v4u32_s32:
3486  case Intrinsic::nvvm_tex_3d_v4u32_f32:
3487  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
3488  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
3489  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
3490  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
3491  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
3492  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
3493  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
3494  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
3495  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
3496  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
3497  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
3498  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
3499  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
3500  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
3501  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
3502  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
3503  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
3504  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
3505  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
3506  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
3507  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
3508  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
3509  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
3510  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
3511  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
3512  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
3513  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
3514  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
3515  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
3516  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
3517  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
3518  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
3519  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
3520  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
3521  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
3522  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
3523  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
3524  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
3525  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
3526  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
3527  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
3528  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
3529  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
3530  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
3531  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
3532  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
3533  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
3534  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
3535  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
3536  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
3537  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
3538  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
3539  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
3540  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
3541  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
3542  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
3543  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
3544  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
3545  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
3546  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
3547  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
3548  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
3549  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
3550  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
3551  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
3552  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
3553  Info.opc = getOpcForTextureInstr(Intrinsic);
3554  Info.memVT = MVT::v4i32;
3555  Info.ptrVal = nullptr;
3556  Info.offset = 0;
3557  Info.vol = false;
3558  Info.readMem = true;
3559  Info.writeMem = false;
3560  Info.align = 16;
3561  return true;
3562 
3563  case Intrinsic::nvvm_suld_1d_i8_clamp:
3564  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
3565  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
3566  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
3567  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
3568  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
3569  case Intrinsic::nvvm_suld_2d_i8_clamp:
3570  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
3571  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
3572  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
3573  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
3574  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
3575  case Intrinsic::nvvm_suld_3d_i8_clamp:
3576  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
3577  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
3578  case Intrinsic::nvvm_suld_1d_i8_trap:
3579  case Intrinsic::nvvm_suld_1d_v2i8_trap:
3580  case Intrinsic::nvvm_suld_1d_v4i8_trap:
3581  case Intrinsic::nvvm_suld_1d_array_i8_trap:
3582  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
3583  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
3584  case Intrinsic::nvvm_suld_2d_i8_trap:
3585  case Intrinsic::nvvm_suld_2d_v2i8_trap:
3586  case Intrinsic::nvvm_suld_2d_v4i8_trap:
3587  case Intrinsic::nvvm_suld_2d_array_i8_trap:
3588  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
3589  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
3590  case Intrinsic::nvvm_suld_3d_i8_trap:
3591  case Intrinsic::nvvm_suld_3d_v2i8_trap:
3592  case Intrinsic::nvvm_suld_3d_v4i8_trap:
3593  case Intrinsic::nvvm_suld_1d_i8_zero:
3594  case Intrinsic::nvvm_suld_1d_v2i8_zero:
3595  case Intrinsic::nvvm_suld_1d_v4i8_zero:
3596  case Intrinsic::nvvm_suld_1d_array_i8_zero:
3597  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
3598  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
3599  case Intrinsic::nvvm_suld_2d_i8_zero:
3600  case Intrinsic::nvvm_suld_2d_v2i8_zero:
3601  case Intrinsic::nvvm_suld_2d_v4i8_zero:
3602  case Intrinsic::nvvm_suld_2d_array_i8_zero:
3603  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
3604  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
3605  case Intrinsic::nvvm_suld_3d_i8_zero:
3606  case Intrinsic::nvvm_suld_3d_v2i8_zero:
3607  case Intrinsic::nvvm_suld_3d_v4i8_zero:
3608  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3609  Info.memVT = MVT::i8;
3610  Info.ptrVal = nullptr;
3611  Info.offset = 0;
3612  Info.vol = false;
3613  Info.readMem = true;
3614  Info.writeMem = false;
3615  Info.align = 16;
3616  return true;
3617 
3618  case Intrinsic::nvvm_suld_1d_i16_clamp:
3619  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
3620  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
3621  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
3622  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
3623  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
3624  case Intrinsic::nvvm_suld_2d_i16_clamp:
3625  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
3626  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
3627  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
3628  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
3629  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
3630  case Intrinsic::nvvm_suld_3d_i16_clamp:
3631  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
3632  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
3633  case Intrinsic::nvvm_suld_1d_i16_trap:
3634  case Intrinsic::nvvm_suld_1d_v2i16_trap:
3635  case Intrinsic::nvvm_suld_1d_v4i16_trap:
3636  case Intrinsic::nvvm_suld_1d_array_i16_trap:
3637  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
3638  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
3639  case Intrinsic::nvvm_suld_2d_i16_trap:
3640  case Intrinsic::nvvm_suld_2d_v2i16_trap:
3641  case Intrinsic::nvvm_suld_2d_v4i16_trap:
3642  case Intrinsic::nvvm_suld_2d_array_i16_trap:
3643  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
3644  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
3645  case Intrinsic::nvvm_suld_3d_i16_trap:
3646  case Intrinsic::nvvm_suld_3d_v2i16_trap:
3647  case Intrinsic::nvvm_suld_3d_v4i16_trap:
3648  case Intrinsic::nvvm_suld_1d_i16_zero:
3649  case Intrinsic::nvvm_suld_1d_v2i16_zero:
3650  case Intrinsic::nvvm_suld_1d_v4i16_zero:
3651  case Intrinsic::nvvm_suld_1d_array_i16_zero:
3652  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
3653  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
3654  case Intrinsic::nvvm_suld_2d_i16_zero:
3655  case Intrinsic::nvvm_suld_2d_v2i16_zero:
3656  case Intrinsic::nvvm_suld_2d_v4i16_zero:
3657  case Intrinsic::nvvm_suld_2d_array_i16_zero:
3658  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
3659  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
3660  case Intrinsic::nvvm_suld_3d_i16_zero:
3661  case Intrinsic::nvvm_suld_3d_v2i16_zero:
3662  case Intrinsic::nvvm_suld_3d_v4i16_zero:
3663  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3664  Info.memVT = MVT::i16;
3665  Info.ptrVal = nullptr;
3666  Info.offset = 0;
3667  Info.vol = false;
3668  Info.readMem = true;
3669  Info.writeMem = false;
3670  Info.align = 16;
3671  return true;
3672 
3673  case Intrinsic::nvvm_suld_1d_i32_clamp:
3674  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
3675  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
3676  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
3677  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
3678  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
3679  case Intrinsic::nvvm_suld_2d_i32_clamp:
3680  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
3681  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
3682  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
3683  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
3684  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
3685  case Intrinsic::nvvm_suld_3d_i32_clamp:
3686  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
3687  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
3688  case Intrinsic::nvvm_suld_1d_i32_trap:
3689  case Intrinsic::nvvm_suld_1d_v2i32_trap:
3690  case Intrinsic::nvvm_suld_1d_v4i32_trap:
3691  case Intrinsic::nvvm_suld_1d_array_i32_trap:
3692  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
3693  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
3694  case Intrinsic::nvvm_suld_2d_i32_trap:
3695  case Intrinsic::nvvm_suld_2d_v2i32_trap:
3696  case Intrinsic::nvvm_suld_2d_v4i32_trap:
3697  case Intrinsic::nvvm_suld_2d_array_i32_trap:
3698  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
3699  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
3700  case Intrinsic::nvvm_suld_3d_i32_trap:
3701  case Intrinsic::nvvm_suld_3d_v2i32_trap:
3702  case Intrinsic::nvvm_suld_3d_v4i32_trap:
3703  case Intrinsic::nvvm_suld_1d_i32_zero:
3704  case Intrinsic::nvvm_suld_1d_v2i32_zero:
3705  case Intrinsic::nvvm_suld_1d_v4i32_zero:
3706  case Intrinsic::nvvm_suld_1d_array_i32_zero:
3707  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
3708  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
3709  case Intrinsic::nvvm_suld_2d_i32_zero:
3710  case Intrinsic::nvvm_suld_2d_v2i32_zero:
3711  case Intrinsic::nvvm_suld_2d_v4i32_zero:
3712  case Intrinsic::nvvm_suld_2d_array_i32_zero:
3713  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
3714  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
3715  case Intrinsic::nvvm_suld_3d_i32_zero:
3716  case Intrinsic::nvvm_suld_3d_v2i32_zero:
3717  case Intrinsic::nvvm_suld_3d_v4i32_zero:
3718  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3719  Info.memVT = MVT::i32;
3720  Info.ptrVal = nullptr;
3721  Info.offset = 0;
3722  Info.vol = false;
3723  Info.readMem = true;
3724  Info.writeMem = false;
3725  Info.align = 16;
3726  return true;
3727 
3728  case Intrinsic::nvvm_suld_1d_i64_clamp:
3729  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
3730  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
3731  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
3732  case Intrinsic::nvvm_suld_2d_i64_clamp:
3733  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
3734  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
3735  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
3736  case Intrinsic::nvvm_suld_3d_i64_clamp:
3737  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
3738  case Intrinsic::nvvm_suld_1d_i64_trap:
3739  case Intrinsic::nvvm_suld_1d_v2i64_trap:
3740  case Intrinsic::nvvm_suld_1d_array_i64_trap:
3741  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
3742  case Intrinsic::nvvm_suld_2d_i64_trap:
3743  case Intrinsic::nvvm_suld_2d_v2i64_trap:
3744  case Intrinsic::nvvm_suld_2d_array_i64_trap:
3745  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
3746  case Intrinsic::nvvm_suld_3d_i64_trap:
3747  case Intrinsic::nvvm_suld_3d_v2i64_trap:
3748  case Intrinsic::nvvm_suld_1d_i64_zero:
3749  case Intrinsic::nvvm_suld_1d_v2i64_zero:
3750  case Intrinsic::nvvm_suld_1d_array_i64_zero:
3751  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
3752  case Intrinsic::nvvm_suld_2d_i64_zero:
3753  case Intrinsic::nvvm_suld_2d_v2i64_zero:
3754  case Intrinsic::nvvm_suld_2d_array_i64_zero:
3755  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
3756  case Intrinsic::nvvm_suld_3d_i64_zero:
3757  case Intrinsic::nvvm_suld_3d_v2i64_zero:
3758  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3759  Info.memVT = MVT::i64;
3760  Info.ptrVal = nullptr;
3761  Info.offset = 0;
3762  Info.vol = false;
3763  Info.readMem = true;
3764  Info.writeMem = false;
3765  Info.align = 16;
3766  return true;
3767  }
3768  return false;
3769 }
3770 
3771 /// isLegalAddressingMode - Return true if the addressing mode represented
3772 /// by AM is legal for this target, for a load/store of the specified type.
3773 /// Used to guide target specific optimizations, like loop strength reduction
3774 /// (LoopStrengthReduce.cpp) and memory optimization for address mode
3775 /// (CodeGenPrepare.cpp)
3776 bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
3777  const AddrMode &AM, Type *Ty,
3778  unsigned AS) const {
3779  // AddrMode - This represents an addressing mode of:
3780  // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
3781  //
3782  // The legal address modes are
3783  // - [avar]
3784  // - [areg]
3785  // - [areg+immoff]
3786  // - [immAddr]
3787 
3788  if (AM.BaseGV) {
3789  return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
3790  }
3791 
3792  switch (AM.Scale) {
3793  case 0: // "r", "r+i" or "i" is allowed
3794  break;
3795  case 1:
3796  if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
3797  return false;
3798  // Otherwise we have r+i.
3799  break;
3800  default:
3801  // No scale > 1 is allowed
3802  return false;
3803  }
3804  return true;
3805 }
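 // Worked examples (illustrative): AM = {BaseGV = @g} matches [avar];
 // AM = {HasBaseReg} matches [areg]; AM = {HasBaseReg, BaseOffs = 16}
 // matches [areg+immoff]. A scaled index such as AM = {HasBaseReg,
 // Scale = 4} is rejected above, because PTX has no [reg + scale*reg]
 // addressing form.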
3806 
3807 //===----------------------------------------------------------------------===//
3808 // NVPTX Inline Assembly Support
3809 //===----------------------------------------------------------------------===//
3810 
3811 /// getConstraintType - Given a constraint letter, return the type of
3812 /// constraint it is for this target.
3813 NVPTXTargetLowering::ConstraintType
3814 NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
3815  if (Constraint.size() == 1) {
3816  switch (Constraint[0]) {
3817  default:
3818  break;
3819  case 'b':
3820  case 'r':
3821  case 'h':
3822  case 'c':
3823  case 'l':
3824  case 'f':
3825  case 'd':
3826  case '0':
3827  case 'N':
3828  return C_RegisterClass;
3829  }
3830  }
3831  return TargetLowering::getConstraintType(Constraint);
3832 }
3833 
3834 std::pair<unsigned, const TargetRegisterClass *>
3835 NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3836  StringRef Constraint,
3837  MVT VT) const {
3838  if (Constraint.size() == 1) {
3839  switch (Constraint[0]) {
3840  case 'b':
3841  return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
3842  case 'c':
3843  return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
3844  case 'h':
3845  return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
3846  case 'r':
3847  return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
3848  case 'l':
3849  case 'N':
3850  return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
3851  case 'f':
3852  return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
3853  case 'd':
3854  return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
3855  }
3856  }
3857  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3858 }
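 // Illustrative CUDA-side use of these constraints (hypothetical snippet):
 //   int lane;
 //   asm("mov.u32 %0, %%laneid;" : "=r"(lane));
 // Here "r" selects Int32RegsRegClass above, while "l", "f", and "d" would
 // pick the 64-bit integer, f32, and f64 register classes respectively.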
3859 
3860 //===----------------------------------------------------------------------===//
3861 // NVPTX DAG Combining
3862 //===----------------------------------------------------------------------===//
3863 
3864 bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
3865  CodeGenOpt::Level OptLevel) const {
3866  const Function *F = MF.getFunction();
3867  const TargetOptions &TO = MF.getTarget().Options;
3868 
3869  // Always honor command-line argument
3870  if (FMAContractLevelOpt.getNumOccurrences() > 0) {
3871  return FMAContractLevelOpt > 0;
3872  } else if (OptLevel == 0) {
3873  // Do not contract if we're not optimizing the code
3874  return false;
3875  } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
3876  // Honor TargetOptions flags that explicitly say fusion is okay
3877  return true;
3878  } else if (F->hasFnAttribute("unsafe-fp-math")) {
3879  // Check for unsafe-fp-math=true coming from Clang
3880  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
3881  StringRef Val = Attr.getValueAsString();
3882  if (Val == "true")
3883  return true;
3884  }
3885 
3886  // We did not have a clear indication that fusion is allowed, so assume not
3887  return false;
3888 }
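 // In practice (illustrative): llc -nvptx-fma-level=0 disables contraction
 // even at -O3, and -nvptx-fma-level=1 enables it even at -O0, because the
 // command-line option is checked before every other signal above.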
3889 
3890 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
3891 /// operands N0 and N1. This is a helper for PerformADDCombine that is
3892 /// called with the default operands, and if that fails, with commuted
3893 /// operands.
3894 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
3895  TargetLowering::DAGCombinerInfo &DCI,
3896  const NVPTXSubtarget &Subtarget,
3897  CodeGenOpt::Level OptLevel) {
3898  SelectionDAG &DAG = DCI.DAG;
3899  // Skip non-integer, non-scalar case
3900  EVT VT = N0.getValueType();
3901  if (VT.isVector())
3902  return SDValue();
3903 
3904  // fold (add (mul a, b), c) -> (mad a, b, c)
3905  //
3906  if (N0.getOpcode() == ISD::MUL) {
3907  assert(VT.isInteger());
3908  // For integer:
3909  // Since integer multiply-add costs the same as integer multiply
3910  // but is more costly than integer add, do the fusion only when
3911  // the mul is only used in the add.
3912  if (OptLevel == CodeGenOpt::None || VT != MVT::i32 ||
3913  !N0.getNode()->hasOneUse())
3914  return SDValue();
3915 
3916  // Do the folding
3917  return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
3918  N0.getOperand(0), N0.getOperand(1), N1);
3919  }
3920  else if (N0.getOpcode() == ISD::FMUL) {
3921  if (VT == MVT::f32 || VT == MVT::f64) {
3922  const auto *TLI = static_cast<const NVPTXTargetLowering *>(
3923  &DAG.getTargetLoweringInfo());
3924  if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
3925  return SDValue();
3926 
3927  // For floating point:
3928  // Do the fusion only when the mul has fewer than 5 uses and all
3929  // of them are adds.
3930  // The heuristic is that if a use is not an add, that use cannot
3931  // be fused into an fma, so the mul is still needed anyway. If
3932  // there are more than 4 uses, even if they are all adds, fusing
3933  // them would increase register pressure.
3934  //
3935  int numUses = 0;
3936  int nonAddCount = 0;
3937  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
3938  UE = N0.getNode()->use_end();
3939  UI != UE; ++UI) {
3940  numUses++;
3941  SDNode *User = *UI;
3942  if (User->getOpcode() != ISD::FADD)
3943  ++nonAddCount;
3944  }
3945  if (numUses >= 5)
3946  return SDValue();
3947  if (nonAddCount) {
3948  int orderNo = N->getIROrder();
3949  int orderNo2 = N0.getNode()->getIROrder();
3950  // Simple heuristic for estimating potential register pressure:
3951  // the difference in IR order approximates the distance between
3952  // the def and this use; the longer the distance, the more likely
3953  // the fusion is to increase register pressure.
3954  if (orderNo - orderNo2 < 500)
3955  return SDValue();
3956 
3957  // Now, check if at least one of the FMUL's operands is live beyond the node N,
3958  // which guarantees that the FMA will not increase register pressure at node N.
3959  bool opIsLive = false;
3960  const SDNode *left = N0.getOperand(0).getNode();
3961  const SDNode *right = N0.getOperand(1).getNode();
3962 
3963  if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
3964  opIsLive = true;
3965 
3966  if (!opIsLive)
3967  for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) {
3968  SDNode *User = *UI;
3969  int orderNo3 = User->getIROrder();
3970  if (orderNo3 > orderNo) {
3971  opIsLive = true;
3972  break;
3973  }
3974  }
3975 
3976  if (!opIsLive)
3977  for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) {
3978  SDNode *User = *UI;
3979  int orderNo3 = User->getIROrder();
3980  if (orderNo3 > orderNo) {
3981  opIsLive = true;
3982  break;
3983  }
3984  }
3985 
3986  if (!opIsLive)
3987  return SDValue();
3988  }
3989 
3990  return DAG.getNode(ISD::FMA, SDLoc(N), VT,
3991  N0.getOperand(0), N0.getOperand(1), N1);
3992  }
3993  }
3994 
3995  return SDValue();
3996 }
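 // Illustrative effect of the integer path above (hypothetical values):
 //   %m = mul i32 %a, %b      ; %m has exactly one use
 //   %r = add i32 %m, %c
 // becomes a single NVPTXISD::IMAD node, which selects to one PTX
 // mad.lo.s32 instead of a separate mul/add pair.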
3997 
3998 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
3999 ///
4000 static SDValue PerformADDCombine(SDNode *N,
4001  TargetLowering::DAGCombinerInfo &DCI,
4002  const NVPTXSubtarget &Subtarget,
4003  CodeGenOpt::Level OptLevel) {
4004  SDValue N0 = N->getOperand(0);
4005  SDValue N1 = N->getOperand(1);
4006 
4007  // First try with the default operand order.
4008  if (SDValue Result =
4009  PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel))
4010  return Result;
4011 
4012  // If that didn't work, try again with the operands commuted.
4013  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
4014 }
4015 
4016 static SDValue PerformANDCombine(SDNode *N,
4017  TargetLowering::DAGCombinerInfo &DCI) {
4018  // The type legalizer turns a vector load of i8 values into a zextload to i16
4019  // registers, optionally ANY_EXTENDs it (if target type is integer),
4020  // and ANDs off the high 8 bits. Since we turn this load into a
4021  // target-specific DAG node, the DAG combiner fails to eliminate these AND
4022  // nodes. Do that here.
4023  SDValue Val = N->getOperand(0);
4024  SDValue Mask = N->getOperand(1);
4025 
4026  if (isa<ConstantSDNode>(Val)) {
4027  std::swap(Val, Mask);
4028  }
4029 
4030  SDValue AExt;
4031  // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
4032  if (Val.getOpcode() == ISD::ANY_EXTEND) {
4033  AExt = Val;
4034  Val = Val->getOperand(0);
4035  }
4036 
4037  if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
4038  Val = Val->getOperand(0);
4039  }
4040 
4041  if (Val->getOpcode() == NVPTXISD::LoadV2 ||
4042  Val->getOpcode() == NVPTXISD::LoadV4) {
4043  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
4044  if (!MaskCnst) {
4045  // Not an AND with a constant
4046  return SDValue();
4047  }
4048 
4049  uint64_t MaskVal = MaskCnst->getZExtValue();
4050  if (MaskVal != 0xff) {
4051  // Not an AND that chops off top 8 bits
4052  return SDValue();
4053  }
4054 
4055  MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
4056  if (!Mem) {
4057  // Not a MemSDNode?!?
4058  return SDValue();
4059  }
4060 
4061  EVT MemVT = Mem->getMemoryVT();
4062  if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
4063  // We only handle the i8 case
4064  return SDValue();
4065  }
4066 
4067  unsigned ExtType =
4068  cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
4069  getZExtValue();
4070  if (ExtType == ISD::SEXTLOAD) {
4071  // If for some reason the load is a sextload, the and is needed to zero
4072  // out the high 8 bits
4073  return SDValue();
4074  }
4075 
4076  bool AddTo = false;
4077  if (AExt.getNode() != nullptr) {
4078  // Re-insert the ext as a zext.
4079  Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4080  AExt.getValueType(), Val);
4081  AddTo = true;
4082  }
4083 
4084  // If we get here, the AND is unnecessary. Just replace it with the load
4085  DCI.CombineTo(N, Val, AddTo);
4086  }
4087 
4088  return SDValue();
4089 }
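 // Illustrative: for (and (any_extend (LoadV2 ..., zextload from v2i8)),
 // 0xff), the AND is redundant because the zextload already cleared the
 // high bits, so the combine above replaces it with the load result,
 // re-extended via zext when an any_extend was present.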
4090 
4091 static SDValue PerformSELECTCombine(SDNode *N,
4092  TargetLowering::DAGCombinerInfo &DCI) {
4093  // Currently this detects patterns for integer min and max and
4094  // lowers them to PTX-specific intrinsics that enable hardware
4095  // support.
4096 
4097  const SDValue Cond = N->getOperand(0);
4098  if (Cond.getOpcode() != ISD::SETCC) return SDValue();
4099 
4100  const SDValue LHS = Cond.getOperand(0);
4101  const SDValue RHS = Cond.getOperand(1);
4102  const SDValue True = N->getOperand(1);
4103  const SDValue False = N->getOperand(2);
4104  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
4105  return SDValue();
4106 
4107  const EVT VT = N->getValueType(0);
4108  if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
4109 
4110  const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
4111  SDValue Larger; // The larger of LHS and RHS when condition is true.
4112  switch (CC) {
4113  case ISD::SETULT:
4114  case ISD::SETULE:
4115  case ISD::SETLT:
4116  case ISD::SETLE:
4117  Larger = RHS;
4118  break;
4119 
4120  case ISD::SETGT:
4121  case ISD::SETGE:
4122  case ISD::SETUGT:
4123  case ISD::SETUGE:
4124  Larger = LHS;
4125  break;
4126 
4127  default:
4128  return SDValue();
4129  }
4130  const bool IsMax = (Larger == True);
4131  const bool IsSigned = ISD::isSignedIntSetCC(CC);
4132 
4133  unsigned IntrinsicId;
4134  if (VT == MVT::i32) {
4135  if (IsSigned)
4136  IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i;
4137  else
4138  IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui;
4139  } else {
4140  assert(VT == MVT::i64);
4141  if (IsSigned)
4142  IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll;
4143  else
4144  IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull;
4145  }
4146 
4147  SDLoc DL(N);
4148  return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
4149  DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
4150 }
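 // Illustrative: select(setcc(%a, %b, setlt), %a, %b) on i32 operands
 // becomes llvm.nvvm.min.i(%a, %b), which instruction selection then
 // lowers to the hardware min.s32 PTX instruction.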
4151 
4152 static SDValue PerformREMCombine(SDNode *N,
4153  TargetLowering::DAGCombinerInfo &DCI,
4154  CodeGenOpt::Level OptLevel) {
4155  assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM);
4156 
4157  // Don't do anything at less than -O2.
4158  if (OptLevel < CodeGenOpt::Default)
4159  return SDValue();
4160 
4161  SelectionDAG &DAG = DCI.DAG;
4162  SDLoc DL(N);
4163  EVT VT = N->getValueType(0);
4164  bool IsSigned = N->getOpcode() == ISD::SREM;
4165  unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV;
4166 
4167  const SDValue &Num = N->getOperand(0);
4168  const SDValue &Den = N->getOperand(1);
4169 
4170  for (const SDNode *U : Num->uses()) {
4171  if (U->getOpcode() == DivOpc && U->getOperand(0) == Num &&
4172  U->getOperand(1) == Den) {
4173  // Num % Den -> Num - (Num / Den) * Den
4174  return DAG.getNode(ISD::SUB, DL, VT, Num,
4175  DAG.getNode(ISD::MUL, DL, VT,
4176  DAG.getNode(DivOpc, DL, VT, Num, Den),
4177  Den));
4178  }
4179  }
4180  return SDValue();
4181 }
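 // Worked example (illustrative): given both
 //   %q = udiv i32 %n, %d
 //   %r = urem i32 %n, %d
 // the urem is rewritten to sub(%n, mul(%q, %d)), reusing the quotient so
 // no separate remainder computation is emitted.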
4182 
4183 enum OperandSignedness {
4184  Signed = 0,
4185  Unsigned,
4186  Unknown
4187 };
4188 
4189 /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
4190 /// that can be demoted to \p OptSize bits without loss of information. The
4191 /// signedness of the operand, if determinable, is placed in \p S.
4192 static bool IsMulWideOperandDemotable(SDValue Op,
4193  unsigned OptSize,
4194  OperandSignedness &S) {
4195  S = Unknown;
4196 
4197  if (Op.getOpcode() == ISD::SIGN_EXTEND ||
4198  Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4199  EVT OrigVT = Op.getOperand(0).getValueType();
4200  if (OrigVT.getSizeInBits() <= OptSize) {
4201  S = Signed;
4202  return true;
4203  }
4204  } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
4205  EVT OrigVT = Op.getOperand(0).getValueType();
4206  if (OrigVT.getSizeInBits() <= OptSize) {
4207  S = Unsigned;
4208  return true;
4209  }
4210  }
4211 
4212  return false;
4213 }
4214 
4215 /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
4216 /// be demoted to \p OptSize bits without loss of information. If the operands
4217 /// contain a constant, it should appear as the RHS operand. The signedness of
4218 /// the operands is placed in \p IsSigned.
4219 static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
4220  unsigned OptSize,
4221  bool &IsSigned) {
4222  OperandSignedness LHSSign;
4223 
4224  // The LHS operand must be a demotable op
4225  if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
4226  return false;
4227 
4228  // We should have been able to determine the signedness from the LHS
4229  if (LHSSign == Unknown)
4230  return false;
4231 
4232  IsSigned = (LHSSign == Signed);
4233 
4234  // The RHS can be a demotable op or a constant
4235  if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
4236  const APInt &Val = CI->getAPIntValue();
4237  if (LHSSign == Unsigned) {
4238  return Val.isIntN(OptSize);
4239  } else {
4240  return Val.isSignedIntN(OptSize);
4241  }
4242  } else {
4243  OperandSignedness RHSSign;
4244  if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
4245  return false;
4246 
4247  return LHSSign == RHSSign;
4248  }
4249 }
4250 
4251 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
4252 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
4253 /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
4254 /// amount.
4255 static SDValue TryMULWIDECombine(SDNode *N,
4256  TargetLowering::DAGCombinerInfo &DCI) {
4257  EVT MulType = N->getValueType(0);
4258  if (MulType != MVT::i32 && MulType != MVT::i64) {
4259  return SDValue();
4260  }
4261 
4262  SDLoc DL(N);
4263  unsigned OptSize = MulType.getSizeInBits() >> 1;
4264  SDValue LHS = N->getOperand(0);
4265  SDValue RHS = N->getOperand(1);
4266 
4267  // Canonicalize the multiply so the constant (if any) is on the right
4268  if (N->getOpcode() == ISD::MUL) {
4269  if (isa<ConstantSDNode>(LHS)) {
4270  std::swap(LHS, RHS);
4271  }
4272  }
4273 
4274  // If we have a SHL, determine the actual multiply amount
4275  if (N->getOpcode() == ISD::SHL) {
4276  ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
4277  if (!ShlRHS) {
4278  return SDValue();
4279  }
4280 
4281  APInt ShiftAmt = ShlRHS->getAPIntValue();
4282  unsigned BitWidth = MulType.getSizeInBits();
4283  if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
4284  APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
4285  RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
4286  } else {
4287  return SDValue();
4288  }
4289  }
4290 
4291  bool Signed;
4292  // Verify that our operands are demotable
4293  if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
4294  return SDValue();
4295  }
4296 
4297  EVT DemotedVT;
4298  if (MulType == MVT::i32) {
4299  DemotedVT = MVT::i16;
4300  } else {
4301  DemotedVT = MVT::i32;
4302  }
4303 
4304  // Truncate the operands to the correct size. Note that these are just for
4305  // type consistency and will (likely) be eliminated in later phases.
4306  SDValue TruncLHS =
4307  DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
4308  SDValue TruncRHS =
4309  DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
4310 
4311  unsigned Opc;
4312  if (Signed) {
4313  Opc = NVPTXISD::MUL_WIDE_SIGNED;
4314  } else {
4315  Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
4316  }
4317 
4318  return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
4319 }
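 // Illustrative: (mul i32 (sext i16 %a to i32), 100) passes the checks
 // above (both operands fit in 16 signed bits), so it is rebuilt as
 // MUL_WIDE_SIGNED over the truncated operands and selects to PTX
 // mul.wide.s16, halving the multiply width.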
4320 
4321 /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
4322 static SDValue PerformMULCombine(SDNode *N,
4323  TargetLowering::DAGCombinerInfo &DCI,
4324  CodeGenOpt::Level OptLevel) {
4325  if (OptLevel > 0) {
4326  // Try mul.wide combining at OptLevel > 0
4327  if (SDValue Ret = TryMULWIDECombine(N, DCI))
4328  return Ret;
4329  }
4330 
4331  return SDValue();
4332 }
4333 
4334 /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
4335 static SDValue PerformSHLCombine(SDNode *N,
4336  TargetLowering::DAGCombinerInfo &DCI,
4337  CodeGenOpt::Level OptLevel) {
4338  if (OptLevel > 0) {
4339  // Try mul.wide combining at OptLevel > 0
4340  if (SDValue Ret = TryMULWIDECombine(N, DCI))
4341  return Ret;
4342  }
4343 
4344  return SDValue();
4345 }
4346 
4347 SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
4348  DAGCombinerInfo &DCI) const {
4349  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
4350  switch (N->getOpcode()) {
4351  default: break;
4352  case ISD::ADD:
4353  case ISD::FADD:
4354  return PerformADDCombine(N, DCI, STI, OptLevel);
4355  case ISD::MUL:
4356  return PerformMULCombine(N, DCI, OptLevel);
4357  case ISD::SHL:
4358  return PerformSHLCombine(N, DCI, OptLevel);
4359  case ISD::AND:
4360  return PerformANDCombine(N, DCI);
4361  case ISD::SELECT:
4362  return PerformSELECTCombine(N, DCI);
4363  case ISD::UREM:
4364  case ISD::SREM:
4365  return PerformREMCombine(N, DCI, OptLevel);
4366  }
4367  return SDValue();
4368 }
4369 
4370 /// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
4371 static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
4372  SmallVectorImpl<SDValue> &Results) {
4373  EVT ResVT = N->getValueType(0);
4374  SDLoc DL(N);
4375 
4376  assert(ResVT.isVector() && "Vector load must have vector type");
4377 
4378  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
4379  // legal. We can (and should) split that into 2 loads of <2 x double> here
4380  // but I'm leaving that as a TODO for now.
4381  assert(ResVT.isSimple() && "Can only handle simple types");
4382  switch (ResVT.getSimpleVT().SimpleTy) {
4383  default:
4384  return;
4385  case MVT::v2i8:
4386  case MVT::v2i16:
4387  case MVT::v2i32:
4388  case MVT::v2i64:
4389  case MVT::v2f32:
4390  case MVT::v2f64:
4391  case MVT::v4i8:
4392  case MVT::v4i16:
4393  case MVT::v4i32:
4394  case MVT::v4f32:
4395  // This is a "native" vector type
4396  break;
4397  }
4398 
4399  LoadSDNode *LD = cast<LoadSDNode>(N);
4400 
4401  unsigned Align = LD->getAlignment();
4402  auto &TD = DAG.getDataLayout();
4403  unsigned PrefAlign =
4404  TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
4405  if (Align < PrefAlign) {
4406  // This load is not sufficiently aligned, so bail out and let this vector
4407  // load be scalarized. Note that we may still be able to emit smaller
4408  // vector loads. For example, if we are loading a <4 x float> with an
4409  // alignment of 8, this check will fail but the legalizer will try again
4410  // with 2 x <2 x float>, which will succeed with an alignment of 8.
4411  return;
4412  }
4413 
4414  EVT EltVT = ResVT.getVectorElementType();
4415  unsigned NumElts = ResVT.getVectorNumElements();
4416 
4417  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
4418  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
4419  // loaded type to i16 and propagate the "real" type as the memory type.
4420  bool NeedTrunc = false;
4421  if (EltVT.getSizeInBits() < 16) {
4422  EltVT = MVT::i16;
4423  NeedTrunc = true;
4424  }
4425 
4426  unsigned Opcode = 0;
4427  SDVTList LdResVTs;
4428 
4429  switch (NumElts) {
4430  default:
4431  return;
4432  case 2:
4433  Opcode = NVPTXISD::LoadV2;
4434  LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
4435  break;
4436  case 4: {
4437  Opcode = NVPTXISD::LoadV4;
4438  EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
4439  LdResVTs = DAG.getVTList(ListVTs);
4440  break;
4441  }
4442  }
4443 
4444  // Copy regular operands
4445  SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
4446 
4447  // The select routine does not have access to the LoadSDNode instance, so
4448  // pass along the extension information
4449  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
4450 
4451  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
4452  LD->getMemoryVT(),
4453  LD->getMemOperand());
4454 
4455  SmallVector<SDValue, 4> ScalarRes;
4456 
4457  for (unsigned i = 0; i < NumElts; ++i) {
4458  SDValue Res = NewLD.getValue(i);
4459  if (NeedTrunc)
4460  Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
4461  ScalarRes.push_back(Res);
4462  }
4463 
4464  SDValue LoadChain = NewLD.getValue(NumElts);
4465 
4466  SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes);
4467 
4468  Results.push_back(BuildVec);
4469  Results.push_back(LoadChain);
4470 }
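 // Illustrative: a 16-byte-aligned load of <4 x float> becomes one
 // NVPTXISD::LoadV4 yielding four f32 values plus a chain (a v4.f32
 // vector load after isel), and the scalar results are recombined into
 // the original vector with a BUILD_VECTOR.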
4471 
4472 static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
4473  SmallVectorImpl<SDValue> &Results) {
4474  SDValue Chain = N->getOperand(0);
4475  SDValue Intrin = N->getOperand(1);
4476  SDLoc DL(N);
4477 
4478  // Get the intrinsic ID
4479  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
4480  switch (IntrinNo) {
4481  default:
4482  return;
4483  case Intrinsic::nvvm_ldg_global_i:
4484  case Intrinsic::nvvm_ldg_global_f:
4485  case Intrinsic::nvvm_ldg_global_p:
4486  case Intrinsic::nvvm_ldu_global_i:
4487  case Intrinsic::nvvm_ldu_global_f:
4488  case Intrinsic::nvvm_ldu_global_p: {
4489  EVT ResVT = N->getValueType(0);
4490 
4491  if (ResVT.isVector()) {
4492  // Vector LDG/LDU
4493 
4494  unsigned NumElts = ResVT.getVectorNumElements();
4495  EVT EltVT = ResVT.getVectorElementType();
4496 
4497  // Since LDU/LDG are target nodes, we cannot rely on DAG type
4498  // legalization.
4499  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
4500  // loaded type to i16 and propagate the "real" type as the memory type.
4501  bool NeedTrunc = false;
4502  if (EltVT.getSizeInBits() < 16) {
4503  EltVT = MVT::i16;
4504  NeedTrunc = true;
4505  }
4506 
4507  unsigned Opcode = 0;
4508  SDVTList LdResVTs;
4509 
4510  switch (NumElts) {
4511  default:
4512  return;
4513  case 2:
4514  switch (IntrinNo) {
4515  default:
4516  return;
4517  case Intrinsic::nvvm_ldg_global_i:
4518  case Intrinsic::nvvm_ldg_global_f:
4519  case Intrinsic::nvvm_ldg_global_p:
4520  Opcode = NVPTXISD::LDGV2;
4521  break;
4522  case Intrinsic::nvvm_ldu_global_i:
4523  case Intrinsic::nvvm_ldu_global_f:
4524  case Intrinsic::nvvm_ldu_global_p:
4525  Opcode = NVPTXISD::LDUV2;
4526  break;
4527  }
4528  LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
4529  break;
4530  case 4: {
4531  switch (IntrinNo) {
4532  default:
4533  return;
4534  case Intrinsic::nvvm_ldg_global_i:
4535  case Intrinsic::nvvm_ldg_global_f:
4536  case Intrinsic::nvvm_ldg_global_p:
4537  Opcode = NVPTXISD::LDGV4;
4538  break;
4539  case Intrinsic::nvvm_ldu_global_i:
4540  case Intrinsic::nvvm_ldu_global_f:
4541  case Intrinsic::nvvm_ldu_global_p:
4542  Opcode = NVPTXISD::LDUV4;
4543  break;
4544  }
4545  EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
4546  LdResVTs = DAG.getVTList(ListVTs);
4547  break;
4548  }
4549  }
4550 
4551  SmallVector<SDValue, 8> OtherOps;
4552 
4553  // Copy regular operands
4554 
4555  OtherOps.push_back(Chain); // Chain
4556  // Skip operand 1 (intrinsic ID)
4557  // Others
4558  OtherOps.append(N->op_begin() + 2, N->op_end());
4559 
4560  MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4561 
4562  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
4563  MemSD->getMemoryVT(),
4564  MemSD->getMemOperand());
4565 
4566  SmallVector<SDValue, 4> ScalarRes;
4567 
4568  for (unsigned i = 0; i < NumElts; ++i) {
4569  SDValue Res = NewLD.getValue(i);
4570  if (NeedTrunc)
4571  Res =
4572  DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
4573  ScalarRes.push_back(Res);
4574  }
4575 
4576  SDValue LoadChain = NewLD.getValue(NumElts);
4577 
4578  SDValue BuildVec =
4579  DAG.getBuildVector(ResVT, DL, ScalarRes);
4580 
4581  Results.push_back(BuildVec);
4582  Results.push_back(LoadChain);
4583  } else {
4584  // i8 LDG/LDU
4585  assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
4586  "Custom handling of non-i8 ldu/ldg?");
4587 
4588  // Just copy all operands as-is
4589  SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
4590 
4591  // Force output to i16
4592  SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
4593 
4594  MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4595 
4596  // We make sure the memory type is i8, which will be used during isel
4597  // to select the proper instruction.
4598  SDValue NewLD =
4599  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
4600  MVT::i8, MemSD->getMemOperand());
4601 
4602  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
4603  NewLD.getValue(0)));
4604  Results.push_back(NewLD.getValue(1));
4605  }
4606  }
4607  }
4608 }
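 // Illustrative: a scalar i8 ldg/ldu is given an i16 result type here
 // while i8 is kept as the memory type, so isel still picks the
 // byte-sized load; the trailing TRUNCATE restores the i8 value and the
 // DAG stays type-legal throughout.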
4609 
4610 void NVPTXTargetLowering::ReplaceNodeResults(
4611  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4612  switch (N->getOpcode()) {
4613  default:
4614  report_fatal_error("Unhandled custom legalization");
4615  case ISD::LOAD:
4616  ReplaceLoadVector(N, DAG, Results);
4617  return;
4618  case ISD::INTRINSIC_W_CHAIN:
4619  ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
4620  return;
4621  }
4622 }
4623 
4624 // Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
4625 void NVPTXSection::anchor() {}
4626 
4627 NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
4628  delete static_cast<NVPTXSection *>(TextSection);
4629  delete static_cast<NVPTXSection *>(DataSection);
4630  delete static_cast<NVPTXSection *>(BSSSection);
4631  delete static_cast<NVPTXSection *>(ReadOnlySection);
4632 
4633  delete static_cast<NVPTXSection *>(StaticCtorSection);
4634  delete static_cast<NVPTXSection *>(StaticDtorSection);
4635  delete static_cast<NVPTXSection *>(LSDASection);
4636  delete static_cast<NVPTXSection *>(EHFrameSection);
4637  delete static_cast<NVPTXSection *>(DwarfAbbrevSection);
4638  delete static_cast<NVPTXSection *>(DwarfInfoSection);
4639  delete static_cast<NVPTXSection *>(DwarfLineSection);
4640  delete static_cast<NVPTXSection *>(DwarfFrameSection);
4641  delete static_cast<NVPTXSection *>(DwarfPubTypesSection);
4642  delete static_cast<const NVPTXSection *>(DwarfDebugInlineSection);
4643  delete static_cast<NVPTXSection *>(DwarfStrSection);
4644  delete static_cast<NVPTXSection *>(DwarfLocSection);
4645  delete static_cast<NVPTXSection *>(DwarfARangesSection);
4646  delete static_cast<NVPTXSection *>(DwarfRangesSection);
4647  delete static_cast<NVPTXSection *>(DwarfMacinfoSection);
4648 }
4649 
4650 MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
4651  const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
4652  return getDataSection();
4653 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type (if unknown returns 0).
Instances of this class represent a uniqued identifier for a section in the current translation unit...
Definition: MCSection.h:40
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:241
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:524
SDValue getValue(unsigned R) const
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:226
Flags getFlags() const
Return the raw flags of the source value,.
LLVMContext * getContext() const
Definition: SelectionDAG.h:333
LLVM Argument representation.
Definition: Argument.h:34
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:804
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:724
uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B)
GreatestCommonDivisor64 - Return the greatest common divisor of the two values using Euclid's algorit...
Definition: MathExtras.h:538
bool getAlign(const Function &F, unsigned index, unsigned &align)
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:304
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:572
size_t i
unsigned getNumRegisters(LLVMContext &Context, EVT VT) const
Return the number of registers that this ValueType will eventually require.
MCSection * DwarfPubTypesSection
bool hasOneUse() const
Return true if there is exactly one use of this node.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
bool hasROT32() const
const TargetMachine & getTargetMachine() const
static cl::opt< unsigned > FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it"" 1: do it 2: do it aggressively"), cl::init(2))
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
MCSection * TextSection
Section directive for standard text.
static SDValue PerformSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
MCSection * StaticCtorSection
This section contains the static constructor pointer list.
This class represents a function call, abstracting a target machine's calling convention.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:655
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl< ISD::OutputArg > &, unsigned retAlignment, const ImmutableCallSite *CS) const
static bool IsMulWideOperandDemotable(SDValue Op, unsigned OptSize, OperandSignedness &S)
IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptS...
static bool isImageOrSamplerVal(const Value *arg, const Module *context)
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:777
Function Alias Analysis Results
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:233
unsigned getNumOperands() const
Return the number of values used by this operation.
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.cpp:238
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
MCSection * getDataSection() const
unsigned getNumOperands() const
13: Structures
Definition: Type.h:72
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned Num) const
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target...
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:234
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:387
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return true if the attribute exists at the given index.
Definition: Attributes.cpp:994
const SDValue & getBasePtr() const
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:369
uint64_t getTypeAllocSizeInBits(Type *Ty) const
Returns the offset in bits between successive objects of the specified type, including alignment padd...
Definition: DataLayout.h:418
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:195
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array...
MCSection * SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:388
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:133
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
static SDValue PerformREMCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
static unsigned int uniqueCallSite
static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, unsigned OptSize, bool &IsSigned)
AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits...
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
bool hasROT64() const
struct fuzzer::@269 Flags
Shift and rotation operations.
Definition: ISDOpcodes.h:344
static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputePTXValueVTs - For the given Type Ty, returns the set of primitive EVTs that compose it...
Class to represent struct types.
Definition: DerivedTypes.h:199
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:327
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
This file contains the simple types necessary to represent the attributes associated with functions a...
SimpleValueType SimpleTy
void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth)
Tells the code generator which bitwidths to bypass.
The memory access is dereferenceable (i.e., doesn't trap).
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:233
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:377
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:123
This file implements a class to represent arbitrary precision integral constant values and operations...
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SmallVector< ISD::InputArg, 32 > Ins
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
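A common pattern in custom lowering hooks, assuming Value and Chain are SDValues produced earlier:

  // Return the computed value and the updated token chain as one result.
  SDValue Ops[] = { Value, Chain };
  return DAG.getMergeValues(Ops, dl);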
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:239
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:873
bool isKernelFunction(const Function &F)
unsigned getIROrder() const
Return the node ordering.
const MCSection * DwarfDebugInlineSection
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
unsigned int getSmVersion() const
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
MCSection * StaticDtorSection
This section contains the static destructor pointer list.
SmallVector< ISD::OutputArg, 32 > Outs
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:160
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array...
const SDValue & getBasePtr() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:737
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:842
const APInt & getAPIntValue() const
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
EVT getMemoryVT() const
Return the type of the in-memory value.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &Subtarget, CodeGenOpt::Level OptLevel)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
void setIROrder(unsigned Order)
Set the node ordering.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:135
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:875
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:328
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
MCSection * DataSection
Section directive for standard data.
Class to represent pointers.
Definition: DerivedTypes.h:443
UNDEF - An undefined node.
Definition: ISDOpcodes.h:178
This class is used to represent ISD::STORE nodes.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &Subtarget, CodeGenOpt::Level OptLevel)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1...
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:274
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:268
0: type with no size
Definition: Type.h:56
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned getStoreSizeInBits() const
getStoreSizeInBits - Return the number of bits overwritten by a store of the specified value type...
Definition: ValueTypes.h:274
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
MVT - Machine Value Type.
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
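A sketch of the usual constructor sequence: add every register class first, then derive the register properties once at the end:

  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  // Must run after all addRegisterClass calls.
  computeRegisterProperties(STI.getRegisterInfo());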
static cl::opt< bool > sched4reg("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressure"), cl::init(false))
Simple binary floating point operators.
Definition: ISDOpcodes.h:246
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
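For example, the Perform*Combine helpers indexed on this page are only reached for opcodes registered this way:

  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);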
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:219
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
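A hedged sketch of a two-element vector load built this way (Ops, PtrInfo, and dl are assumed to be in scope; the alignment is illustrative):

  // Two scalar results plus a chain, with one memory operand that
  // still describes the whole v2f32 access.
  SDVTList VTs = DAG.getVTList(MVT::f32, MVT::f32, MVT::Other);
  SDValue Ld = DAG.getMemIntrinsicNode(NVPTXISD::LoadV2, dl, VTs, Ops,
                                       MVT::v2f32, PtrInfo, /*Align=*/8);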
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition: APInt.h:1135
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:818
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const NVPTXTargetLowering * getTargetLowering() const override
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:228
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
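A hypothetical constructor call; a target whose comparisons produce all-ones booleans would declare:

  // i1 results widen to 0 / -1, so sext-of-setcc can fold away.
  setBooleanContents(ZeroOrNegativeOneBooleanContent);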
static unsigned getOpcForSurfaceInstr(unsigned Intrinsic)
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
This class provides iterator support for SDUse operands that use a specific SDNode.
uint32_t Offset
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
unsigned getOpcode() const
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:676
ManagedStringPool * getManagedStrPool() const
SectionKind - This is a simple POD value that classifies the properties of a section.
Definition: SectionKind.h:23
static mvt_range vector_valuetypes()
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
const SDValue & getValue() const
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:392
EVT - Extended Value Type.
Definition: ValueTypes.h:31
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:213
std::vector< ArgListEntry > ArgListTy
static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
This structure contains all information that is necessary for lowering calls.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool IsPTXVectorType(MVT VT)
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:378
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
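A minimal chained load, assuming Chain and Ptr SDValues are in scope:

  SDValue Ld = DAG.getLoad(MVT::i32, dl, Chain, Ptr, MachinePointerInfo());
  SDValue NewChain = Ld.getValue(1); // result 1 is the output token chain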
MCSection * DwarfAbbrevSection
const MachinePointerInfo & getPointerInfo() const
static SDValue TryMULWIDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces...
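The shape of the rewrite it aims for, sketched for a signed i64 multiply (node names per this backend):

  // (mul (sext i32:a), (sext i32:b))
  //   --> NVPTXISD::MUL_WIDE_SIGNED(a, b)
  // which instruction selection can then map onto PTX's mul.wide.s32.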
MCSection * EHFrameSection
EH frame section.
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:689
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override
Return the preferred vector type legalization action.
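A sketch of the kind of policy such an override can encode; splitting i1 vectors forces each lane to be legalized on its own:

  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
    return TypeSplitVector;
  return TargetLoweringBase::getPreferredVectorAction(VT);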
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:757
InstrTy * getInstruction() const
Definition: CallSite.h:93
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.cpp:533
std::string * getManagedString(const char *S)
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:285
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:408
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:166
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:58
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
NVPTXTargetLowering(const NVPTXTargetMachine &TM, const NVPTXSubtarget &STI)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:639
MCSection * DwarfRangesSection
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:347
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
This is an abstract virtual class for memory operations.
MCSection * DwarfARangesSection
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
static mvt_range integer_valuetypes()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, const SDLoc &DL)
Return a new CALLSEQ_START node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:715
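The uniqueCallSite counter indexed above is what this file feeds into the node; the call looks roughly like:

  Chain = DAG.getCALLSEQ_START(
      Chain, DAG.getIntPtrConstant(uniqueCallSite, dl, /*isTarget=*/true), dl);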
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Class to represent vector types.
Definition: DerivedTypes.h:369
Class for arbitrary precision integers.
Definition: APInt.h:77
bool isCast() const
Return true if this is a convert constant expression.
Definition: Constants.cpp:1060
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:383
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
iterator_range< use_iterator > uses()
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:354
op_iterator op_begin() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:400
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:403
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const
Return the preferred vector type legalization action.
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:560
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:384
NVPTXTargetMachine.
SmallVector< SDValue, 32 > OutVals
MCSection * LSDASection
If exception handling is supported by the target, this is the section the Language Specific Data Area...
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:226
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:247
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:205
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:418
Represents a section in PTX. PTX does not have sections.
Definition: NVPTXSection.h:25
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
ImmutableCallSite - establish a view to a call site for examination.
Definition: CallSite.h:665
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
The memory access always returns the same value (or traps).
op_iterator op_end() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
OperandSignedness
const unsigned Kind
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:291
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:178
LegalizeTypeAction
This enum indicates whether a type is legal for a target, and if not, what action should be used to...
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:107
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:463
FunTy * getCalledFunction() const
getCalledFunction - Return the function being called if this is a direct call, otherwise return null ...
Definition: CallSite.h:110
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
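This is the hook behind the nvptx-sched4reg flag indexed above; the constructor's use amounts to:

  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);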
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:108
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:537
LLVM Value Representation.
Definition: Value.h:71
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:249
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:331
Primary interface to the complete machine description for the target machine.
MCSection * BSSSection
Section that is default initialized to zero.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:377
MCSection * ReadOnlySection
Section that is readonly and can contain arbitrary initialized data.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
MCSection * DwarfMacinfoSection
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Conversion operators.
Definition: ISDOpcodes.h:397
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:406
unsigned getAlignment() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
static unsigned getOpcForTextureInstr(unsigned Intrinsic)
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
This file describes how to lower LLVM code to machine code.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:529
const NVPTXRegisterInfo * getRegisterInfo() const override
uint64_t getZExtValue() const
BRIND - Indirect branch.
Definition: ISDOpcodes.h:556
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:248
This class is used to represent ISD::LOAD nodes.
const NVPTXTargetMachine * nvTM