LLVM  4.0.0
NVPTXISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXISelDAGToDAG.h"
15 #include "NVPTXUtilities.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/Instructions.h"
20 #include "llvm/Support/Debug.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "nvptx-isel"
28 
30  "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
31  cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
32  " IEEE Compliant F32 div.rnd if available."),
33  cl::init(2));
34 
// Hidden command-line flag "nvptx-prec-sqrtf32" (defaults to true). It is
// consulted by NVPTXDAGToDAGISel::usePrecSqrtF32(), which honors it only when
// the flag was actually given on the command line.
35 static cl::opt<bool>
36 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
37  cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
38  cl::init(true));
39 
// Hidden command-line flag "nvptx-f32ftz" (defaults to false). It is consulted
// by NVPTXDAGToDAGISel::useF32FTZ(), which falls back to the function
// attribute of the same name when the flag was not given.
40 static cl::opt<bool>
41 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
42  cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
43  cl::init(false));
44 
45 
46 /// createNVPTXISelDag - This pass converts a legalized DAG into an
47 /// NVPTX-specific DAG, ready for instruction scheduling.
49  llvm::CodeGenOpt::Level OptLevel) {
50  return new NVPTXDAGToDAGISel(TM, OptLevel);
51 }
52 
54  CodeGenOpt::Level OptLevel)
55  : SelectionDAGISel(tm, OptLevel), TM(tm) {
56  doMulWide = (OptLevel > 0);
57 }
58 
60  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
62 }
63 
64 int NVPTXDAGToDAGISel::getDivF32Level() const {
65  if (UsePrecDivF32.getNumOccurrences() > 0) {
66  // If nvptx-prec-div32=N is used on the command-line, always honor it
67  return UsePrecDivF32;
68  } else {
69  // Otherwise, use div.approx if fast math is enabled
70  if (TM.Options.UnsafeFPMath)
71  return 0;
72  else
73  return 2;
74  }
75 }
76 
77 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
78  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
79  // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
80  return UsePrecSqrtF32;
81  } else {
82  // Otherwise, use sqrt.approx if fast math is enabled
83  return !TM.Options.UnsafeFPMath;
84  }
85 }
86 
87 bool NVPTXDAGToDAGISel::useF32FTZ() const {
88  if (FtzEnabled.getNumOccurrences() > 0) {
89  // If nvptx-f32ftz is used on the command-line, always honor it
90  return FtzEnabled;
91  } else {
92  const Function *F = MF->getFunction();
93  // Otherwise, check for an nvptx-f32ftz attribute on the function
94  if (F->hasFnAttribute("nvptx-f32ftz"))
95  return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
96  else
97  return false;
98  }
99 }
100 
// Forwards the FMA query to TL->allowFMA, passing the current MachineFunction
// (*MF) and OptLevel, and returns its result.
// NOTE(review): the declaration of `TL` is not visible here (the embedded
// numbering jumps from 101 to 103) - confirm against the full source.
101 bool NVPTXDAGToDAGISel::allowFMA() const {
103  return TL->allowFMA(*MF, OptLevel);
104 }
105 
106 /// Select - Select instructions not customized! Used for
107 /// expanded, promoted and normal instructions.
///
/// Nodes that already carry a machine opcode get node id -1 and are left
/// alone. Otherwise the opcode is dispatched to one of the try* helpers: when
/// the helper returns true the node is done, and when it returns false (or no
/// case matches) control reaches SelectCode(N) at the bottom.
///
/// NOTE(review): several `case` labels are absent from this listing (the
/// embedded numbering skips 142-143, 147-149, 153-157, 161, 165, 169-336 and
/// 340-504), so the opcodes guarding tryStoreRetval, tryStoreParam,
/// tryIntrinsicNoChain, tryIntrinsicChain, tryTextureIntrinsic and
/// trySurfaceIntrinsic cannot be read from here.
108 void NVPTXDAGToDAGISel::Select(SDNode *N) {
109 
110  if (N->isMachineOpcode()) {
111  N->setNodeId(-1);
112  return; // Already selected.
113  }
114 
115  switch (N->getOpcode()) {
116  case ISD::LOAD:
117  if (tryLoad(N))
118  return;
119  break;
120  case ISD::STORE:
121  if (tryStore(N))
122  return;
123  break;
124  case NVPTXISD::LoadV2:
125  case NVPTXISD::LoadV4:
126  if (tryLoadVector(N))
127  return;
128  break;
129  case NVPTXISD::LDGV2:
130  case NVPTXISD::LDGV4:
131  case NVPTXISD::LDUV2:
132  case NVPTXISD::LDUV4:
133  if (tryLDGLDU(N))
134  return;
135  break;
136  case NVPTXISD::StoreV2:
137  case NVPTXISD::StoreV4:
138  if (tryStoreVector(N))
139  return;
140  break;
141  case NVPTXISD::LoadParam:
144  if (tryLoadParam(N))
145  return;
146  break;
150  if (tryStoreRetval(N))
151  return;
152  break;
158  if (tryStoreParam(N))
159  return;
160  break;
162  if (tryIntrinsicNoChain(N))
163  return;
164  break;
166  if (tryIntrinsicChain(N))
167  return;
168  break;
337  if (tryTextureIntrinsic(N))
338  return;
339  break;
505  if (trySurfaceIntrinsic(N))
506  return;
507  break;
508  case ISD::AND:
509  case ISD::SRA:
510  case ISD::SRL:
511  // Try to select BFE
512  if (tryBFE(N))
513  return;
514  break;
515  case ISD::ADDRSPACECAST:
   // Address-space casts are always handled here; there is no fallback to
   // SelectCode for them.
516  SelectAddrSpaceCast(N);
517  return;
518  default:
519  break;
520  }
   // Nothing above claimed the node: defer to SelectCode.
521  SelectCode(N);
522 }
523 
524 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
525  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
526  switch (IID) {
527  default:
528  return false;
529  case Intrinsic::nvvm_ldg_global_f:
530  case Intrinsic::nvvm_ldg_global_i:
531  case Intrinsic::nvvm_ldg_global_p:
532  case Intrinsic::nvvm_ldu_global_f:
533  case Intrinsic::nvvm_ldu_global_i:
534  case Intrinsic::nvvm_ldu_global_p:
535  return tryLDGLDU(N);
536  }
537 }
538 
// Derives an address-space code for a memory node from the IR value attached
// to its memory operand: if that value has pointer type, the result is chosen
// by switching on the pointer's address space.
// NOTE(review): the return statements and the individual `case` labels are
// absent from this listing (the embedded numbering skips 543, 547-552 and
// 556), so the concrete mapping and the fallback value cannot be read from
// here - confirm against the full source.
539 static unsigned int getCodeAddrSpace(MemSDNode *N) {
540  const Value *Src = N->getMemOperand()->getValue();
541 
   // No IR value is attached to the memory operand.
542  if (!Src)
544 
545  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
546  switch (PT->getAddressSpace()) {
553  default: break;
554  }
555  }
557 }
558 
// Decides whether the memory access N may be selected as an ldg load.
// Returns false unless the subtarget reports hasLDG() and CodeAddrSpace is
// GLOBAL. Returns true for accesses explicitly marked invariant. Otherwise
// invariance is inferred: F must be a kernel function and every underlying
// object of the accessed pointer must be an Argument that only reads memory
// and has the noalias attribute.
559 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
560  unsigned CodeAddrSpace, MachineFunction *F) {
561  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
562  // space.
563  //
564  // We have two ways of identifying invariant loads: Loads may be explicitly
565  // marked as invariant, or we may infer them to be invariant.
566  //
567  // We currently infer invariance only for kernel function pointer params that
568  // are noalias (i.e. __restrict) and never written to.
569  //
570  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
571  // not during the SelectionDAG phase).
572  //
573  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
574  // explicitly invariant loads because these are how clang tells us to use ldg
575  // when the user uses a builtin.
576  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
577  return false;
578 
579  if (N->isInvariant())
580  return true;
581 
582  // Load wasn't explicitly invariant. Attempt to infer invariance.
583  if (!isKernelFunction(*F->getFunction()))
584  return false;
585 
586  // We use GetUnderlyingObjects() here instead of
587  // GetUnderlyingObject() mainly because the former looks through phi
588  // nodes while the latter does not. We need to look through phi
589  // nodes to handle pointer induction variables.
   // NOTE(review): the declaration of `Objs` is not visible here (the embedded
   // numbering skips 590) - confirm its type against the full source.
591  GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
592  Objs, F->getDataLayout());
   // A single underlying object that fails the checks disqualifies the load.
593  for (Value *Obj : Objs) {
594  auto *A = dyn_cast<const Argument>(Obj);
595  if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
596  }
597 
598  return true;
599 }
600 
601 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
602  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
603  switch (IID) {
604  default:
605  return false;
606  case Intrinsic::nvvm_texsurf_handle_internal:
607  SelectTexSurfHandle(N);
608  return true;
609  }
610 }
611 
612 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
613  // Op 0 is the intrinsic ID
614  SDValue Wrapper = N->getOperand(1);
615  SDValue GlobalVal = Wrapper.getOperand(0);
616  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
617  MVT::i64, GlobalVal));
618 }
619 
// Selects a machine opcode for an ADDRSPACECAST node. Two directions are
// handled: a specific address space to generic (cvta_*), and generic to a
// specific address space (cvta_to_*, or nvvm_ptr_gen_to_param for the param
// space). Each opcode has a 64-bit variant chosen by TM.is64Bit(). Anything
// else ends in report_fatal_error.
// NOTE(review): some `case` labels and the statements that consume `Opc`
// are absent from this listing (the embedded numbering skips 634, 637, 647,
// 657, 661 and 678); only their trailing `Src));` fragments remain.
620 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
621  SDValue Src = N->getOperand(0);
622  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
623  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
624  unsigned DstAddrSpace = CastN->getDestAddressSpace();
625 
626  assert(SrcAddrSpace != DstAddrSpace &&
627  "addrspacecast must be between different address spaces");
628 
629  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
630  // Specific to generic
631  unsigned Opc;
632  switch (SrcAddrSpace) {
633  default: report_fatal_error("Bad address space in addrspacecast");
635  Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
636  break;
638  Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
639  break;
640  case ADDRESS_SPACE_CONST:
641  Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
642  break;
643  case ADDRESS_SPACE_LOCAL:
644  Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
645  break;
646  }
648  Src));
649  return;
650  } else {
651  // Generic to specific
   // The source must be address space 0 here; a cast between two non-generic
   // spaces is rejected outright.
652  if (SrcAddrSpace != 0)
653  report_fatal_error("Cannot cast between two non-generic address spaces");
654  unsigned Opc;
655  switch (DstAddrSpace) {
656  default: report_fatal_error("Bad address space in addrspacecast");
658  Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
659  : NVPTX::cvta_to_global_yes;
660  break;
662  Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
663  : NVPTX::cvta_to_shared_yes;
664  break;
665  case ADDRESS_SPACE_CONST:
666  Opc =
667  TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
668  break;
669  case ADDRESS_SPACE_LOCAL:
670  Opc =
671  TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
672  break;
673  case ADDRESS_SPACE_PARAM:
674  Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
675  : NVPTX::nvvm_ptr_gen_to_param;
676  break;
677  }
679  Src));
680  return;
681  }
682 }
683 
// Tries to select an ISD::LOAD node into an NVPTX LD_* machine node.
// Returns false (leaving N untouched) for indexed loads, non-simple memory
// types, vectors with an element count other than 2 or 4, and result types
// outside i8/i16/i32/i64/f32/f64. Loads that canLowerToLDG() accepts are
// delegated to tryLDGLDU(). Otherwise the opcode is picked by addressing form
// (direct address, symbol+immediate, register+immediate, plain register) and
// result type, the memory operand is carried over, and N is replaced.
// NOTE(review): the assignments to `fromType` and the declarations of
// `TargetVT` and `MemRefs0` are absent from this listing (the embedded
// numbering skips 736, 738, 740, 748 and 915) - confirm against the full
// source.
684 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
685  SDLoc dl(N);
686  LoadSDNode *LD = cast<LoadSDNode>(N);
687  EVT LoadedVT = LD->getMemoryVT();
688  SDNode *NVPTXLD = nullptr;
689 
690  // do not support pre/post inc/dec
691  if (LD->isIndexed())
692  return false;
693 
694  if (!LoadedVT.isSimple())
695  return false;
696 
697  // Address Space Setting
698  unsigned int codeAddrSpace = getCodeAddrSpace(LD);
699 
700  if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
701  return tryLDGLDU(N);
702  }
703 
704  // Volatile Setting
705  // - .volatile is only available for .global, .shared and generic
706  bool isVolatile = LD->isVolatile();
707  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
708  codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
709  codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
710  isVolatile = false;
711 
712  // Vector Setting
713  MVT SimpleVT = LoadedVT.getSimpleVT();
714  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
715  if (SimpleVT.isVector()) {
716  unsigned num = SimpleVT.getVectorNumElements();
717  if (num == 2)
718  vecType = NVPTX::PTXLdStInstCode::V2;
719  else if (num == 4)
720  vecType = NVPTX::PTXLdStInstCode::V4;
721  else
722  return false;
723  }
724 
725  // Type Setting: fromType + fromTypeWidth
726  //
727  // Sign : ISD::SEXTLOAD
728  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
729  // type is integer
730  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
731  MVT ScalarVT = SimpleVT.getScalarType();
732  // Read at least 8 bits (predicates are stored as 8-bit values)
733  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
734  unsigned int fromType;
735  if ((LD->getExtensionType() == ISD::SEXTLOAD))
737  else if (ScalarVT.isFloatingPoint())
739  else
741 
742  // Create the machine instruction DAG
743  SDValue Chain = N->getOperand(0);
744  SDValue N1 = N->getOperand(1);
745  SDValue Addr;
746  SDValue Offset, Base;
747  unsigned Opcode;
749 
   // Addressing form 1: direct address (the _avar opcodes).
750  if (SelectDirectAddr(N1, Addr)) {
751  switch (TargetVT) {
752  case MVT::i8:
753  Opcode = NVPTX::LD_i8_avar;
754  break;
755  case MVT::i16:
756  Opcode = NVPTX::LD_i16_avar;
757  break;
758  case MVT::i32:
759  Opcode = NVPTX::LD_i32_avar;
760  break;
761  case MVT::i64:
762  Opcode = NVPTX::LD_i64_avar;
763  break;
764  case MVT::f32:
765  Opcode = NVPTX::LD_f32_avar;
766  break;
767  case MVT::f64:
768  Opcode = NVPTX::LD_f64_avar;
769  break;
770  default:
771  return false;
772  }
773  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
774  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
775  getI32Imm(fromTypeWidth, dl), Addr, Chain };
776  NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
   // Addressing form 2: base + offset matched by SelectADDRsi[64] (the _asi
   // opcodes, shared between 32- and 64-bit targets).
777  } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
778  : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
779  switch (TargetVT) {
780  case MVT::i8:
781  Opcode = NVPTX::LD_i8_asi;
782  break;
783  case MVT::i16:
784  Opcode = NVPTX::LD_i16_asi;
785  break;
786  case MVT::i32:
787  Opcode = NVPTX::LD_i32_asi;
788  break;
789  case MVT::i64:
790  Opcode = NVPTX::LD_i64_asi;
791  break;
792  case MVT::f32:
793  Opcode = NVPTX::LD_f32_asi;
794  break;
795  case MVT::f64:
796  Opcode = NVPTX::LD_f64_asi;
797  break;
798  default:
799  return false;
800  }
801  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
802  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
803  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
804  NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
   // Addressing form 3: base + offset matched by SelectADDRri[64] (the _ari
   // opcodes, with separate _64 variants).
805  } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
806  : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
807  if (TM.is64Bit()) {
808  switch (TargetVT) {
809  case MVT::i8:
810  Opcode = NVPTX::LD_i8_ari_64;
811  break;
812  case MVT::i16:
813  Opcode = NVPTX::LD_i16_ari_64;
814  break;
815  case MVT::i32:
816  Opcode = NVPTX::LD_i32_ari_64;
817  break;
818  case MVT::i64:
819  Opcode = NVPTX::LD_i64_ari_64;
820  break;
821  case MVT::f32:
822  Opcode = NVPTX::LD_f32_ari_64;
823  break;
824  case MVT::f64:
825  Opcode = NVPTX::LD_f64_ari_64;
826  break;
827  default:
828  return false;
829  }
830  } else {
831  switch (TargetVT) {
832  case MVT::i8:
833  Opcode = NVPTX::LD_i8_ari;
834  break;
835  case MVT::i16:
836  Opcode = NVPTX::LD_i16_ari;
837  break;
838  case MVT::i32:
839  Opcode = NVPTX::LD_i32_ari;
840  break;
841  case MVT::i64:
842  Opcode = NVPTX::LD_i64_ari;
843  break;
844  case MVT::f32:
845  Opcode = NVPTX::LD_f32_ari;
846  break;
847  case MVT::f64:
848  Opcode = NVPTX::LD_f64_ari;
849  break;
850  default:
851  return false;
852  }
853  }
854  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
855  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
856  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
857  NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
   // Addressing form 4 (fallback): the address operand N1 is passed as-is
   // (the _areg opcodes).
858  } else {
859  if (TM.is64Bit()) {
860  switch (TargetVT) {
861  case MVT::i8:
862  Opcode = NVPTX::LD_i8_areg_64;
863  break;
864  case MVT::i16:
865  Opcode = NVPTX::LD_i16_areg_64;
866  break;
867  case MVT::i32:
868  Opcode = NVPTX::LD_i32_areg_64;
869  break;
870  case MVT::i64:
871  Opcode = NVPTX::LD_i64_areg_64;
872  break;
873  case MVT::f32:
874  Opcode = NVPTX::LD_f32_areg_64;
875  break;
876  case MVT::f64:
877  Opcode = NVPTX::LD_f64_areg_64;
878  break;
879  default:
880  return false;
881  }
882  } else {
883  switch (TargetVT) {
884  case MVT::i8:
885  Opcode = NVPTX::LD_i8_areg;
886  break;
887  case MVT::i16:
888  Opcode = NVPTX::LD_i16_areg;
889  break;
890  case MVT::i32:
891  Opcode = NVPTX::LD_i32_areg;
892  break;
893  case MVT::i64:
894  Opcode = NVPTX::LD_i64_areg;
895  break;
896  case MVT::f32:
897  Opcode = NVPTX::LD_f32_areg;
898  break;
899  case MVT::f64:
900  Opcode = NVPTX::LD_f64_areg;
901  break;
902  default:
903  return false;
904  }
905  }
906  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
907  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
908  getI32Imm(fromTypeWidth, dl), N1, Chain };
909  NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
910  }
911 
912  if (!NVPTXLD)
913  return false;
914 
   // Carry the original node's memory operand over to the new machine node.
916  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
917  cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
918 
919  ReplaceNode(N, NVPTXLD);
920  return true;
921 }
922 
// Tries to select an NVPTXISD::LoadV2 / LoadV4 node into an NVPTX LDV_*
// machine node. Returns false for non-simple memory types, other opcodes,
// and element types with no matching opcode. Loads that canLowerToLDG()
// accepts are delegated to tryLDGLDU(). Otherwise the opcode is picked by
// addressing form (direct address, symbol+immediate, register+immediate,
// plain register), vector width and element type; the memory operand is
// carried over and N is replaced.
// NOTE(review): the assignments to `FromType` and the declaration of
// `MemRefs0` are absent from this listing (the embedded numbering skips 969,
// 971, 973 and 1301) - confirm against the full source.
923 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
924 
925  SDValue Chain = N->getOperand(0);
926  SDValue Op1 = N->getOperand(1);
927  SDValue Addr, Offset, Base;
928  unsigned Opcode;
929  SDLoc DL(N);
930  SDNode *LD;
931  MemSDNode *MemSD = cast<MemSDNode>(N);
932  EVT LoadedVT = MemSD->getMemoryVT();
933 
934  if (!LoadedVT.isSimple())
935  return false;
936 
937  // Address Space Setting
938  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
939 
940  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
941  return tryLDGLDU(N);
942  }
943 
944  // Volatile Setting
945  // - .volatile is only available for .global, .shared and generic
946  bool IsVolatile = MemSD->isVolatile();
947  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
948  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
949  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
950  IsVolatile = false;
951 
952  // Vector Setting
953  MVT SimpleVT = LoadedVT.getSimpleVT();
954 
955  // Type Setting: fromType + fromTypeWidth
956  //
957  // Sign : ISD::SEXTLOAD
958  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
959  // type is integer
960  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
961  MVT ScalarVT = SimpleVT.getScalarType();
962  // Read at least 8 bits (predicates are stored as 8-bit values)
963  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
964  unsigned int FromType;
965  // The last operand holds the original LoadSDNode::getExtensionType() value
966  unsigned ExtensionType = cast<ConstantSDNode>(
967  N->getOperand(N->getNumOperands() - 1))->getZExtValue();
968  if (ExtensionType == ISD::SEXTLOAD)
970  else if (ScalarVT.isFloatingPoint())
972  else
974 
975  unsigned VecType;
976 
977  switch (N->getOpcode()) {
978  case NVPTXISD::LoadV2:
979  VecType = NVPTX::PTXLdStInstCode::V2;
980  break;
981  case NVPTXISD::LoadV4:
982  VecType = NVPTX::PTXLdStInstCode::V4;
983  break;
984  default:
985  return false;
986  }
987 
988  EVT EltVT = N->getValueType(0);
989 
   // Addressing form 1: direct address (the _avar opcodes).
990  if (SelectDirectAddr(Op1, Addr)) {
991  switch (N->getOpcode()) {
992  default:
993  return false;
994  case NVPTXISD::LoadV2:
995  switch (EltVT.getSimpleVT().SimpleTy) {
996  default:
997  return false;
998  case MVT::i8:
999  Opcode = NVPTX::LDV_i8_v2_avar;
1000  break;
1001  case MVT::i16:
1002  Opcode = NVPTX::LDV_i16_v2_avar;
1003  break;
1004  case MVT::i32:
1005  Opcode = NVPTX::LDV_i32_v2_avar;
1006  break;
1007  case MVT::i64:
1008  Opcode = NVPTX::LDV_i64_v2_avar;
1009  break;
1010  case MVT::f32:
1011  Opcode = NVPTX::LDV_f32_v2_avar;
1012  break;
1013  case MVT::f64:
1014  Opcode = NVPTX::LDV_f64_v2_avar;
1015  break;
1016  }
1017  break;
   // The v4 tables list no i64/f64 entries; those element types reach the
   // default and return false. The same holds for every v4 switch below.
1018  case NVPTXISD::LoadV4:
1019  switch (EltVT.getSimpleVT().SimpleTy) {
1020  default:
1021  return false;
1022  case MVT::i8:
1023  Opcode = NVPTX::LDV_i8_v4_avar;
1024  break;
1025  case MVT::i16:
1026  Opcode = NVPTX::LDV_i16_v4_avar;
1027  break;
1028  case MVT::i32:
1029  Opcode = NVPTX::LDV_i32_v4_avar;
1030  break;
1031  case MVT::f32:
1032  Opcode = NVPTX::LDV_f32_v4_avar;
1033  break;
1034  }
1035  break;
1036  }
1037 
1038  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1039  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1040  getI32Imm(FromTypeWidth, DL), Addr, Chain };
1041  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   // Addressing form 2: base + offset matched by SelectADDRsi[64] (the _asi
   // opcodes, shared between 32- and 64-bit targets).
1042  } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1043  : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1044  switch (N->getOpcode()) {
1045  default:
1046  return false;
1047  case NVPTXISD::LoadV2:
1048  switch (EltVT.getSimpleVT().SimpleTy) {
1049  default:
1050  return false;
1051  case MVT::i8:
1052  Opcode = NVPTX::LDV_i8_v2_asi;
1053  break;
1054  case MVT::i16:
1055  Opcode = NVPTX::LDV_i16_v2_asi;
1056  break;
1057  case MVT::i32:
1058  Opcode = NVPTX::LDV_i32_v2_asi;
1059  break;
1060  case MVT::i64:
1061  Opcode = NVPTX::LDV_i64_v2_asi;
1062  break;
1063  case MVT::f32:
1064  Opcode = NVPTX::LDV_f32_v2_asi;
1065  break;
1066  case MVT::f64:
1067  Opcode = NVPTX::LDV_f64_v2_asi;
1068  break;
1069  }
1070  break;
1071  case NVPTXISD::LoadV4:
1072  switch (EltVT.getSimpleVT().SimpleTy) {
1073  default:
1074  return false;
1075  case MVT::i8:
1076  Opcode = NVPTX::LDV_i8_v4_asi;
1077  break;
1078  case MVT::i16:
1079  Opcode = NVPTX::LDV_i16_v4_asi;
1080  break;
1081  case MVT::i32:
1082  Opcode = NVPTX::LDV_i32_v4_asi;
1083  break;
1084  case MVT::f32:
1085  Opcode = NVPTX::LDV_f32_v4_asi;
1086  break;
1087  }
1088  break;
1089  }
1090 
1091  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1092  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1093  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1094  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   // Addressing form 3: base + offset matched by SelectADDRri[64] (the _ari
   // opcodes, with separate _64 variants).
1095  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1096  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1097  if (TM.is64Bit()) {
1098  switch (N->getOpcode()) {
1099  default:
1100  return false;
1101  case NVPTXISD::LoadV2:
1102  switch (EltVT.getSimpleVT().SimpleTy) {
1103  default:
1104  return false;
1105  case MVT::i8:
1106  Opcode = NVPTX::LDV_i8_v2_ari_64;
1107  break;
1108  case MVT::i16:
1109  Opcode = NVPTX::LDV_i16_v2_ari_64;
1110  break;
1111  case MVT::i32:
1112  Opcode = NVPTX::LDV_i32_v2_ari_64;
1113  break;
1114  case MVT::i64:
1115  Opcode = NVPTX::LDV_i64_v2_ari_64;
1116  break;
1117  case MVT::f32:
1118  Opcode = NVPTX::LDV_f32_v2_ari_64;
1119  break;
1120  case MVT::f64:
1121  Opcode = NVPTX::LDV_f64_v2_ari_64;
1122  break;
1123  }
1124  break;
1125  case NVPTXISD::LoadV4:
1126  switch (EltVT.getSimpleVT().SimpleTy) {
1127  default:
1128  return false;
1129  case MVT::i8:
1130  Opcode = NVPTX::LDV_i8_v4_ari_64;
1131  break;
1132  case MVT::i16:
1133  Opcode = NVPTX::LDV_i16_v4_ari_64;
1134  break;
1135  case MVT::i32:
1136  Opcode = NVPTX::LDV_i32_v4_ari_64;
1137  break;
1138  case MVT::f32:
1139  Opcode = NVPTX::LDV_f32_v4_ari_64;
1140  break;
1141  }
1142  break;
1143  }
1144  } else {
1145  switch (N->getOpcode()) {
1146  default:
1147  return false;
1148  case NVPTXISD::LoadV2:
1149  switch (EltVT.getSimpleVT().SimpleTy) {
1150  default:
1151  return false;
1152  case MVT::i8:
1153  Opcode = NVPTX::LDV_i8_v2_ari;
1154  break;
1155  case MVT::i16:
1156  Opcode = NVPTX::LDV_i16_v2_ari;
1157  break;
1158  case MVT::i32:
1159  Opcode = NVPTX::LDV_i32_v2_ari;
1160  break;
1161  case MVT::i64:
1162  Opcode = NVPTX::LDV_i64_v2_ari;
1163  break;
1164  case MVT::f32:
1165  Opcode = NVPTX::LDV_f32_v2_ari;
1166  break;
1167  case MVT::f64:
1168  Opcode = NVPTX::LDV_f64_v2_ari;
1169  break;
1170  }
1171  break;
1172  case NVPTXISD::LoadV4:
1173  switch (EltVT.getSimpleVT().SimpleTy) {
1174  default:
1175  return false;
1176  case MVT::i8:
1177  Opcode = NVPTX::LDV_i8_v4_ari;
1178  break;
1179  case MVT::i16:
1180  Opcode = NVPTX::LDV_i16_v4_ari;
1181  break;
1182  case MVT::i32:
1183  Opcode = NVPTX::LDV_i32_v4_ari;
1184  break;
1185  case MVT::f32:
1186  Opcode = NVPTX::LDV_f32_v4_ari;
1187  break;
1188  }
1189  break;
1190  }
1191  }
1192 
1193  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1194  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1195  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1196 
1197  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   // Addressing form 4 (fallback): the address operand Op1 is passed as-is
   // (the _areg opcodes).
1198  } else {
1199  if (TM.is64Bit()) {
1200  switch (N->getOpcode()) {
1201  default:
1202  return false;
1203  case NVPTXISD::LoadV2:
1204  switch (EltVT.getSimpleVT().SimpleTy) {
1205  default:
1206  return false;
1207  case MVT::i8:
1208  Opcode = NVPTX::LDV_i8_v2_areg_64;
1209  break;
1210  case MVT::i16:
1211  Opcode = NVPTX::LDV_i16_v2_areg_64;
1212  break;
1213  case MVT::i32:
1214  Opcode = NVPTX::LDV_i32_v2_areg_64;
1215  break;
1216  case MVT::i64:
1217  Opcode = NVPTX::LDV_i64_v2_areg_64;
1218  break;
1219  case MVT::f32:
1220  Opcode = NVPTX::LDV_f32_v2_areg_64;
1221  break;
1222  case MVT::f64:
1223  Opcode = NVPTX::LDV_f64_v2_areg_64;
1224  break;
1225  }
1226  break;
1227  case NVPTXISD::LoadV4:
1228  switch (EltVT.getSimpleVT().SimpleTy) {
1229  default:
1230  return false;
1231  case MVT::i8:
1232  Opcode = NVPTX::LDV_i8_v4_areg_64;
1233  break;
1234  case MVT::i16:
1235  Opcode = NVPTX::LDV_i16_v4_areg_64;
1236  break;
1237  case MVT::i32:
1238  Opcode = NVPTX::LDV_i32_v4_areg_64;
1239  break;
1240  case MVT::f32:
1241  Opcode = NVPTX::LDV_f32_v4_areg_64;
1242  break;
1243  }
1244  break;
1245  }
1246  } else {
1247  switch (N->getOpcode()) {
1248  default:
1249  return false;
1250  case NVPTXISD::LoadV2:
1251  switch (EltVT.getSimpleVT().SimpleTy) {
1252  default:
1253  return false;
1254  case MVT::i8:
1255  Opcode = NVPTX::LDV_i8_v2_areg;
1256  break;
1257  case MVT::i16:
1258  Opcode = NVPTX::LDV_i16_v2_areg;
1259  break;
1260  case MVT::i32:
1261  Opcode = NVPTX::LDV_i32_v2_areg;
1262  break;
1263  case MVT::i64:
1264  Opcode = NVPTX::LDV_i64_v2_areg;
1265  break;
1266  case MVT::f32:
1267  Opcode = NVPTX::LDV_f32_v2_areg;
1268  break;
1269  case MVT::f64:
1270  Opcode = NVPTX::LDV_f64_v2_areg;
1271  break;
1272  }
1273  break;
1274  case NVPTXISD::LoadV4:
1275  switch (EltVT.getSimpleVT().SimpleTy) {
1276  default:
1277  return false;
1278  case MVT::i8:
1279  Opcode = NVPTX::LDV_i8_v4_areg;
1280  break;
1281  case MVT::i16:
1282  Opcode = NVPTX::LDV_i16_v4_areg;
1283  break;
1284  case MVT::i32:
1285  Opcode = NVPTX::LDV_i32_v4_areg;
1286  break;
1287  case MVT::f32:
1288  Opcode = NVPTX::LDV_f32_v4_areg;
1289  break;
1290  }
1291  break;
1292  }
1293  }
1294 
1295  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1296  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1297  getI32Imm(FromTypeWidth, DL), Op1, Chain };
1298  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1299  }
1300 
   // Carry the original node's memory operand over to the new machine node.
1302  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1303  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1304 
1305  ReplaceNode(N, LD);
1306  return true;
1307 }
1308 
1309 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1310 
1311  SDValue Chain = N->getOperand(0);
1312  SDValue Op1;
1313  MemSDNode *Mem;
1314  bool IsLDG = true;
1315 
1316  // If this is an LDG intrinsic, the address is the third operand. If its an
1317  // LDG/LDU SD node (from custom vector handling), then its the second operand
1318  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1319  Op1 = N->getOperand(2);
1320  Mem = cast<MemIntrinsicSDNode>(N);
1321  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1322  switch (IID) {
1323  default:
1324  return false;
1325  case Intrinsic::nvvm_ldg_global_f:
1326  case Intrinsic::nvvm_ldg_global_i:
1327  case Intrinsic::nvvm_ldg_global_p:
1328  IsLDG = true;
1329  break;
1330  case Intrinsic::nvvm_ldu_global_f:
1331  case Intrinsic::nvvm_ldu_global_i:
1332  case Intrinsic::nvvm_ldu_global_p:
1333  IsLDG = false;
1334  break;
1335  }
1336  } else {
1337  Op1 = N->getOperand(1);
1338  Mem = cast<MemSDNode>(N);
1339  }
1340 
1341  unsigned Opcode;
1342  SDLoc DL(N);
1343  SDNode *LD;
1344  SDValue Base, Offset, Addr;
1345 
1346  EVT EltVT = Mem->getMemoryVT();
1347  unsigned NumElts = 1;
1348  if (EltVT.isVector()) {
1349  NumElts = EltVT.getVectorNumElements();
1350  EltVT = EltVT.getVectorElementType();
1351  }
1352 
1353  // Build the "promoted" result VTList for the load. If we are really loading
1354  // i8s, then the return type will be promoted to i16 since we do not expose
1355  // 8-bit registers in NVPTX.
1356  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1357  SmallVector<EVT, 5> InstVTs;
1358  for (unsigned i = 0; i != NumElts; ++i) {
1359  InstVTs.push_back(NodeVT);
1360  }
1361  InstVTs.push_back(MVT::Other);
1362  SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1363 
1364  if (SelectDirectAddr(Op1, Addr)) {
1365  switch (N->getOpcode()) {
1366  default:
1367  return false;
1369  if (IsLDG) {
1370  switch (EltVT.getSimpleVT().SimpleTy) {
1371  default:
1372  return false;
1373  case MVT::i8:
1374  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1375  break;
1376  case MVT::i16:
1377  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1378  break;
1379  case MVT::i32:
1380  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1381  break;
1382  case MVT::i64:
1383  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1384  break;
1385  case MVT::f32:
1386  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1387  break;
1388  case MVT::f64:
1389  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1390  break;
1391  }
1392  } else {
1393  switch (EltVT.getSimpleVT().SimpleTy) {
1394  default:
1395  return false;
1396  case MVT::i8:
1397  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1398  break;
1399  case MVT::i16:
1400  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1401  break;
1402  case MVT::i32:
1403  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1404  break;
1405  case MVT::i64:
1406  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1407  break;
1408  case MVT::f32:
1409  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1410  break;
1411  case MVT::f64:
1412  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1413  break;
1414  }
1415  }
1416  break;
1417  case NVPTXISD::LDGV2:
1418  switch (EltVT.getSimpleVT().SimpleTy) {
1419  default:
1420  return false;
1421  case MVT::i8:
1422  Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1423  break;
1424  case MVT::i16:
1425  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1426  break;
1427  case MVT::i32:
1428  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1429  break;
1430  case MVT::i64:
1431  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1432  break;
1433  case MVT::f32:
1434  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1435  break;
1436  case MVT::f64:
1437  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1438  break;
1439  }
1440  break;
1441  case NVPTXISD::LDUV2:
1442  switch (EltVT.getSimpleVT().SimpleTy) {
1443  default:
1444  return false;
1445  case MVT::i8:
1446  Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1447  break;
1448  case MVT::i16:
1449  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1450  break;
1451  case MVT::i32:
1452  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1453  break;
1454  case MVT::i64:
1455  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1456  break;
1457  case MVT::f32:
1458  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1459  break;
1460  case MVT::f64:
1461  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1462  break;
1463  }
1464  break;
1465  case NVPTXISD::LDGV4:
1466  switch (EltVT.getSimpleVT().SimpleTy) {
1467  default:
1468  return false;
1469  case MVT::i8:
1470  Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1471  break;
1472  case MVT::i16:
1473  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1474  break;
1475  case MVT::i32:
1476  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1477  break;
1478  case MVT::f32:
1479  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1480  break;
1481  }
1482  break;
1483  case NVPTXISD::LDUV4:
1484  switch (EltVT.getSimpleVT().SimpleTy) {
1485  default:
1486  return false;
1487  case MVT::i8:
1488  Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1489  break;
1490  case MVT::i16:
1491  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1492  break;
1493  case MVT::i32:
1494  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1495  break;
1496  case MVT::f32:
1497  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1498  break;
1499  }
1500  break;
1501  }
1502 
1503  SDValue Ops[] = { Addr, Chain };
1504  LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
1505  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1506  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1507  if (TM.is64Bit()) {
1508  switch (N->getOpcode()) {
1509  default:
1510  return false;
1511  case ISD::LOAD:
1513  if (IsLDG) {
1514  switch (EltVT.getSimpleVT().SimpleTy) {
1515  default:
1516  return false;
1517  case MVT::i8:
1518  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1519  break;
1520  case MVT::i16:
1521  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1522  break;
1523  case MVT::i32:
1524  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1525  break;
1526  case MVT::i64:
1527  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1528  break;
1529  case MVT::f32:
1530  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1531  break;
1532  case MVT::f64:
1533  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1534  break;
1535  }
1536  } else {
1537  switch (EltVT.getSimpleVT().SimpleTy) {
1538  default:
1539  return false;
1540  case MVT::i8:
1541  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1542  break;
1543  case MVT::i16:
1544  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1545  break;
1546  case MVT::i32:
1547  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1548  break;
1549  case MVT::i64:
1550  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1551  break;
1552  case MVT::f32:
1553  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1554  break;
1555  case MVT::f64:
1556  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1557  break;
1558  }
1559  }
1560  break;
1561  case NVPTXISD::LoadV2:
1562  case NVPTXISD::LDGV2:
1563  switch (EltVT.getSimpleVT().SimpleTy) {
1564  default:
1565  return false;
1566  case MVT::i8:
1567  Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1568  break;
1569  case MVT::i16:
1570  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1571  break;
1572  case MVT::i32:
1573  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1574  break;
1575  case MVT::i64:
1576  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1577  break;
1578  case MVT::f32:
1579  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1580  break;
1581  case MVT::f64:
1582  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1583  break;
1584  }
1585  break;
1586  case NVPTXISD::LDUV2:
1587  switch (EltVT.getSimpleVT().SimpleTy) {
1588  default:
1589  return false;
1590  case MVT::i8:
1591  Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1592  break;
1593  case MVT::i16:
1594  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1595  break;
1596  case MVT::i32:
1597  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1598  break;
1599  case MVT::i64:
1600  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1601  break;
1602  case MVT::f32:
1603  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1604  break;
1605  case MVT::f64:
1606  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1607  break;
1608  }
1609  break;
1610  case NVPTXISD::LoadV4:
1611  case NVPTXISD::LDGV4:
1612  switch (EltVT.getSimpleVT().SimpleTy) {
1613  default:
1614  return false;
1615  case MVT::i8:
1616  Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1617  break;
1618  case MVT::i16:
1619  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1620  break;
1621  case MVT::i32:
1622  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1623  break;
1624  case MVT::f32:
1625  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1626  break;
1627  }
1628  break;
1629  case NVPTXISD::LDUV4:
1630  switch (EltVT.getSimpleVT().SimpleTy) {
1631  default:
1632  return false;
1633  case MVT::i8:
1634  Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1635  break;
1636  case MVT::i16:
1637  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1638  break;
1639  case MVT::i32:
1640  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1641  break;
1642  case MVT::f32:
1643  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1644  break;
1645  }
1646  break;
1647  }
1648  } else {
1649  switch (N->getOpcode()) {
1650  default:
1651  return false;
1652  case ISD::LOAD:
1654  if (IsLDG) {
1655  switch (EltVT.getSimpleVT().SimpleTy) {
1656  default:
1657  return false;
1658  case MVT::i8:
1659  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1660  break;
1661  case MVT::i16:
1662  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1663  break;
1664  case MVT::i32:
1665  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1666  break;
1667  case MVT::i64:
1668  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1669  break;
1670  case MVT::f32:
1671  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1672  break;
1673  case MVT::f64:
1674  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1675  break;
1676  }
1677  } else {
1678  switch (EltVT.getSimpleVT().SimpleTy) {
1679  default:
1680  return false;
1681  case MVT::i8:
1682  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1683  break;
1684  case MVT::i16:
1685  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1686  break;
1687  case MVT::i32:
1688  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1689  break;
1690  case MVT::i64:
1691  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1692  break;
1693  case MVT::f32:
1694  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1695  break;
1696  case MVT::f64:
1697  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1698  break;
1699  }
1700  }
1701  break;
1702  case NVPTXISD::LoadV2:
1703  case NVPTXISD::LDGV2:
1704  switch (EltVT.getSimpleVT().SimpleTy) {
1705  default:
1706  return false;
1707  case MVT::i8:
1708  Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1709  break;
1710  case MVT::i16:
1711  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1712  break;
1713  case MVT::i32:
1714  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1715  break;
1716  case MVT::i64:
1717  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1718  break;
1719  case MVT::f32:
1720  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1721  break;
1722  case MVT::f64:
1723  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1724  break;
1725  }
1726  break;
1727  case NVPTXISD::LDUV2:
1728  switch (EltVT.getSimpleVT().SimpleTy) {
1729  default:
1730  return false;
1731  case MVT::i8:
1732  Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1733  break;
1734  case MVT::i16:
1735  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1736  break;
1737  case MVT::i32:
1738  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1739  break;
1740  case MVT::i64:
1741  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1742  break;
1743  case MVT::f32:
1744  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1745  break;
1746  case MVT::f64:
1747  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1748  break;
1749  }
1750  break;
1751  case NVPTXISD::LoadV4:
1752  case NVPTXISD::LDGV4:
1753  switch (EltVT.getSimpleVT().SimpleTy) {
1754  default:
1755  return false;
1756  case MVT::i8:
1757  Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1758  break;
1759  case MVT::i16:
1760  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1761  break;
1762  case MVT::i32:
1763  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1764  break;
1765  case MVT::f32:
1766  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1767  break;
1768  }
1769  break;
1770  case NVPTXISD::LDUV4:
1771  switch (EltVT.getSimpleVT().SimpleTy) {
1772  default:
1773  return false;
1774  case MVT::i8:
1775  Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1776  break;
1777  case MVT::i16:
1778  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1779  break;
1780  case MVT::i32:
1781  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1782  break;
1783  case MVT::f32:
1784  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1785  break;
1786  }
1787  break;
1788  }
1789  }
1790 
1791  SDValue Ops[] = { Base, Offset, Chain };
1792 
1793  LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
1794  } else {
1795  if (TM.is64Bit()) {
1796  switch (N->getOpcode()) {
1797  default:
1798  return false;
1799  case ISD::LOAD:
1801  if (IsLDG) {
1802  switch (EltVT.getSimpleVT().SimpleTy) {
1803  default:
1804  return false;
1805  case MVT::i8:
1806  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1807  break;
1808  case MVT::i16:
1809  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1810  break;
1811  case MVT::i32:
1812  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1813  break;
1814  case MVT::i64:
1815  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1816  break;
1817  case MVT::f32:
1818  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1819  break;
1820  case MVT::f64:
1821  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1822  break;
1823  }
1824  } else {
1825  switch (EltVT.getSimpleVT().SimpleTy) {
1826  default:
1827  return false;
1828  case MVT::i8:
1829  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1830  break;
1831  case MVT::i16:
1832  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1833  break;
1834  case MVT::i32:
1835  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1836  break;
1837  case MVT::i64:
1838  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1839  break;
1840  case MVT::f32:
1841  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1842  break;
1843  case MVT::f64:
1844  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1845  break;
1846  }
1847  }
1848  break;
1849  case NVPTXISD::LoadV2:
1850  case NVPTXISD::LDGV2:
1851  switch (EltVT.getSimpleVT().SimpleTy) {
1852  default:
1853  return false;
1854  case MVT::i8:
1855  Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1856  break;
1857  case MVT::i16:
1858  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1859  break;
1860  case MVT::i32:
1861  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1862  break;
1863  case MVT::i64:
1864  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1865  break;
1866  case MVT::f32:
1867  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1868  break;
1869  case MVT::f64:
1870  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1871  break;
1872  }
1873  break;
1874  case NVPTXISD::LDUV2:
1875  switch (EltVT.getSimpleVT().SimpleTy) {
1876  default:
1877  return false;
1878  case MVT::i8:
1879  Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1880  break;
1881  case MVT::i16:
1882  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1883  break;
1884  case MVT::i32:
1885  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1886  break;
1887  case MVT::i64:
1888  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1889  break;
1890  case MVT::f32:
1891  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1892  break;
1893  case MVT::f64:
1894  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1895  break;
1896  }
1897  break;
1898  case NVPTXISD::LoadV4:
1899  case NVPTXISD::LDGV4:
1900  switch (EltVT.getSimpleVT().SimpleTy) {
1901  default:
1902  return false;
1903  case MVT::i8:
1904  Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1905  break;
1906  case MVT::i16:
1907  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1908  break;
1909  case MVT::i32:
1910  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1911  break;
1912  case MVT::f32:
1913  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1914  break;
1915  }
1916  break;
1917  case NVPTXISD::LDUV4:
1918  switch (EltVT.getSimpleVT().SimpleTy) {
1919  default:
1920  return false;
1921  case MVT::i8:
1922  Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1923  break;
1924  case MVT::i16:
1925  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1926  break;
1927  case MVT::i32:
1928  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1929  break;
1930  case MVT::f32:
1931  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1932  break;
1933  }
1934  break;
1935  }
1936  } else {
1937  switch (N->getOpcode()) {
1938  default:
1939  return false;
1940  case ISD::LOAD:
1942  if (IsLDG) {
1943  switch (EltVT.getSimpleVT().SimpleTy) {
1944  default:
1945  return false;
1946  case MVT::i8:
1947  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1948  break;
1949  case MVT::i16:
1950  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1951  break;
1952  case MVT::i32:
1953  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1954  break;
1955  case MVT::i64:
1956  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1957  break;
1958  case MVT::f32:
1959  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1960  break;
1961  case MVT::f64:
1962  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1963  break;
1964  }
1965  } else {
1966  switch (EltVT.getSimpleVT().SimpleTy) {
1967  default:
1968  return false;
1969  case MVT::i8:
1970  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1971  break;
1972  case MVT::i16:
1973  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1974  break;
1975  case MVT::i32:
1976  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1977  break;
1978  case MVT::i64:
1979  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1980  break;
1981  case MVT::f32:
1982  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1983  break;
1984  case MVT::f64:
1985  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1986  break;
1987  }
1988  }
1989  break;
1990  case NVPTXISD::LoadV2:
1991  case NVPTXISD::LDGV2:
1992  switch (EltVT.getSimpleVT().SimpleTy) {
1993  default:
1994  return false;
1995  case MVT::i8:
1996  Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1997  break;
1998  case MVT::i16:
1999  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
2000  break;
2001  case MVT::i32:
2002  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
2003  break;
2004  case MVT::i64:
2005  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
2006  break;
2007  case MVT::f32:
2008  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
2009  break;
2010  case MVT::f64:
2011  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
2012  break;
2013  }
2014  break;
2015  case NVPTXISD::LDUV2:
2016  switch (EltVT.getSimpleVT().SimpleTy) {
2017  default:
2018  return false;
2019  case MVT::i8:
2020  Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
2021  break;
2022  case MVT::i16:
2023  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
2024  break;
2025  case MVT::i32:
2026  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
2027  break;
2028  case MVT::i64:
2029  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
2030  break;
2031  case MVT::f32:
2032  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
2033  break;
2034  case MVT::f64:
2035  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
2036  break;
2037  }
2038  break;
2039  case NVPTXISD::LoadV4:
2040  case NVPTXISD::LDGV4:
2041  switch (EltVT.getSimpleVT().SimpleTy) {
2042  default:
2043  return false;
2044  case MVT::i8:
2045  Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2046  break;
2047  case MVT::i16:
2048  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2049  break;
2050  case MVT::i32:
2051  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2052  break;
2053  case MVT::f32:
2054  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2055  break;
2056  }
2057  break;
2058  case NVPTXISD::LDUV4:
2059  switch (EltVT.getSimpleVT().SimpleTy) {
2060  default:
2061  return false;
2062  case MVT::i8:
2063  Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2064  break;
2065  case MVT::i16:
2066  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2067  break;
2068  case MVT::i32:
2069  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2070  break;
2071  case MVT::f32:
2072  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2073  break;
2074  }
2075  break;
2076  }
2077  }
2078 
2079  SDValue Ops[] = { Op1, Chain };
2080  LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
2081  }
2082 
2084  MemRefs0[0] = Mem->getMemOperand();
2085  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2086 
2087  // For automatic generation of LDG (through SelectLoad[Vector], not the
2088  // intrinsics), we may have an extending load like:
2089  //
2090  // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
2091  //
2092  // In this case, the matching logic above will select a load for the original
2093  // memory type (in this case, i8) and our types will not match (the node needs
2094  // to return an i32 in this case). Our LDG/LDU nodes do not support the
2095  // concept of sign-/zero-extension, so emulate it here by adding an explicit
2096  // CVT instruction. Ptxas should clean up any redundancies here.
2097 
2098  EVT OrigType = N->getValueType(0);
2099  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
2100 
2101  if (OrigType != EltVT && LdNode) {
2102  // We have an extending-load. The instruction we selected operates on the
2103  // smaller type, but the SDNode we are replacing has the larger type. We
2104  // need to emit a CVT to make the types match.
2105  bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
2106  unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
2107  EltVT.getSimpleVT(), IsSigned);
2108 
2109  // For each output value, apply the manual sign/zero-extension and make sure
2110  // all users of the load go through that CVT.
2111  for (unsigned i = 0; i != NumElts; ++i) {
2112  SDValue Res(LD, i);
2113  SDValue OrigVal(N, i);
2114 
2115  SDNode *CvtNode =
2116  CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
2118  DL, MVT::i32));
2119  ReplaceUses(OrigVal, SDValue(CvtNode, 0));
2120  }
2121  }
2122 
2123  ReplaceNode(N, LD);
2124  return true;
2125 }
2126 
2127 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
2128  SDLoc dl(N);
2129  StoreSDNode *ST = cast<StoreSDNode>(N);
2130  EVT StoreVT = ST->getMemoryVT();
2131  SDNode *NVPTXST = nullptr;
2132 
2133  // do not support pre/post inc/dec
2134  if (ST->isIndexed())
2135  return false;
2136 
2137  if (!StoreVT.isSimple())
2138  return false;
2139 
2140  // Address Space Setting
2141  unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2142 
2143  // Volatile Setting
2144  // - .volatile is only availalble for .global and .shared
2145  bool isVolatile = ST->isVolatile();
2146  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2147  codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2148  codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2149  isVolatile = false;
2150 
2151  // Vector Setting
2152  MVT SimpleVT = StoreVT.getSimpleVT();
2153  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2154  if (SimpleVT.isVector()) {
2155  unsigned num = SimpleVT.getVectorNumElements();
2156  if (num == 2)
2157  vecType = NVPTX::PTXLdStInstCode::V2;
2158  else if (num == 4)
2159  vecType = NVPTX::PTXLdStInstCode::V4;
2160  else
2161  return false;
2162  }
2163 
2164  // Type Setting: toType + toTypeWidth
2165  // - for integer type, always use 'u'
2166  //
2167  MVT ScalarVT = SimpleVT.getScalarType();
2168  unsigned toTypeWidth = ScalarVT.getSizeInBits();
2169  unsigned int toType;
2170  if (ScalarVT.isFloatingPoint())
2172  else
2174 
2175  // Create the machine instruction DAG
2176  SDValue Chain = N->getOperand(0);
2177  SDValue N1 = N->getOperand(1);
2178  SDValue N2 = N->getOperand(2);
2179  SDValue Addr;
2180  SDValue Offset, Base;
2181  unsigned Opcode;
2183 
2184  if (SelectDirectAddr(N2, Addr)) {
2185  switch (SourceVT) {
2186  case MVT::i8:
2187  Opcode = NVPTX::ST_i8_avar;
2188  break;
2189  case MVT::i16:
2190  Opcode = NVPTX::ST_i16_avar;
2191  break;
2192  case MVT::i32:
2193  Opcode = NVPTX::ST_i32_avar;
2194  break;
2195  case MVT::i64:
2196  Opcode = NVPTX::ST_i64_avar;
2197  break;
2198  case MVT::f32:
2199  Opcode = NVPTX::ST_f32_avar;
2200  break;
2201  case MVT::f64:
2202  Opcode = NVPTX::ST_f64_avar;
2203  break;
2204  default:
2205  return false;
2206  }
2207  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2208  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2209  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2210  Chain };
2211  NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2212  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2213  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2214  switch (SourceVT) {
2215  case MVT::i8:
2216  Opcode = NVPTX::ST_i8_asi;
2217  break;
2218  case MVT::i16:
2219  Opcode = NVPTX::ST_i16_asi;
2220  break;
2221  case MVT::i32:
2222  Opcode = NVPTX::ST_i32_asi;
2223  break;
2224  case MVT::i64:
2225  Opcode = NVPTX::ST_i64_asi;
2226  break;
2227  case MVT::f32:
2228  Opcode = NVPTX::ST_f32_asi;
2229  break;
2230  case MVT::f64:
2231  Opcode = NVPTX::ST_f64_asi;
2232  break;
2233  default:
2234  return false;
2235  }
2236  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2237  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2238  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2239  Offset, Chain };
2240  NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2241  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2242  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2243  if (TM.is64Bit()) {
2244  switch (SourceVT) {
2245  case MVT::i8:
2246  Opcode = NVPTX::ST_i8_ari_64;
2247  break;
2248  case MVT::i16:
2249  Opcode = NVPTX::ST_i16_ari_64;
2250  break;
2251  case MVT::i32:
2252  Opcode = NVPTX::ST_i32_ari_64;
2253  break;
2254  case MVT::i64:
2255  Opcode = NVPTX::ST_i64_ari_64;
2256  break;
2257  case MVT::f32:
2258  Opcode = NVPTX::ST_f32_ari_64;
2259  break;
2260  case MVT::f64:
2261  Opcode = NVPTX::ST_f64_ari_64;
2262  break;
2263  default:
2264  return false;
2265  }
2266  } else {
2267  switch (SourceVT) {
2268  case MVT::i8:
2269  Opcode = NVPTX::ST_i8_ari;
2270  break;
2271  case MVT::i16:
2272  Opcode = NVPTX::ST_i16_ari;
2273  break;
2274  case MVT::i32:
2275  Opcode = NVPTX::ST_i32_ari;
2276  break;
2277  case MVT::i64:
2278  Opcode = NVPTX::ST_i64_ari;
2279  break;
2280  case MVT::f32:
2281  Opcode = NVPTX::ST_f32_ari;
2282  break;
2283  case MVT::f64:
2284  Opcode = NVPTX::ST_f64_ari;
2285  break;
2286  default:
2287  return false;
2288  }
2289  }
2290  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2291  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2292  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2293  Offset, Chain };
2294  NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2295  } else {
2296  if (TM.is64Bit()) {
2297  switch (SourceVT) {
2298  case MVT::i8:
2299  Opcode = NVPTX::ST_i8_areg_64;
2300  break;
2301  case MVT::i16:
2302  Opcode = NVPTX::ST_i16_areg_64;
2303  break;
2304  case MVT::i32:
2305  Opcode = NVPTX::ST_i32_areg_64;
2306  break;
2307  case MVT::i64:
2308  Opcode = NVPTX::ST_i64_areg_64;
2309  break;
2310  case MVT::f32:
2311  Opcode = NVPTX::ST_f32_areg_64;
2312  break;
2313  case MVT::f64:
2314  Opcode = NVPTX::ST_f64_areg_64;
2315  break;
2316  default:
2317  return false;
2318  }
2319  } else {
2320  switch (SourceVT) {
2321  case MVT::i8:
2322  Opcode = NVPTX::ST_i8_areg;
2323  break;
2324  case MVT::i16:
2325  Opcode = NVPTX::ST_i16_areg;
2326  break;
2327  case MVT::i32:
2328  Opcode = NVPTX::ST_i32_areg;
2329  break;
2330  case MVT::i64:
2331  Opcode = NVPTX::ST_i64_areg;
2332  break;
2333  case MVT::f32:
2334  Opcode = NVPTX::ST_f32_areg;
2335  break;
2336  case MVT::f64:
2337  Opcode = NVPTX::ST_f64_areg;
2338  break;
2339  default:
2340  return false;
2341  }
2342  }
2343  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2344  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2345  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2346  Chain };
2347  NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2348  }
2349 
2350  if (!NVPTXST)
2351  return false;
2352 
2354  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2355  cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2356  ReplaceNode(N, NVPTXST);
2357  return true;
2358 }
2359 
2360 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
2361  SDValue Chain = N->getOperand(0);
2362  SDValue Op1 = N->getOperand(1);
2363  SDValue Addr, Offset, Base;
2364  unsigned Opcode;
2365  SDLoc DL(N);
2366  SDNode *ST;
2367  EVT EltVT = Op1.getValueType();
2368  MemSDNode *MemSD = cast<MemSDNode>(N);
2369  EVT StoreVT = MemSD->getMemoryVT();
2370 
2371  // Address Space Setting
2372  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2373 
2374  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2375  report_fatal_error("Cannot store to pointer that points to constant "
2376  "memory space");
2377  }
2378 
2379  // Volatile Setting
2380  // - .volatile is only availalble for .global and .shared
2381  bool IsVolatile = MemSD->isVolatile();
2382  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2383  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2384  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2385  IsVolatile = false;
2386 
2387  // Type Setting: toType + toTypeWidth
2388  // - for integer type, always use 'u'
2389  assert(StoreVT.isSimple() && "Store value is not simple");
2390  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2391  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2392  unsigned ToType;
2393  if (ScalarVT.isFloatingPoint())
2395  else
2397 
2399  SDValue N2;
2400  unsigned VecType;
2401 
2402  switch (N->getOpcode()) {
2403  case NVPTXISD::StoreV2:
2404  VecType = NVPTX::PTXLdStInstCode::V2;
2405  StOps.push_back(N->getOperand(1));
2406  StOps.push_back(N->getOperand(2));
2407  N2 = N->getOperand(3);
2408  break;
2409  case NVPTXISD::StoreV4:
2410  VecType = NVPTX::PTXLdStInstCode::V4;
2411  StOps.push_back(N->getOperand(1));
2412  StOps.push_back(N->getOperand(2));
2413  StOps.push_back(N->getOperand(3));
2414  StOps.push_back(N->getOperand(4));
2415  N2 = N->getOperand(5);
2416  break;
2417  default:
2418  return false;
2419  }
2420 
2421  StOps.push_back(getI32Imm(IsVolatile, DL));
2422  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2423  StOps.push_back(getI32Imm(VecType, DL));
2424  StOps.push_back(getI32Imm(ToType, DL));
2425  StOps.push_back(getI32Imm(ToTypeWidth, DL));
2426 
2427  if (SelectDirectAddr(N2, Addr)) {
2428  switch (N->getOpcode()) {
2429  default:
2430  return false;
2431  case NVPTXISD::StoreV2:
2432  switch (EltVT.getSimpleVT().SimpleTy) {
2433  default:
2434  return false;
2435  case MVT::i8:
2436  Opcode = NVPTX::STV_i8_v2_avar;
2437  break;
2438  case MVT::i16:
2439  Opcode = NVPTX::STV_i16_v2_avar;
2440  break;
2441  case MVT::i32:
2442  Opcode = NVPTX::STV_i32_v2_avar;
2443  break;
2444  case MVT::i64:
2445  Opcode = NVPTX::STV_i64_v2_avar;
2446  break;
2447  case MVT::f32:
2448  Opcode = NVPTX::STV_f32_v2_avar;
2449  break;
2450  case MVT::f64:
2451  Opcode = NVPTX::STV_f64_v2_avar;
2452  break;
2453  }
2454  break;
2455  case NVPTXISD::StoreV4:
2456  switch (EltVT.getSimpleVT().SimpleTy) {
2457  default:
2458  return false;
2459  case MVT::i8:
2460  Opcode = NVPTX::STV_i8_v4_avar;
2461  break;
2462  case MVT::i16:
2463  Opcode = NVPTX::STV_i16_v4_avar;
2464  break;
2465  case MVT::i32:
2466  Opcode = NVPTX::STV_i32_v4_avar;
2467  break;
2468  case MVT::f32:
2469  Opcode = NVPTX::STV_f32_v4_avar;
2470  break;
2471  }
2472  break;
2473  }
2474  StOps.push_back(Addr);
2475  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2476  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2477  switch (N->getOpcode()) {
2478  default:
2479  return false;
2480  case NVPTXISD::StoreV2:
2481  switch (EltVT.getSimpleVT().SimpleTy) {
2482  default:
2483  return false;
2484  case MVT::i8:
2485  Opcode = NVPTX::STV_i8_v2_asi;
2486  break;
2487  case MVT::i16:
2488  Opcode = NVPTX::STV_i16_v2_asi;
2489  break;
2490  case MVT::i32:
2491  Opcode = NVPTX::STV_i32_v2_asi;
2492  break;
2493  case MVT::i64:
2494  Opcode = NVPTX::STV_i64_v2_asi;
2495  break;
2496  case MVT::f32:
2497  Opcode = NVPTX::STV_f32_v2_asi;
2498  break;
2499  case MVT::f64:
2500  Opcode = NVPTX::STV_f64_v2_asi;
2501  break;
2502  }
2503  break;
2504  case NVPTXISD::StoreV4:
2505  switch (EltVT.getSimpleVT().SimpleTy) {
2506  default:
2507  return false;
2508  case MVT::i8:
2509  Opcode = NVPTX::STV_i8_v4_asi;
2510  break;
2511  case MVT::i16:
2512  Opcode = NVPTX::STV_i16_v4_asi;
2513  break;
2514  case MVT::i32:
2515  Opcode = NVPTX::STV_i32_v4_asi;
2516  break;
2517  case MVT::f32:
2518  Opcode = NVPTX::STV_f32_v4_asi;
2519  break;
2520  }
2521  break;
2522  }
2523  StOps.push_back(Base);
2524  StOps.push_back(Offset);
2525  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2526  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2527  if (TM.is64Bit()) {
2528  switch (N->getOpcode()) {
2529  default:
2530  return false;
2531  case NVPTXISD::StoreV2:
2532  switch (EltVT.getSimpleVT().SimpleTy) {
2533  default:
2534  return false;
2535  case MVT::i8:
2536  Opcode = NVPTX::STV_i8_v2_ari_64;
2537  break;
2538  case MVT::i16:
2539  Opcode = NVPTX::STV_i16_v2_ari_64;
2540  break;
2541  case MVT::i32:
2542  Opcode = NVPTX::STV_i32_v2_ari_64;
2543  break;
2544  case MVT::i64:
2545  Opcode = NVPTX::STV_i64_v2_ari_64;
2546  break;
2547  case MVT::f32:
2548  Opcode = NVPTX::STV_f32_v2_ari_64;
2549  break;
2550  case MVT::f64:
2551  Opcode = NVPTX::STV_f64_v2_ari_64;
2552  break;
2553  }
2554  break;
2555  case NVPTXISD::StoreV4:
2556  switch (EltVT.getSimpleVT().SimpleTy) {
2557  default:
2558  return false;
2559  case MVT::i8:
2560  Opcode = NVPTX::STV_i8_v4_ari_64;
2561  break;
2562  case MVT::i16:
2563  Opcode = NVPTX::STV_i16_v4_ari_64;
2564  break;
2565  case MVT::i32:
2566  Opcode = NVPTX::STV_i32_v4_ari_64;
2567  break;
2568  case MVT::f32:
2569  Opcode = NVPTX::STV_f32_v4_ari_64;
2570  break;
2571  }
2572  break;
2573  }
2574  } else {
2575  switch (N->getOpcode()) {
2576  default:
2577  return false;
2578  case NVPTXISD::StoreV2:
2579  switch (EltVT.getSimpleVT().SimpleTy) {
2580  default:
2581  return false;
2582  case MVT::i8:
2583  Opcode = NVPTX::STV_i8_v2_ari;
2584  break;
2585  case MVT::i16:
2586  Opcode = NVPTX::STV_i16_v2_ari;
2587  break;
2588  case MVT::i32:
2589  Opcode = NVPTX::STV_i32_v2_ari;
2590  break;
2591  case MVT::i64:
2592  Opcode = NVPTX::STV_i64_v2_ari;
2593  break;
2594  case MVT::f32:
2595  Opcode = NVPTX::STV_f32_v2_ari;
2596  break;
2597  case MVT::f64:
2598  Opcode = NVPTX::STV_f64_v2_ari;
2599  break;
2600  }
2601  break;
2602  case NVPTXISD::StoreV4:
2603  switch (EltVT.getSimpleVT().SimpleTy) {
2604  default:
2605  return false;
2606  case MVT::i8:
2607  Opcode = NVPTX::STV_i8_v4_ari;
2608  break;
2609  case MVT::i16:
2610  Opcode = NVPTX::STV_i16_v4_ari;
2611  break;
2612  case MVT::i32:
2613  Opcode = NVPTX::STV_i32_v4_ari;
2614  break;
2615  case MVT::f32:
2616  Opcode = NVPTX::STV_f32_v4_ari;
2617  break;
2618  }
2619  break;
2620  }
2621  }
2622  StOps.push_back(Base);
2623  StOps.push_back(Offset);
2624  } else {
2625  if (TM.is64Bit()) {
2626  switch (N->getOpcode()) {
2627  default:
2628  return false;
2629  case NVPTXISD::StoreV2:
2630  switch (EltVT.getSimpleVT().SimpleTy) {
2631  default:
2632  return false;
2633  case MVT::i8:
2634  Opcode = NVPTX::STV_i8_v2_areg_64;
2635  break;
2636  case MVT::i16:
2637  Opcode = NVPTX::STV_i16_v2_areg_64;
2638  break;
2639  case MVT::i32:
2640  Opcode = NVPTX::STV_i32_v2_areg_64;
2641  break;
2642  case MVT::i64:
2643  Opcode = NVPTX::STV_i64_v2_areg_64;
2644  break;
2645  case MVT::f32:
2646  Opcode = NVPTX::STV_f32_v2_areg_64;
2647  break;
2648  case MVT::f64:
2649  Opcode = NVPTX::STV_f64_v2_areg_64;
2650  break;
2651  }
2652  break;
2653  case NVPTXISD::StoreV4:
2654  switch (EltVT.getSimpleVT().SimpleTy) {
2655  default:
2656  return false;
2657  case MVT::i8:
2658  Opcode = NVPTX::STV_i8_v4_areg_64;
2659  break;
2660  case MVT::i16:
2661  Opcode = NVPTX::STV_i16_v4_areg_64;
2662  break;
2663  case MVT::i32:
2664  Opcode = NVPTX::STV_i32_v4_areg_64;
2665  break;
2666  case MVT::f32:
2667  Opcode = NVPTX::STV_f32_v4_areg_64;
2668  break;
2669  }
2670  break;
2671  }
2672  } else {
2673  switch (N->getOpcode()) {
2674  default:
2675  return false;
2676  case NVPTXISD::StoreV2:
2677  switch (EltVT.getSimpleVT().SimpleTy) {
2678  default:
2679  return false;
2680  case MVT::i8:
2681  Opcode = NVPTX::STV_i8_v2_areg;
2682  break;
2683  case MVT::i16:
2684  Opcode = NVPTX::STV_i16_v2_areg;
2685  break;
2686  case MVT::i32:
2687  Opcode = NVPTX::STV_i32_v2_areg;
2688  break;
2689  case MVT::i64:
2690  Opcode = NVPTX::STV_i64_v2_areg;
2691  break;
2692  case MVT::f32:
2693  Opcode = NVPTX::STV_f32_v2_areg;
2694  break;
2695  case MVT::f64:
2696  Opcode = NVPTX::STV_f64_v2_areg;
2697  break;
2698  }
2699  break;
2700  case NVPTXISD::StoreV4:
2701  switch (EltVT.getSimpleVT().SimpleTy) {
2702  default:
2703  return false;
2704  case MVT::i8:
2705  Opcode = NVPTX::STV_i8_v4_areg;
2706  break;
2707  case MVT::i16:
2708  Opcode = NVPTX::STV_i16_v4_areg;
2709  break;
2710  case MVT::i32:
2711  Opcode = NVPTX::STV_i32_v4_areg;
2712  break;
2713  case MVT::f32:
2714  Opcode = NVPTX::STV_f32_v4_areg;
2715  break;
2716  }
2717  break;
2718  }
2719  }
2720  StOps.push_back(N2);
2721  }
2722 
2723  StOps.push_back(Chain);
2724 
2725  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2726 
2728  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2729  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2730 
2731  ReplaceNode(N, ST);
2732  return true;
2733 }
2734 
2735 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2736  SDValue Chain = Node->getOperand(0);
2737  SDValue Offset = Node->getOperand(2);
2738  SDValue Flag = Node->getOperand(3);
2739  SDLoc DL(Node);
2740  MemSDNode *Mem = cast<MemSDNode>(Node);
2741 
2742  unsigned VecSize;
2743  switch (Node->getOpcode()) {
2744  default:
2745  return false;
2746  case NVPTXISD::LoadParam:
2747  VecSize = 1;
2748  break;
2749  case NVPTXISD::LoadParamV2:
2750  VecSize = 2;
2751  break;
2752  case NVPTXISD::LoadParamV4:
2753  VecSize = 4;
2754  break;
2755  }
2756 
2757  EVT EltVT = Node->getValueType(0);
2758  EVT MemVT = Mem->getMemoryVT();
2759 
2760  unsigned Opc = 0;
2761 
2762  switch (VecSize) {
2763  default:
2764  return false;
2765  case 1:
2766  switch (MemVT.getSimpleVT().SimpleTy) {
2767  default:
2768  return false;
2769  case MVT::i1:
2770  Opc = NVPTX::LoadParamMemI8;
2771  break;
2772  case MVT::i8:
2773  Opc = NVPTX::LoadParamMemI8;
2774  break;
2775  case MVT::i16:
2776  Opc = NVPTX::LoadParamMemI16;
2777  break;
2778  case MVT::i32:
2779  Opc = NVPTX::LoadParamMemI32;
2780  break;
2781  case MVT::i64:
2782  Opc = NVPTX::LoadParamMemI64;
2783  break;
2784  case MVT::f32:
2785  Opc = NVPTX::LoadParamMemF32;
2786  break;
2787  case MVT::f64:
2788  Opc = NVPTX::LoadParamMemF64;
2789  break;
2790  }
2791  break;
2792  case 2:
2793  switch (MemVT.getSimpleVT().SimpleTy) {
2794  default:
2795  return false;
2796  case MVT::i1:
2797  Opc = NVPTX::LoadParamMemV2I8;
2798  break;
2799  case MVT::i8:
2800  Opc = NVPTX::LoadParamMemV2I8;
2801  break;
2802  case MVT::i16:
2803  Opc = NVPTX::LoadParamMemV2I16;
2804  break;
2805  case MVT::i32:
2806  Opc = NVPTX::LoadParamMemV2I32;
2807  break;
2808  case MVT::i64:
2809  Opc = NVPTX::LoadParamMemV2I64;
2810  break;
2811  case MVT::f32:
2812  Opc = NVPTX::LoadParamMemV2F32;
2813  break;
2814  case MVT::f64:
2815  Opc = NVPTX::LoadParamMemV2F64;
2816  break;
2817  }
2818  break;
2819  case 4:
2820  switch (MemVT.getSimpleVT().SimpleTy) {
2821  default:
2822  return false;
2823  case MVT::i1:
2824  Opc = NVPTX::LoadParamMemV4I8;
2825  break;
2826  case MVT::i8:
2827  Opc = NVPTX::LoadParamMemV4I8;
2828  break;
2829  case MVT::i16:
2830  Opc = NVPTX::LoadParamMemV4I16;
2831  break;
2832  case MVT::i32:
2833  Opc = NVPTX::LoadParamMemV4I32;
2834  break;
2835  case MVT::f32:
2836  Opc = NVPTX::LoadParamMemV4F32;
2837  break;
2838  }
2839  break;
2840  }
2841 
2842  SDVTList VTs;
2843  if (VecSize == 1) {
2844  VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2845  } else if (VecSize == 2) {
2846  VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2847  } else {
2848  EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2849  VTs = CurDAG->getVTList(EVTs);
2850  }
2851 
2852  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2853 
2855  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2856  Ops.push_back(Chain);
2857  Ops.push_back(Flag);
2858 
2859  ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, VTs, Ops));
2860  return true;
2861 }
2862 
2863 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2864  SDLoc DL(N);
2865  SDValue Chain = N->getOperand(0);
2866  SDValue Offset = N->getOperand(1);
2867  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2868  MemSDNode *Mem = cast<MemSDNode>(N);
2869 
2870  // How many elements do we have?
2871  unsigned NumElts = 1;
2872  switch (N->getOpcode()) {
2873  default:
2874  return false;
2875  case NVPTXISD::StoreRetval:
2876  NumElts = 1;
2877  break;
2879  NumElts = 2;
2880  break;
2882  NumElts = 4;
2883  break;
2884  }
2885 
2886  // Build vector of operands
2888  for (unsigned i = 0; i < NumElts; ++i)
2889  Ops.push_back(N->getOperand(i + 2));
2890  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2891  Ops.push_back(Chain);
2892 
2893  // Determine target opcode
2894  // If we have an i1, use an 8-bit store. The lowering code in
2895  // NVPTXISelLowering will have already emitted an upcast.
2896  unsigned Opcode = 0;
2897  switch (NumElts) {
2898  default:
2899  return false;
2900  case 1:
2901  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2902  default:
2903  return false;
2904  case MVT::i1:
2905  Opcode = NVPTX::StoreRetvalI8;
2906  break;
2907  case MVT::i8:
2908  Opcode = NVPTX::StoreRetvalI8;
2909  break;
2910  case MVT::i16:
2911  Opcode = NVPTX::StoreRetvalI16;
2912  break;
2913  case MVT::i32:
2914  Opcode = NVPTX::StoreRetvalI32;
2915  break;
2916  case MVT::i64:
2917  Opcode = NVPTX::StoreRetvalI64;
2918  break;
2919  case MVT::f32:
2920  Opcode = NVPTX::StoreRetvalF32;
2921  break;
2922  case MVT::f64:
2923  Opcode = NVPTX::StoreRetvalF64;
2924  break;
2925  }
2926  break;
2927  case 2:
2928  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2929  default:
2930  return false;
2931  case MVT::i1:
2932  Opcode = NVPTX::StoreRetvalV2I8;
2933  break;
2934  case MVT::i8:
2935  Opcode = NVPTX::StoreRetvalV2I8;
2936  break;
2937  case MVT::i16:
2938  Opcode = NVPTX::StoreRetvalV2I16;
2939  break;
2940  case MVT::i32:
2941  Opcode = NVPTX::StoreRetvalV2I32;
2942  break;
2943  case MVT::i64:
2944  Opcode = NVPTX::StoreRetvalV2I64;
2945  break;
2946  case MVT::f32:
2947  Opcode = NVPTX::StoreRetvalV2F32;
2948  break;
2949  case MVT::f64:
2950  Opcode = NVPTX::StoreRetvalV2F64;
2951  break;
2952  }
2953  break;
2954  case 4:
2955  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2956  default:
2957  return false;
2958  case MVT::i1:
2959  Opcode = NVPTX::StoreRetvalV4I8;
2960  break;
2961  case MVT::i8:
2962  Opcode = NVPTX::StoreRetvalV4I8;
2963  break;
2964  case MVT::i16:
2965  Opcode = NVPTX::StoreRetvalV4I16;
2966  break;
2967  case MVT::i32:
2968  Opcode = NVPTX::StoreRetvalV4I32;
2969  break;
2970  case MVT::f32:
2971  Opcode = NVPTX::StoreRetvalV4F32;
2972  break;
2973  }
2974  break;
2975  }
2976 
2977  SDNode *Ret =
2978  CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2980  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2981  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2982 
2983  ReplaceNode(N, Ret);
2984  return true;
2985 }
2986 
2987 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2988  SDLoc DL(N);
2989  SDValue Chain = N->getOperand(0);
2990  SDValue Param = N->getOperand(1);
2991  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2992  SDValue Offset = N->getOperand(2);
2993  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2994  MemSDNode *Mem = cast<MemSDNode>(N);
2995  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2996 
2997  // How many elements do we have?
2998  unsigned NumElts = 1;
2999  switch (N->getOpcode()) {
3000  default:
3001  return false;
3004  case NVPTXISD::StoreParam:
3005  NumElts = 1;
3006  break;
3008  NumElts = 2;
3009  break;
3011  NumElts = 4;
3012  break;
3013  }
3014 
3015  // Build vector of operands
3017  for (unsigned i = 0; i < NumElts; ++i)
3018  Ops.push_back(N->getOperand(i + 3));
3019  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
3020  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
3021  Ops.push_back(Chain);
3022  Ops.push_back(Flag);
3023 
3024  // Determine target opcode
3025  // If we have an i1, use an 8-bit store. The lowering code in
3026  // NVPTXISelLowering will have already emitted an upcast.
3027  unsigned Opcode = 0;
3028  switch (N->getOpcode()) {
3029  default:
3030  switch (NumElts) {
3031  default:
3032  return false;
3033  case 1:
3034  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3035  default:
3036  return false;
3037  case MVT::i1:
3038  Opcode = NVPTX::StoreParamI8;
3039  break;
3040  case MVT::i8:
3041  Opcode = NVPTX::StoreParamI8;
3042  break;
3043  case MVT::i16:
3044  Opcode = NVPTX::StoreParamI16;
3045  break;
3046  case MVT::i32:
3047  Opcode = NVPTX::StoreParamI32;
3048  break;
3049  case MVT::i64:
3050  Opcode = NVPTX::StoreParamI64;
3051  break;
3052  case MVT::f32:
3053  Opcode = NVPTX::StoreParamF32;
3054  break;
3055  case MVT::f64:
3056  Opcode = NVPTX::StoreParamF64;
3057  break;
3058  }
3059  break;
3060  case 2:
3061  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3062  default:
3063  return false;
3064  case MVT::i1:
3065  Opcode = NVPTX::StoreParamV2I8;
3066  break;
3067  case MVT::i8:
3068  Opcode = NVPTX::StoreParamV2I8;
3069  break;
3070  case MVT::i16:
3071  Opcode = NVPTX::StoreParamV2I16;
3072  break;
3073  case MVT::i32:
3074  Opcode = NVPTX::StoreParamV2I32;
3075  break;
3076  case MVT::i64:
3077  Opcode = NVPTX::StoreParamV2I64;
3078  break;
3079  case MVT::f32:
3080  Opcode = NVPTX::StoreParamV2F32;
3081  break;
3082  case MVT::f64:
3083  Opcode = NVPTX::StoreParamV2F64;
3084  break;
3085  }
3086  break;
3087  case 4:
3088  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3089  default:
3090  return false;
3091  case MVT::i1:
3092  Opcode = NVPTX::StoreParamV4I8;
3093  break;
3094  case MVT::i8:
3095  Opcode = NVPTX::StoreParamV4I8;
3096  break;
3097  case MVT::i16:
3098  Opcode = NVPTX::StoreParamV4I16;
3099  break;
3100  case MVT::i32:
3101  Opcode = NVPTX::StoreParamV4I32;
3102  break;
3103  case MVT::f32:
3104  Opcode = NVPTX::StoreParamV4F32;
3105  break;
3106  }
3107  break;
3108  }
3109  break;
3110  // Special case: if we have a sign-extend/zero-extend node, insert the
3111  // conversion instruction first, and use that as the value operand to
3112  // the selected StoreParam node.
3113  case NVPTXISD::StoreParamU32: {
3114  Opcode = NVPTX::StoreParamI32;
3116  MVT::i32);
3117  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3118  MVT::i32, Ops[0], CvtNone);
3119  Ops[0] = SDValue(Cvt, 0);
3120  break;
3121  }
3122  case NVPTXISD::StoreParamS32: {
3123  Opcode = NVPTX::StoreParamI32;
3125  MVT::i32);
3126  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3127  MVT::i32, Ops[0], CvtNone);
3128  Ops[0] = SDValue(Cvt, 0);
3129  break;
3130  }
3131  }
3132 
3134  SDNode *Ret =
3135  CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
3137  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3138  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3139 
3140  ReplaceNode(N, Ret);
3141  return true;
3142 }
3143 
// Select the machine instruction for a texture-read node.
//
// The node's opcode is mapped one-to-one onto a TEX_* / TLD4_* (or the
// corresponding *_UNIFIED_*) machine opcode.  The machine node receives
// operands 1..n-1 of N in their original order followed by the chain
// (operand 0 of N), and reuses N's value-type list.
//
// Returns false, leaving N untouched, when the opcode is not one of the
// listed cases; otherwise replaces N and returns true.
//
// NOTE(review): in this listing the embedded line numbers skip wherever a
// line was dropped: most `case NVPTXISD::...:` labels and the declaration of
// `Ops` (listing line 3147) are absent.  Restore them from the original file
// before relying on this text; the missing identifiers are deliberately not
// guessed here.
3144 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
3145  SDValue Chain = N->getOperand(0);
3146  unsigned Opc = 0;
3148 
3149  switch (N->getOpcode()) {
3150  default: return false;
// Non-unified TEX_1D_* opcodes.
3152  Opc = NVPTX::TEX_1D_F32_S32;
3153  break;
3155  Opc = NVPTX::TEX_1D_F32_F32;
3156  break;
3158  Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3159  break;
3161  Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3162  break;
3163  case NVPTXISD::Tex1DS32S32:
3164  Opc = NVPTX::TEX_1D_S32_S32;
3165  break;
3167  Opc = NVPTX::TEX_1D_S32_F32;
3168  break;
3170  Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3171  break;
3173  Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3174  break;
3175  case NVPTXISD::Tex1DU32S32:
3176  Opc = NVPTX::TEX_1D_U32_S32;
3177  break;
3179  Opc = NVPTX::TEX_1D_U32_F32;
3180  break;
3182  Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3183  break;
3185  Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3186  break;
3188  Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3189  break;
3191  Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3192  break;
3194  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3195  break;
3197  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3198  break;
3200  Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3201  break;
3203  Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3204  break;
3206  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3207  break;
3209  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3210  break;
3212  Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3213  break;
3215  Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3216  break;
3218  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3219  break;
3221  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3222  break;
// Non-unified TEX_2D_* opcodes.
3224  Opc = NVPTX::TEX_2D_F32_S32;
3225  break;
3227  Opc = NVPTX::TEX_2D_F32_F32;
3228  break;
3230  Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3231  break;
3233  Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3234  break;
3235  case NVPTXISD::Tex2DS32S32:
3236  Opc = NVPTX::TEX_2D_S32_S32;
3237  break;
3239  Opc = NVPTX::TEX_2D_S32_F32;
3240  break;
3242  Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3243  break;
3245  Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3246  break;
3247  case NVPTXISD::Tex2DU32S32:
3248  Opc = NVPTX::TEX_2D_U32_S32;
3249  break;
3251  Opc = NVPTX::TEX_2D_U32_F32;
3252  break;
3254  Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3255  break;
3257  Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3258  break;
3260  Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3261  break;
3263  Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3264  break;
3266  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3267  break;
3269  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3270  break;
3272  Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3273  break;
3275  Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3276  break;
3278  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3279  break;
3281  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3282  break;
3284  Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3285  break;
3287  Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3288  break;
3290  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3291  break;
3293  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3294  break;
// Non-unified TEX_3D_* opcodes.
3296  Opc = NVPTX::TEX_3D_F32_S32;
3297  break;
3299  Opc = NVPTX::TEX_3D_F32_F32;
3300  break;
3302  Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3303  break;
3305  Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3306  break;
3307  case NVPTXISD::Tex3DS32S32:
3308  Opc = NVPTX::TEX_3D_S32_S32;
3309  break;
3311  Opc = NVPTX::TEX_3D_S32_F32;
3312  break;
3314  Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3315  break;
3317  Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3318  break;
3319  case NVPTXISD::Tex3DU32S32:
3320  Opc = NVPTX::TEX_3D_U32_S32;
3321  break;
3323  Opc = NVPTX::TEX_3D_U32_F32;
3324  break;
3326  Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3327  break;
3329  Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3330  break;
// Non-unified TEX_CUBE_* opcodes.
3332  Opc = NVPTX::TEX_CUBE_F32_F32;
3333  break;
3335  Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3336  break;
3338  Opc = NVPTX::TEX_CUBE_S32_F32;
3339  break;
3341  Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3342  break;
3344  Opc = NVPTX::TEX_CUBE_U32_F32;
3345  break;
3347  Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3348  break;
3350  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3351  break;
3353  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3354  break;
3356  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3357  break;
3359  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3360  break;
3362  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3363  break;
3365  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3366  break;
// Non-unified TLD4_* opcodes.
3368  Opc = NVPTX::TLD4_R_2D_F32_F32;
3369  break;
3371  Opc = NVPTX::TLD4_G_2D_F32_F32;
3372  break;
3374  Opc = NVPTX::TLD4_B_2D_F32_F32;
3375  break;
3377  Opc = NVPTX::TLD4_A_2D_F32_F32;
3378  break;
3380  Opc = NVPTX::TLD4_R_2D_S32_F32;
3381  break;
3383  Opc = NVPTX::TLD4_G_2D_S32_F32;
3384  break;
3386  Opc = NVPTX::TLD4_B_2D_S32_F32;
3387  break;
3389  Opc = NVPTX::TLD4_A_2D_S32_F32;
3390  break;
3392  Opc = NVPTX::TLD4_R_2D_U32_F32;
3393  break;
3395  Opc = NVPTX::TLD4_G_2D_U32_F32;
3396  break;
3398  Opc = NVPTX::TLD4_B_2D_U32_F32;
3399  break;
3401  Opc = NVPTX::TLD4_A_2D_U32_F32;
3402  break;
// TEX_UNIFIED_* opcodes, in the same order as the non-unified ones above.
3404  Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3405  break;
3407  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3408  break;
3410  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3411  break;
3413  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3414  break;
3416  Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3417  break;
3419  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3420  break;
3422  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3423  break;
3425  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3426  break;
3428  Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3429  break;
3431  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3432  break;
3434  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3435  break;
3437  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3438  break;
3440  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3441  break;
3443  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3444  break;
3446  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3447  break;
3449  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3450  break;
3452  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3453  break;
3455  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3456  break;
3458  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3459  break;
3461  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3462  break;
3464  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3465  break;
3467  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3468  break;
3470  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3471  break;
3473  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3474  break;
3476  Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3477  break;
3479  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3480  break;
3482  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3483  break;
3485  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3486  break;
3488  Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3489  break;
3491  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3492  break;
3494  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3495  break;
3497  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3498  break;
3500  Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3501  break;
3503  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3504  break;
3506  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3507  break;
3509  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3510  break;
3512  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3513  break;
3515  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3516  break;
3518  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3519  break;
3521  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3522  break;
3524  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3525  break;
3527  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3528  break;
3530  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3531  break;
3533  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3534  break;
3536  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3537  break;
3539  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3540  break;
3542  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3543  break;
3545  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3546  break;
3548  Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3549  break;
3551  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3552  break;
3554  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3555  break;
3557  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3558  break;
3560  Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3561  break;
3563  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3564  break;
3566  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3567  break;
3569  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3570  break;
3572  Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3573  break;
3575  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3576  break;
3578  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3579  break;
3581  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3582  break;
3584  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3585  break;
3587  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3588  break;
3590  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3591  break;
3593  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3594  break;
3596  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3597  break;
3599  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3600  break;
3602  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3603  break;
3605  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3606  break;
3608  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3609  break;
3611  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3612  break;
3614  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3615  break;
3617  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3618  break;
// TLD4_UNIFIED_* opcodes.
3620  Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3621  break;
3623  Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3624  break;
3626  Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3627  break;
3629  Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3630  break;
3632  Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3633  break;
3635  Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3636  break;
3638  Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3639  break;
3641  Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3642  break;
3644  Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3645  break;
3647  Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3648  break;
3650  Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3651  break;
3653  Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3654  break;
3655  }
3656 
3657  // Copy over operands
// Operand 0 (the chain) is skipped here and appended last instead.
3658  for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3659  Ops.push_back(N->getOperand(i));
3660  }
3661 
3662  Ops.push_back(Chain);
3663  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3664  return true;
3665 }
3666 
3667 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
3668  SDValue Chain = N->getOperand(0);
3669  SDValue TexHandle = N->getOperand(1);
3670  unsigned Opc = 0;
3672  switch (N->getOpcode()) {
3673  default: return false;
3675  Opc = NVPTX::SULD_1D_I8_CLAMP;
3676  Ops.push_back(TexHandle);
3677  Ops.push_back(N->getOperand(2));
3678  Ops.push_back(Chain);
3679  break;
3681  Opc = NVPTX::SULD_1D_I16_CLAMP;
3682  Ops.push_back(TexHandle);
3683  Ops.push_back(N->getOperand(2));
3684  Ops.push_back(Chain);
3685  break;
3687  Opc = NVPTX::SULD_1D_I32_CLAMP;
3688  Ops.push_back(TexHandle);
3689  Ops.push_back(N->getOperand(2));
3690  Ops.push_back(Chain);
3691  break;
3693  Opc = NVPTX::SULD_1D_I64_CLAMP;
3694  Ops.push_back(TexHandle);
3695  Ops.push_back(N->getOperand(2));
3696  Ops.push_back(Chain);
3697  break;
3699  Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3700  Ops.push_back(TexHandle);
3701  Ops.push_back(N->getOperand(2));
3702  Ops.push_back(Chain);
3703  break;
3705  Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3706  Ops.push_back(TexHandle);
3707  Ops.push_back(N->getOperand(2));
3708  Ops.push_back(Chain);
3709  break;
3711  Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3712  Ops.push_back(TexHandle);
3713  Ops.push_back(N->getOperand(2));
3714  Ops.push_back(Chain);
3715  break;
3717  Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3718  Ops.push_back(TexHandle);
3719  Ops.push_back(N->getOperand(2));
3720  Ops.push_back(Chain);
3721  break;
3723  Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3724  Ops.push_back(TexHandle);
3725  Ops.push_back(N->getOperand(2));
3726  Ops.push_back(Chain);
3727  break;
3729  Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3730  Ops.push_back(TexHandle);
3731  Ops.push_back(N->getOperand(2));
3732  Ops.push_back(Chain);
3733  break;
3735  Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3736  Ops.push_back(TexHandle);
3737  Ops.push_back(N->getOperand(2));
3738  Ops.push_back(Chain);
3739  break;
3741  Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3742  Ops.push_back(TexHandle);
3743  Ops.push_back(N->getOperand(2));
3744  Ops.push_back(N->getOperand(3));
3745  Ops.push_back(Chain);
3746  break;
3748  Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3749  Ops.push_back(TexHandle);
3750  Ops.push_back(N->getOperand(2));
3751  Ops.push_back(N->getOperand(3));
3752  Ops.push_back(Chain);
3753  break;
3755  Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3756  Ops.push_back(TexHandle);
3757  Ops.push_back(N->getOperand(2));
3758  Ops.push_back(N->getOperand(3));
3759  Ops.push_back(Chain);
3760  break;
3762  Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3763  Ops.push_back(TexHandle);
3764  Ops.push_back(N->getOperand(2));
3765  Ops.push_back(N->getOperand(3));
3766  Ops.push_back(Chain);
3767  break;
3769  Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3770  Ops.push_back(TexHandle);
3771  Ops.push_back(N->getOperand(2));
3772  Ops.push_back(N->getOperand(3));
3773  Ops.push_back(Chain);
3774  break;
3776  Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3777  Ops.push_back(TexHandle);
3778  Ops.push_back(N->getOperand(2));
3779  Ops.push_back(N->getOperand(3));
3780  Ops.push_back(Chain);
3781  break;
3783  Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3784  Ops.push_back(TexHandle);
3785  Ops.push_back(N->getOperand(2));
3786  Ops.push_back(N->getOperand(3));
3787  Ops.push_back(Chain);
3788  break;
3790  Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3791  Ops.push_back(TexHandle);
3792  Ops.push_back(N->getOperand(2));
3793  Ops.push_back(N->getOperand(3));
3794  Ops.push_back(Chain);
3795  break;
3797  Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3798  Ops.push_back(TexHandle);
3799  Ops.push_back(N->getOperand(2));
3800  Ops.push_back(N->getOperand(3));
3801  Ops.push_back(Chain);
3802  break;
3804  Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3805  Ops.push_back(TexHandle);
3806  Ops.push_back(N->getOperand(2));
3807  Ops.push_back(N->getOperand(3));
3808  Ops.push_back(Chain);
3809  break;
3811  Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3812  Ops.push_back(TexHandle);
3813  Ops.push_back(N->getOperand(2));
3814  Ops.push_back(N->getOperand(3));
3815  Ops.push_back(Chain);
3816  break;
3818  Opc = NVPTX::SULD_2D_I8_CLAMP;
3819  Ops.push_back(TexHandle);
3820  Ops.push_back(N->getOperand(2));
3821  Ops.push_back(N->getOperand(3));
3822  Ops.push_back(Chain);
3823  break;
3825  Opc = NVPTX::SULD_2D_I16_CLAMP;
3826  Ops.push_back(TexHandle);
3827  Ops.push_back(N->getOperand(2));
3828  Ops.push_back(N->getOperand(3));
3829  Ops.push_back(Chain);
3830  break;
3832  Opc = NVPTX::SULD_2D_I32_CLAMP;
3833  Ops.push_back(TexHandle);
3834  Ops.push_back(N->getOperand(2));
3835  Ops.push_back(N->getOperand(3));
3836  Ops.push_back(Chain);
3837  break;
3839  Opc = NVPTX::SULD_2D_I64_CLAMP;
3840  Ops.push_back(TexHandle);
3841  Ops.push_back(N->getOperand(2));
3842  Ops.push_back(N->getOperand(3));
3843  Ops.push_back(Chain);
3844  break;
3846  Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3847  Ops.push_back(TexHandle);
3848  Ops.push_back(N->getOperand(2));
3849  Ops.push_back(N->getOperand(3));
3850  Ops.push_back(Chain);
3851  break;
3853  Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3854  Ops.push_back(TexHandle);
3855  Ops.push_back(N->getOperand(2));
3856  Ops.push_back(N->getOperand(3));
3857  Ops.push_back(Chain);
3858  break;
3860  Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3861  Ops.push_back(TexHandle);
3862  Ops.push_back(N->getOperand(2));
3863  Ops.push_back(N->getOperand(3));
3864  Ops.push_back(Chain);
3865  break;
3867  Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3868  Ops.push_back(TexHandle);
3869  Ops.push_back(N->getOperand(2));
3870  Ops.push_back(N->getOperand(3));
3871  Ops.push_back(Chain);
3872  break;
3874  Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3875  Ops.push_back(TexHandle);
3876  Ops.push_back(N->getOperand(2));
3877  Ops.push_back(N->getOperand(3));
3878  Ops.push_back(Chain);
3879  break;
3881  Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3882  Ops.push_back(TexHandle);
3883  Ops.push_back(N->getOperand(2));
3884  Ops.push_back(N->getOperand(3));
3885  Ops.push_back(Chain);
3886  break;
3888  Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3889  Ops.push_back(TexHandle);
3890  Ops.push_back(N->getOperand(2));
3891  Ops.push_back(N->getOperand(3));
3892  Ops.push_back(Chain);
3893  break;
3895  Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3896  Ops.push_back(TexHandle);
3897  Ops.push_back(N->getOperand(2));
3898  Ops.push_back(N->getOperand(3));
3899  Ops.push_back(N->getOperand(4));
3900  Ops.push_back(Chain);
3901  break;
3903  Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3904  Ops.push_back(TexHandle);
3905  Ops.push_back(N->getOperand(2));
3906  Ops.push_back(N->getOperand(3));
3907  Ops.push_back(N->getOperand(4));
3908  Ops.push_back(Chain);
3909  break;
3911  Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3912  Ops.push_back(TexHandle);
3913  Ops.push_back(N->getOperand(2));
3914  Ops.push_back(N->getOperand(3));
3915  Ops.push_back(N->getOperand(4));
3916  Ops.push_back(Chain);
3917  break;
3919  Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3920  Ops.push_back(TexHandle);
3921  Ops.push_back(N->getOperand(2));
3922  Ops.push_back(N->getOperand(3));
3923  Ops.push_back(N->getOperand(4));
3924  Ops.push_back(Chain);
3925  break;
3927  Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3928  Ops.push_back(TexHandle);
3929  Ops.push_back(N->getOperand(2));
3930  Ops.push_back(N->getOperand(3));
3931  Ops.push_back(N->getOperand(4));
3932  Ops.push_back(Chain);
3933  break;
3935  Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3936  Ops.push_back(TexHandle);
3937  Ops.push_back(N->getOperand(2));
3938  Ops.push_back(N->getOperand(3));
3939  Ops.push_back(N->getOperand(4));
3940  Ops.push_back(Chain);
3941  break;
3943  Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3944  Ops.push_back(TexHandle);
3945  Ops.push_back(N->getOperand(2));
3946  Ops.push_back(N->getOperand(3));
3947  Ops.push_back(N->getOperand(4));
3948  Ops.push_back(Chain);
3949  break;
3951  Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3952  Ops.push_back(TexHandle);
3953  Ops.push_back(N->getOperand(2));
3954  Ops.push_back(N->getOperand(3));
3955  Ops.push_back(N->getOperand(4));
3956  Ops.push_back(Chain);
3957  break;
3959  Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3960  Ops.push_back(TexHandle);
3961  Ops.push_back(N->getOperand(2));
3962  Ops.push_back(N->getOperand(3));
3963  Ops.push_back(N->getOperand(4));
3964  Ops.push_back(Chain);
3965  break;
3967  Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3968  Ops.push_back(TexHandle);
3969  Ops.push_back(N->getOperand(2));
3970  Ops.push_back(N->getOperand(3));
3971  Ops.push_back(N->getOperand(4));
3972  Ops.push_back(Chain);
3973  break;
3975  Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3976  Ops.push_back(TexHandle);
3977  Ops.push_back(N->getOperand(2));
3978  Ops.push_back(N->getOperand(3));
3979  Ops.push_back(N->getOperand(4));
3980  Ops.push_back(Chain);
3981  break;
3983  Opc = NVPTX::SULD_3D_I8_CLAMP;
3984  Ops.push_back(TexHandle);
3985  Ops.push_back(N->getOperand(2));
3986  Ops.push_back(N->getOperand(3));
3987  Ops.push_back(N->getOperand(4));
3988  Ops.push_back(Chain);
3989  break;
3991  Opc = NVPTX::SULD_3D_I16_CLAMP;
3992  Ops.push_back(TexHandle);
3993  Ops.push_back(N->getOperand(2));
3994  Ops.push_back(N->getOperand(3));
3995  Ops.push_back(N->getOperand(4));
3996  Ops.push_back(Chain);
3997  break;
3999  Opc = NVPTX::SULD_3D_I32_CLAMP;
4000  Ops.push_back(TexHandle);
4001  Ops.push_back(N->getOperand(2));
4002  Ops.push_back(N->getOperand(3));
4003  Ops.push_back(N->getOperand(4));
4004  Ops.push_back(Chain);
4005  break;
4007  Opc = NVPTX::SULD_3D_I64_CLAMP;
4008  Ops.push_back(TexHandle);
4009  Ops.push_back(N->getOperand(2));
4010  Ops.push_back(N->getOperand(3));
4011  Ops.push_back(N->getOperand(4));
4012  Ops.push_back(Chain);
4013  break;
4015  Opc = NVPTX::SULD_3D_V2I8_CLAMP;
4016  Ops.push_back(TexHandle);
4017  Ops.push_back(N->getOperand(2));
4018  Ops.push_back(N->getOperand(3));
4019  Ops.push_back(N->getOperand(4));
4020  Ops.push_back(Chain);
4021  break;
4023  Opc = NVPTX::SULD_3D_V2I16_CLAMP;
4024  Ops.push_back(TexHandle);
4025  Ops.push_back(N->getOperand(2));
4026  Ops.push_back(N->getOperand(3));
4027  Ops.push_back(N->getOperand(4));
4028  Ops.push_back(Chain);
4029  break;
4031  Opc = NVPTX::SULD_3D_V2I32_CLAMP;
4032  Ops.push_back(TexHandle);
4033  Ops.push_back(N->getOperand(2));
4034  Ops.push_back(N->getOperand(3));
4035  Ops.push_back(N->getOperand(4));
4036  Ops.push_back(Chain);
4037  break;
4039  Opc = NVPTX::SULD_3D_V2I64_CLAMP;
4040  Ops.push_back(TexHandle);
4041  Ops.push_back(N->getOperand(2));
4042  Ops.push_back(N->getOperand(3));
4043  Ops.push_back(N->getOperand(4));
4044  Ops.push_back(Chain);
4045  break;
4047  Opc = NVPTX::SULD_3D_V4I8_CLAMP;
4048  Ops.push_back(TexHandle);
4049  Ops.push_back(N->getOperand(2));
4050  Ops.push_back(N->getOperand(3));
4051  Ops.push_back(N->getOperand(4));
4052  Ops.push_back(Chain);
4053  break;
4055  Opc = NVPTX::SULD_3D_V4I16_CLAMP;
4056  Ops.push_back(TexHandle);
4057  Ops.push_back(N->getOperand(2));
4058  Ops.push_back(N->getOperand(3));
4059  Ops.push_back(N->getOperand(4));
4060  Ops.push_back(Chain);
4061  break;
4063  Opc = NVPTX::SULD_3D_V4I32_CLAMP;
4064  Ops.push_back(TexHandle);
4065  Ops.push_back(N->getOperand(2));
4066  Ops.push_back(N->getOperand(3));
4067  Ops.push_back(N->getOperand(4));
4068  Ops.push_back(Chain);
4069  break;
4071  Opc = NVPTX::SULD_1D_I8_TRAP;
4072  Ops.push_back(TexHandle);
4073  Ops.push_back(N->getOperand(2));
4074  Ops.push_back(Chain);
4075  break;
4077  Opc = NVPTX::SULD_1D_I16_TRAP;
4078  Ops.push_back(TexHandle);
4079  Ops.push_back(N->getOperand(2));
4080  Ops.push_back(Chain);
4081  break;
4083  Opc = NVPTX::SULD_1D_I32_TRAP;
4084  Ops.push_back(TexHandle);
4085  Ops.push_back(N->getOperand(2));
4086  Ops.push_back(Chain);
4087  break;
4089  Opc = NVPTX::SULD_1D_I64_TRAP;
4090  Ops.push_back(TexHandle);
4091  Ops.push_back(N->getOperand(2));
4092  Ops.push_back(Chain);
4093  break;
4095  Opc = NVPTX::SULD_1D_V2I8_TRAP;
4096  Ops.push_back(TexHandle);
4097  Ops.push_back(N->getOperand(2));
4098  Ops.push_back(Chain);
4099  break;
4101  Opc = NVPTX::SULD_1D_V2I16_TRAP;
4102  Ops.push_back(TexHandle);
4103  Ops.push_back(N->getOperand(2));
4104  Ops.push_back(Chain);
4105  break;
4107  Opc = NVPTX::SULD_1D_V2I32_TRAP;
4108  Ops.push_back(TexHandle);
4109  Ops.push_back(N->getOperand(2));
4110  Ops.push_back(Chain);
4111  break;
4113  Opc = NVPTX::SULD_1D_V2I64_TRAP;
4114  Ops.push_back(TexHandle);
4115  Ops.push_back(N->getOperand(2));
4116  Ops.push_back(Chain);
4117  break;
4119  Opc = NVPTX::SULD_1D_V4I8_TRAP;
4120  Ops.push_back(TexHandle);
4121  Ops.push_back(N->getOperand(2));
4122  Ops.push_back(Chain);
4123  break;
4125  Opc = NVPTX::SULD_1D_V4I16_TRAP;
4126  Ops.push_back(TexHandle);
4127  Ops.push_back(N->getOperand(2));
4128  Ops.push_back(Chain);
4129  break;
4131  Opc = NVPTX::SULD_1D_V4I32_TRAP;
4132  Ops.push_back(TexHandle);
4133  Ops.push_back(N->getOperand(2));
4134  Ops.push_back(Chain);
4135  break;
4137  Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4138  Ops.push_back(TexHandle);
4139  Ops.push_back(N->getOperand(2));
4140  Ops.push_back(N->getOperand(3));
4141  Ops.push_back(Chain);
4142  break;
4144  Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4145  Ops.push_back(TexHandle);
4146  Ops.push_back(N->getOperand(2));
4147  Ops.push_back(N->getOperand(3));
4148  Ops.push_back(Chain);
4149  break;
4151  Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4152  Ops.push_back(TexHandle);
4153  Ops.push_back(N->getOperand(2));
4154  Ops.push_back(N->getOperand(3));
4155  Ops.push_back(Chain);
4156  break;
4158  Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4159  Ops.push_back(TexHandle);
4160  Ops.push_back(N->getOperand(2));
4161  Ops.push_back(N->getOperand(3));
4162  Ops.push_back(Chain);
4163  break;
4165  Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4166  Ops.push_back(TexHandle);
4167  Ops.push_back(N->getOperand(2));
4168  Ops.push_back(N->getOperand(3));
4169  Ops.push_back(Chain);
4170  break;
4172  Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4173  Ops.push_back(TexHandle);
4174  Ops.push_back(N->getOperand(2));
4175  Ops.push_back(N->getOperand(3));
4176  Ops.push_back(Chain);
4177  break;
4179  Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4180  Ops.push_back(TexHandle);
4181  Ops.push_back(N->getOperand(2));
4182  Ops.push_back(N->getOperand(3));
4183  Ops.push_back(Chain);
4184  break;
4186  Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4187  Ops.push_back(TexHandle);
4188  Ops.push_back(N->getOperand(2));
4189  Ops.push_back(N->getOperand(3));
4190  Ops.push_back(Chain);
4191  break;
4193  Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4194  Ops.push_back(TexHandle);
4195  Ops.push_back(N->getOperand(2));
4196  Ops.push_back(N->getOperand(3));
4197  Ops.push_back(Chain);
4198  break;
4200  Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4201  Ops.push_back(TexHandle);
4202  Ops.push_back(N->getOperand(2));
4203  Ops.push_back(N->getOperand(3));
4204  Ops.push_back(Chain);
4205  break;
4207  Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4208  Ops.push_back(TexHandle);
4209  Ops.push_back(N->getOperand(2));
4210  Ops.push_back(N->getOperand(3));
4211  Ops.push_back(Chain);
4212  break;
4214  Opc = NVPTX::SULD_2D_I8_TRAP;
4215  Ops.push_back(TexHandle);
4216  Ops.push_back(N->getOperand(2));
4217  Ops.push_back(N->getOperand(3));
4218  Ops.push_back(Chain);
4219  break;
4221  Opc = NVPTX::SULD_2D_I16_TRAP;
4222  Ops.push_back(TexHandle);
4223  Ops.push_back(N->getOperand(2));
4224  Ops.push_back(N->getOperand(3));
4225  Ops.push_back(Chain);
4226  break;
4228  Opc = NVPTX::SULD_2D_I32_TRAP;
4229  Ops.push_back(TexHandle);
4230  Ops.push_back(N->getOperand(2));
4231  Ops.push_back(N->getOperand(3));
4232  Ops.push_back(Chain);
4233  break;
4235  Opc = NVPTX::SULD_2D_I64_TRAP;
4236  Ops.push_back(TexHandle);
4237  Ops.push_back(N->getOperand(2));
4238  Ops.push_back(N->getOperand(3));
4239  Ops.push_back(Chain);
4240  break;
4242  Opc = NVPTX::SULD_2D_V2I8_TRAP;
4243  Ops.push_back(TexHandle);
4244  Ops.push_back(N->getOperand(2));
4245  Ops.push_back(N->getOperand(3));
4246  Ops.push_back(Chain);
4247  break;
4249  Opc = NVPTX::SULD_2D_V2I16_TRAP;
4250  Ops.push_back(TexHandle);
4251  Ops.push_back(N->getOperand(2));
4252  Ops.push_back(N->getOperand(3));
4253  Ops.push_back(Chain);
4254  break;
4256  Opc = NVPTX::SULD_2D_V2I32_TRAP;
4257  Ops.push_back(TexHandle);
4258  Ops.push_back(N->getOperand(2));
4259  Ops.push_back(N->getOperand(3));
4260  Ops.push_back(Chain);
4261  break;
4263  Opc = NVPTX::SULD_2D_V2I64_TRAP;
4264  Ops.push_back(TexHandle);
4265  Ops.push_back(N->getOperand(2));
4266  Ops.push_back(N->getOperand(3));
4267  Ops.push_back(Chain);
4268  break;
4270  Opc = NVPTX::SULD_2D_V4I8_TRAP;
4271  Ops.push_back(TexHandle);
4272  Ops.push_back(N->getOperand(2));
4273  Ops.push_back(N->getOperand(3));
4274  Ops.push_back(Chain);
4275  break;
4277  Opc = NVPTX::SULD_2D_V4I16_TRAP;
4278  Ops.push_back(TexHandle);
4279  Ops.push_back(N->getOperand(2));
4280  Ops.push_back(N->getOperand(3));
4281  Ops.push_back(Chain);
4282  break;
4284  Opc = NVPTX::SULD_2D_V4I32_TRAP;
4285  Ops.push_back(TexHandle);
4286  Ops.push_back(N->getOperand(2));
4287  Ops.push_back(N->getOperand(3));
4288  Ops.push_back(Chain);
4289  break;
4291  Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4292  Ops.push_back(TexHandle);
4293  Ops.push_back(N->getOperand(2));
4294  Ops.push_back(N->getOperand(3));
4295  Ops.push_back(N->getOperand(4));
4296  Ops.push_back(Chain);
4297  break;
4299  Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4300  Ops.push_back(TexHandle);
4301  Ops.push_back(N->getOperand(2));
4302  Ops.push_back(N->getOperand(3));
4303  Ops.push_back(N->getOperand(4));
4304  Ops.push_back(Chain);
4305  break;
4307  Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4308  Ops.push_back(TexHandle);
4309  Ops.push_back(N->getOperand(2));
4310  Ops.push_back(N->getOperand(3));
4311  Ops.push_back(N->getOperand(4));
4312  Ops.push_back(Chain);
4313  break;
4315  Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4316  Ops.push_back(TexHandle);
4317  Ops.push_back(N->getOperand(2));
4318  Ops.push_back(N->getOperand(3));
4319  Ops.push_back(N->getOperand(4));
4320  Ops.push_back(Chain);
4321  break;
4323  Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4324  Ops.push_back(TexHandle);
4325  Ops.push_back(N->getOperand(2));
4326  Ops.push_back(N->getOperand(3));
4327  Ops.push_back(N->getOperand(4));
4328  Ops.push_back(Chain);
4329  break;
4331  Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4332  Ops.push_back(TexHandle);
4333  Ops.push_back(N->getOperand(2));
4334  Ops.push_back(N->getOperand(3));
4335  Ops.push_back(N->getOperand(4));
4336  Ops.push_back(Chain);
4337  break;
4339  Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4340  Ops.push_back(TexHandle);
4341  Ops.push_back(N->getOperand(2));
4342  Ops.push_back(N->getOperand(3));
4343  Ops.push_back(N->getOperand(4));
4344  Ops.push_back(Chain);
4345  break;
4347  Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4348  Ops.push_back(TexHandle);
4349  Ops.push_back(N->getOperand(2));
4350  Ops.push_back(N->getOperand(3));
4351  Ops.push_back(N->getOperand(4));
4352  Ops.push_back(Chain);
4353  break;
4355  Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4356  Ops.push_back(TexHandle);
4357  Ops.push_back(N->getOperand(2));
4358  Ops.push_back(N->getOperand(3));
4359  Ops.push_back(N->getOperand(4));
4360  Ops.push_back(Chain);
4361  break;
4363  Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4364  Ops.push_back(TexHandle);
4365  Ops.push_back(N->getOperand(2));
4366  Ops.push_back(N->getOperand(3));
4367  Ops.push_back(N->getOperand(4));
4368  Ops.push_back(Chain);
4369  break;
4371  Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4372  Ops.push_back(TexHandle);
4373  Ops.push_back(N->getOperand(2));
4374  Ops.push_back(N->getOperand(3));
4375  Ops.push_back(N->getOperand(4));
4376  Ops.push_back(Chain);
4377  break;
4379  Opc = NVPTX::SULD_3D_I8_TRAP;
4380  Ops.push_back(TexHandle);
4381  Ops.push_back(N->getOperand(2));
4382  Ops.push_back(N->getOperand(3));
4383  Ops.push_back(N->getOperand(4));
4384  Ops.push_back(Chain);
4385  break;
4387  Opc = NVPTX::SULD_3D_I16_TRAP;
4388  Ops.push_back(TexHandle);
4389  Ops.push_back(N->getOperand(2));
4390  Ops.push_back(N->getOperand(3));
4391  Ops.push_back(N->getOperand(4));
4392  Ops.push_back(Chain);
4393  break;
4395  Opc = NVPTX::SULD_3D_I32_TRAP;
4396  Ops.push_back(TexHandle);
4397  Ops.push_back(N->getOperand(2));
4398  Ops.push_back(N->getOperand(3));
4399  Ops.push_back(N->getOperand(4));
4400  Ops.push_back(Chain);
4401  break;
4403  Opc = NVPTX::SULD_3D_I64_TRAP;
4404  Ops.push_back(TexHandle);
4405  Ops.push_back(N->getOperand(2));
4406  Ops.push_back(N->getOperand(3));
4407  Ops.push_back(N->getOperand(4));
4408  Ops.push_back(Chain);
4409  break;
4411  Opc = NVPTX::SULD_3D_V2I8_TRAP;
4412  Ops.push_back(TexHandle);
4413  Ops.push_back(N->getOperand(2));
4414  Ops.push_back(N->getOperand(3));
4415  Ops.push_back(N->getOperand(4));
4416  Ops.push_back(Chain);
4417  break;
4419  Opc = NVPTX::SULD_3D_V2I16_TRAP;
4420  Ops.push_back(TexHandle);
4421  Ops.push_back(N->getOperand(2));
4422  Ops.push_back(N->getOperand(3));
4423  Ops.push_back(N->getOperand(4));
4424  Ops.push_back(Chain);
4425  break;
4427  Opc = NVPTX::SULD_3D_V2I32_TRAP;
4428  Ops.push_back(TexHandle);
4429  Ops.push_back(N->getOperand(2));
4430  Ops.push_back(N->getOperand(3));
4431  Ops.push_back(N->getOperand(4));
4432  Ops.push_back(Chain);
4433  break;
4435  Opc = NVPTX::SULD_3D_V2I64_TRAP;
4436  Ops.push_back(TexHandle);
4437  Ops.push_back(N->getOperand(2));
4438  Ops.push_back(N->getOperand(3));
4439  Ops.push_back(N->getOperand(4));
4440  Ops.push_back(Chain);
4441  break;
4443  Opc = NVPTX::SULD_3D_V4I8_TRAP;
4444  Ops.push_back(TexHandle);
4445  Ops.push_back(N->getOperand(2));
4446  Ops.push_back(N->getOperand(3));
4447  Ops.push_back(N->getOperand(4));
4448  Ops.push_back(Chain);
4449  break;
4451  Opc = NVPTX::SULD_3D_V4I16_TRAP;
4452  Ops.push_back(TexHandle);
4453  Ops.push_back(N->getOperand(2));
4454  Ops.push_back(N->getOperand(3));
4455  Ops.push_back(N->getOperand(4));
4456  Ops.push_back(Chain);
4457  break;
4459  Opc = NVPTX::SULD_3D_V4I32_TRAP;
4460  Ops.push_back(TexHandle);
4461  Ops.push_back(N->getOperand(2));
4462  Ops.push_back(N->getOperand(3));
4463  Ops.push_back(N->getOperand(4));
4464  Ops.push_back(Chain);
4465  break;
4467  Opc = NVPTX::SULD_1D_I8_ZERO;
4468  Ops.push_back(TexHandle);
4469  Ops.push_back(N->getOperand(2));
4470  Ops.push_back(Chain);
4471  break;
4473  Opc = NVPTX::SULD_1D_I16_ZERO;
4474  Ops.push_back(TexHandle);
4475  Ops.push_back(N->getOperand(2));
4476  Ops.push_back(Chain);
4477  break;
4479  Opc = NVPTX::SULD_1D_I32_ZERO;
4480  Ops.push_back(TexHandle);
4481  Ops.push_back(N->getOperand(2));
4482  Ops.push_back(Chain);
4483  break;
4485  Opc = NVPTX::SULD_1D_I64_ZERO;
4486  Ops.push_back(TexHandle);
4487  Ops.push_back(N->getOperand(2));
4488  Ops.push_back(Chain);
4489  break;
4491  Opc = NVPTX::SULD_1D_V2I8_ZERO;
4492  Ops.push_back(TexHandle);
4493  Ops.push_back(N->getOperand(2));
4494  Ops.push_back(Chain);
4495  break;
4497  Opc = NVPTX::SULD_1D_V2I16_ZERO;
4498  Ops.push_back(TexHandle);
4499  Ops.push_back(N->getOperand(2));
4500  Ops.push_back(Chain);
4501  break;
4503  Opc = NVPTX::SULD_1D_V2I32_ZERO;
4504  Ops.push_back(TexHandle);
4505  Ops.push_back(N->getOperand(2));
4506  Ops.push_back(Chain);
4507  break;
4509  Opc = NVPTX::SULD_1D_V2I64_ZERO;
4510  Ops.push_back(TexHandle);
4511  Ops.push_back(N->getOperand(2));
4512  Ops.push_back(Chain);
4513  break;
4515  Opc = NVPTX::SULD_1D_V4I8_ZERO;
4516  Ops.push_back(TexHandle);
4517  Ops.push_back(N->getOperand(2));
4518  Ops.push_back(Chain);
4519  break;
4521  Opc = NVPTX::SULD_1D_V4I16_ZERO;
4522  Ops.push_back(TexHandle);
4523  Ops.push_back(N->getOperand(2));
4524  Ops.push_back(Chain);
4525  break;
4527  Opc = NVPTX::SULD_1D_V4I32_ZERO;
4528  Ops.push_back(TexHandle);
4529  Ops.push_back(N->getOperand(2));
4530  Ops.push_back(Chain);
4531  break;
4533  Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4534  Ops.push_back(TexHandle);
4535  Ops.push_back(N->getOperand(2));
4536  Ops.push_back(N->getOperand(3));
4537  Ops.push_back(Chain);
4538  break;
4540  Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4541  Ops.push_back(TexHandle);
4542  Ops.push_back(N->getOperand(2));
4543  Ops.push_back(N->getOperand(3));
4544  Ops.push_back(Chain);
4545  break;
4547  Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4548  Ops.push_back(TexHandle);
4549  Ops.push_back(N->getOperand(2));
4550  Ops.push_back(N->getOperand(3));
4551  Ops.push_back(Chain);
4552  break;
4554  Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4555  Ops.push_back(TexHandle);
4556  Ops.push_back(N->getOperand(2));
4557  Ops.push_back(N->getOperand(3));
4558  Ops.push_back(Chain);
4559  break;
4561  Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4562  Ops.push_back(TexHandle);
4563  Ops.push_back(N->getOperand(2));
4564  Ops.push_back(N->getOperand(3));
4565  Ops.push_back(Chain);
4566  break;
4568  Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4569  Ops.push_back(TexHandle);
4570  Ops.push_back(N->getOperand(2));
4571  Ops.push_back(N->getOperand(3));
4572  Ops.push_back(Chain);
4573  break;
4575  Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4576  Ops.push_back(TexHandle);
4577  Ops.push_back(N->getOperand(2));
4578  Ops.push_back(N->getOperand(3));
4579  Ops.push_back(Chain);
4580  break;
4582  Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4583  Ops.push_back(TexHandle);
4584  Ops.push_back(N->getOperand(2));
4585  Ops.push_back(N->getOperand(3));
4586  Ops.push_back(Chain);
4587  break;
4589  Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4590  Ops.push_back(TexHandle);
4591  Ops.push_back(N->getOperand(2));
4592  Ops.push_back(N->getOperand(3));
4593  Ops.push_back(Chain);
4594  break;
4596  Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4597  Ops.push_back(TexHandle);
4598  Ops.push_back(N->getOperand(2));
4599  Ops.push_back(N->getOperand(3));
4600  Ops.push_back(Chain);
4601  break;
4603  Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4604  Ops.push_back(TexHandle);
4605  Ops.push_back(N->getOperand(2));
4606  Ops.push_back(N->getOperand(3));
4607  Ops.push_back(Chain);
4608  break;
4610  Opc = NVPTX::SULD_2D_I8_ZERO;
4611  Ops.push_back(TexHandle);
4612  Ops.push_back(N->getOperand(2));
4613  Ops.push_back(N->getOperand(3));
4614  Ops.push_back(Chain);
4615  break;
4617  Opc = NVPTX::SULD_2D_I16_ZERO;
4618  Ops.push_back(TexHandle);
4619  Ops.push_back(N->getOperand(2));
4620  Ops.push_back(N->getOperand(3));
4621  Ops.push_back(Chain);
4622  break;
4624  Opc = NVPTX::SULD_2D_I32_ZERO;
4625  Ops.push_back(TexHandle);
4626  Ops.push_back(N->getOperand(2));
4627  Ops.push_back(N->getOperand(3));
4628  Ops.push_back(Chain);
4629  break;
4631  Opc = NVPTX::SULD_2D_I64_ZERO;
4632  Ops.push_back(TexHandle);
4633  Ops.push_back(N->getOperand(2));
4634  Ops.push_back(N->getOperand(3));
4635  Ops.push_back(Chain);
4636  break;
4638  Opc = NVPTX::SULD_2D_V2I8_ZERO;
4639  Ops.push_back(TexHandle);
4640  Ops.push_back(N->getOperand(2));
4641  Ops.push_back(N->getOperand(3));
4642  Ops.push_back(Chain);
4643  break;
4645  Opc = NVPTX::SULD_2D_V2I16_ZERO;
4646  Ops.push_back(TexHandle);
4647  Ops.push_back(N->getOperand(2));
4648  Ops.push_back(N->getOperand(3));
4649  Ops.push_back(Chain);
4650  break;
4652  Opc = NVPTX::SULD_2D_V2I32_ZERO;
4653  Ops.push_back(TexHandle);
4654  Ops.push_back(N->getOperand(2));
4655  Ops.push_back(N->getOperand(3));
4656  Ops.push_back(Chain);
4657  break;
4659  Opc = NVPTX::SULD_2D_V2I64_ZERO;
4660  Ops.push_back(TexHandle);
4661  Ops.push_back(N->getOperand(2));
4662  Ops.push_back(N->getOperand(3));
4663  Ops.push_back(Chain);
4664  break;
4666  Opc = NVPTX::SULD_2D_V4I8_ZERO;
4667  Ops.push_back(TexHandle);
4668  Ops.push_back(N->getOperand(2));
4669  Ops.push_back(N->getOperand(3));
4670  Ops.push_back(Chain);
4671  break;
4673  Opc = NVPTX::SULD_2D_V4I16_ZERO;
4674  Ops.push_back(TexHandle);
4675  Ops.push_back(N->getOperand(2));
4676  Ops.push_back(N->getOperand(3));
4677  Ops.push_back(Chain);
4678  break;
4680  Opc = NVPTX::SULD_2D_V4I32_ZERO;
4681  Ops.push_back(TexHandle);
4682  Ops.push_back(N->getOperand(2));
4683  Ops.push_back(N->getOperand(3));
4684  Ops.push_back(Chain);
4685  break;
4687  Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4688  Ops.push_back(TexHandle);
4689  Ops.push_back(N->getOperand(2));
4690  Ops.push_back(N->getOperand(3));
4691  Ops.push_back(N->getOperand(4));
4692  Ops.push_back(Chain);
4693  break;
4695  Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4696  Ops.push_back(TexHandle);
4697  Ops.push_back(N->getOperand(2));
4698  Ops.push_back(N->getOperand(3));
4699  Ops.push_back(N->getOperand(4));
4700  Ops.push_back(Chain);
4701  break;
4703  Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4704  Ops.push_back(TexHandle);
4705  Ops.push_back(N->getOperand(2));
4706  Ops.push_back(N->getOperand(3));
4707  Ops.push_back(N->getOperand(4));
4708  Ops.push_back(Chain);
4709  break;
4711  Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4712  Ops.push_back(TexHandle);
4713  Ops.push_back(N->getOperand(2));
4714  Ops.push_back(N->getOperand(3));
4715  Ops.push_back(N->getOperand(4));
4716  Ops.push_back(Chain);
4717  break;
4719  Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4720  Ops.push_back(TexHandle);
4721  Ops.push_back(N->getOperand(2));
4722  Ops.push_back(N->getOperand(3));
4723  Ops.push_back(N->getOperand(4));
4724  Ops.push_back(Chain);
4725  break;
4727  Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4728  Ops.push_back(TexHandle);
4729  Ops.push_back(N->getOperand(2));
4730  Ops.push_back(N->getOperand(3));
4731  Ops.push_back(N->getOperand(4));
4732  Ops.push_back(Chain);
4733  break;
4735  Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4736  Ops.push_back(TexHandle);
4737  Ops.push_back(N->getOperand(2));
4738  Ops.push_back(N->getOperand(3));
4739  Ops.push_back(N->getOperand(4));
4740  Ops.push_back(Chain);
4741  break;
4743  Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4744  Ops.push_back(TexHandle);
4745  Ops.push_back(N->getOperand(2));
4746  Ops.push_back(N->getOperand(3));
4747  Ops.push_back(N->getOperand(4));
4748  Ops.push_back(Chain);
4749  break;
4751  Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4752  Ops.push_back(TexHandle);
4753  Ops.push_back(N->getOperand(2));
4754  Ops.push_back(N->getOperand(3));
4755  Ops.push_back(N->getOperand(4));
4756  Ops.push_back(Chain);
4757  break;
4759  Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4760  Ops.push_back(TexHandle);
4761  Ops.push_back(N->getOperand(2));
4762  Ops.push_back(N->getOperand(3));
4763  Ops.push_back(N->getOperand(4));
4764  Ops.push_back(Chain);
4765  break;
4767  Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4768  Ops.push_back(TexHandle);
4769  Ops.push_back(N->getOperand(2));
4770  Ops.push_back(N->getOperand(3));
4771  Ops.push_back(N->getOperand(4));
4772  Ops.push_back(Chain);
4773  break;
4775  Opc = NVPTX::SULD_3D_I8_ZERO;
4776  Ops.push_back(TexHandle);
4777  Ops.push_back(N->getOperand(2));
4778  Ops.push_back(N->getOperand(3));
4779  Ops.push_back(N->getOperand(4));
4780  Ops.push_back(Chain);
4781  break;
4783  Opc = NVPTX::SULD_3D_I16_ZERO;
4784  Ops.push_back(TexHandle);
4785  Ops.push_back(N->getOperand(2));
4786  Ops.push_back(N->getOperand(3));
4787  Ops.push_back(N->getOperand(4));
4788  Ops.push_back(Chain);
4789  break;
4791  Opc = NVPTX::SULD_3D_I32_ZERO;
4792  Ops.push_back(TexHandle);
4793  Ops.push_back(N->getOperand(2));
4794  Ops.push_back(N->getOperand(3));
4795  Ops.push_back(N->getOperand(4));
4796  Ops.push_back(Chain);
4797  break;
4799  Opc = NVPTX::SULD_3D_I64_ZERO;
4800  Ops.push_back(TexHandle);
4801  Ops.push_back(N->getOperand(2));
4802  Ops.push_back(N->getOperand(3));
4803  Ops.push_back(N->getOperand(4));
4804  Ops.push_back(Chain);
4805  break;
4807  Opc = NVPTX::SULD_3D_V2I8_ZERO;
4808  Ops.push_back(TexHandle);
4809  Ops.push_back(N->getOperand(2));
4810  Ops.push_back(N->getOperand(3));
4811  Ops.push_back(N->getOperand(4));
4812  Ops.push_back(Chain);
4813  break;
4815  Opc = NVPTX::SULD_3D_V2I16_ZERO;
4816  Ops.push_back(TexHandle);
4817  Ops.push_back(N->getOperand(2));
4818  Ops.push_back(N->getOperand(3));
4819  Ops.push_back(N->getOperand(4));
4820  Ops.push_back(Chain);
4821  break;
4823  Opc = NVPTX::SULD_3D_V2I32_ZERO;
4824  Ops.push_back(TexHandle);
4825  Ops.push_back(N->getOperand(2));
4826  Ops.push_back(N->getOperand(3));
4827  Ops.push_back(N->getOperand(4));
4828  Ops.push_back(Chain);
4829  break;
4831  Opc = NVPTX::SULD_3D_V2I64_ZERO;
4832  Ops.push_back(TexHandle);
4833  Ops.push_back(N->getOperand(2));
4834  Ops.push_back(N->getOperand(3));
4835  Ops.push_back(N->getOperand(4));
4836  Ops.push_back(Chain);
4837  break;
4839  Opc = NVPTX::SULD_3D_V4I8_ZERO;
4840  Ops.push_back(TexHandle);
4841  Ops.push_back(N->getOperand(2));
4842  Ops.push_back(N->getOperand(3));
4843  Ops.push_back(N->getOperand(4));
4844  Ops.push_back(Chain);
4845  break;
4847  Opc = NVPTX::SULD_3D_V4I16_ZERO;
4848  Ops.push_back(TexHandle);
4849  Ops.push_back(N->getOperand(2));
4850  Ops.push_back(N->getOperand(3));
4851  Ops.push_back(N->getOperand(4));
4852  Ops.push_back(Chain);
4853  break;
4855  Opc = NVPTX::SULD_3D_V4I32_ZERO;
4856  Ops.push_back(TexHandle);
4857  Ops.push_back(N->getOperand(2));
4858  Ops.push_back(N->getOperand(3));
4859  Ops.push_back(N->getOperand(4));
4860  Ops.push_back(Chain);
4861  break;
4862  }
4863  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
4864  return true;
4865 }
4866 
4867 
4868 /// SelectBFE - Look for instruction sequences that can be made more efficient
4869 /// by using the 'bfe' (bit-field extract) PTX instruction
4870 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
4871  SDLoc DL(N);
4872  SDValue LHS = N->getOperand(0);
4873  SDValue RHS = N->getOperand(1);
4874  SDValue Len;
4875  SDValue Start;
4876  SDValue Val;
4877  bool IsSigned = false;
4878 
4879  if (N->getOpcode() == ISD::AND) {
4880  // Canonicalize the operands
4881  // We want 'and %val, %mask'
4882  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4883  std::swap(LHS, RHS);
4884  }
4885 
4887  if (!Mask) {
4888  // We need a constant mask on the RHS of the AND
4889  return false;
4890  }
4891 
4892  // Extract the mask bits
4893  uint64_t MaskVal = Mask->getZExtValue();
4894  if (!isMask_64(MaskVal)) {
4895  // We *could* handle shifted masks here, but doing so would require an
4896  // 'and' operation to fix up the low-order bits so we would trade
4897  // shr+and for bfe+and, which has the same throughput
4898  return false;
4899  }
4900 
4901  // How many bits are in our mask?
4902  uint64_t NumBits = countTrailingOnes(MaskVal);
4903  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4904 
4905  if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4906  // We have a 'srl/and' pair, extract the effective start bit and length
4907  Val = LHS.getNode()->getOperand(0);
4908  Start = LHS.getNode()->getOperand(1);
4909  ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4910  if (StartConst) {
4911  uint64_t StartVal = StartConst->getZExtValue();
4912  // How many "good" bits do we have left? "good" is defined here as bits
4913  // that exist in the original value, not shifted in.
4914  uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
4915  if (NumBits > GoodBits) {
4916  // Do not handle the case where bits have been shifted in. In theory
4917  // we could handle this, but the cost is likely higher than just
4918  // emitting the srl/and pair.
4919  return false;
4920  }
4921  Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4922  } else {
4923  // Do not handle the case where the shift amount (can be zero if no srl
4924  // was found) is not constant. We could handle this case, but it would
4925  // require run-time logic that would be more expensive than just
4926  // emitting the srl/and pair.
4927  return false;
4928  }
4929  } else {
4930  // Do not handle the case where the LHS of the and is not a shift. While
4931  // it would be trivial to handle this case, it would just transform
4932  // 'and' -> 'bfe', but 'and' has higher-throughput.
4933  return false;
4934  }
4935  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4936  if (LHS->getOpcode() == ISD::AND) {
4937  ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4938  if (!ShiftCnst) {
4939  // Shift amount must be constant
4940  return false;
4941  }
4942 
4943  uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4944 
4945  SDValue AndLHS = LHS->getOperand(0);
4946  SDValue AndRHS = LHS->getOperand(1);
4947 
4948  // Canonicalize the AND to have the mask on the RHS
4949  if (isa<ConstantSDNode>(AndLHS)) {
4950  std::swap(AndLHS, AndRHS);
4951  }
4952 
4953  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4954  if (!MaskCnst) {
4955  // Mask must be constant
4956  return false;
4957  }
4958 
4959  uint64_t MaskVal = MaskCnst->getZExtValue();
4960  uint64_t NumZeros;
4961  uint64_t NumBits;
4962  if (isMask_64(MaskVal)) {
4963  NumZeros = 0;
4964  // The number of bits in the result bitfield will be the number of
4965  // trailing ones (the AND) minus the number of bits we shift off
4966  NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4967  } else if (isShiftedMask_64(MaskVal)) {
4968  NumZeros = countTrailingZeros(MaskVal);
4969  unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4970  // The number of bits in the result bitfield will be the number of
4971  // trailing zeros plus the number of set bits in the mask minus the
4972  // number of bits we shift off
4973  NumBits = NumZeros + NumOnes - ShiftAmt;
4974  } else {
4975  // This is not a mask we can handle
4976  return false;
4977  }
4978 
4979  if (ShiftAmt < NumZeros) {
4980  // Handling this case would require extra logic that would make this
4981  // transformation non-profitable
4982  return false;
4983  }
4984 
4985  Val = AndLHS;
4986  Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4987  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4988  } else if (LHS->getOpcode() == ISD::SHL) {
4989  // Here, we have a pattern like:
4990  //
4991  // (sra (shl val, NN), MM)
4992  // or
4993  // (srl (shl val, NN), MM)
4994  //
4995  // If MM >= NN, we can efficiently optimize this with bfe
4996  Val = LHS->getOperand(0);
4997 
4998  SDValue ShlRHS = LHS->getOperand(1);
4999  ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
5000  if (!ShlCnst) {
5001  // Shift amount must be constant
5002  return false;
5003  }
5004  uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
5005 
5006  SDValue ShrRHS = RHS;
5007  ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
5008  if (!ShrCnst) {
5009  // Shift amount must be constant
5010  return false;
5011  }
5012  uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
5013 
5014  // To avoid extra codegen and be profitable, we need Outer >= Inner
5015  if (OuterShiftAmt < InnerShiftAmt) {
5016  return false;
5017  }
5018 
5019  // If the outer shift is more than the type size, we have no bitfield to
5020  // extract (since we also check that the inner shift is <= the outer shift
5021  // then this also implies that the inner shift is < the type size)
5022  if (OuterShiftAmt >= Val.getValueSizeInBits()) {
5023  return false;
5024  }
5025 
5026  Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
5027  MVT::i32);
5028  Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
5029  DL, MVT::i32);
5030 
5031  if (N->getOpcode() == ISD::SRA) {
5032  // If we have a arithmetic right shift, we need to use the signed bfe
5033  // variant
5034  IsSigned = true;
5035  }
5036  } else {
5037  // No can do...
5038  return false;
5039  }
5040  } else {
5041  // No can do...
5042  return false;
5043  }
5044 
5045 
5046  unsigned Opc;
5047  // For the BFE operations we form here from "and" and "srl", always use the
5048  // unsigned variants.
5049  if (Val.getValueType() == MVT::i32) {
5050  if (IsSigned) {
5051  Opc = NVPTX::BFE_S32rii;
5052  } else {
5053  Opc = NVPTX::BFE_U32rii;
5054  }
5055  } else if (Val.getValueType() == MVT::i64) {
5056  if (IsSigned) {
5057  Opc = NVPTX::BFE_S64rii;
5058  } else {
5059  Opc = NVPTX::BFE_U64rii;
5060  }
5061  } else {
5062  // We cannot handle this type
5063  return false;
5064  }
5065 
5066  SDValue Ops[] = {
5067  Val, Start, Len
5068  };
5069 
5070  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
5071  return true;
5072 }
5073 
5074 // SelectDirectAddr - Match a direct address for DAG.
5075 // A direct address could be a globaladdress or externalsymbol.
5076 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
5077  // Return true if TGA or ES.
5078  if (N.getOpcode() == ISD::TargetGlobalAddress ||
5080  Address = N;
5081  return true;
5082  }
5083  if (N.getOpcode() == NVPTXISD::Wrapper) {
5084  Address = N.getOperand(0);
5085  return true;
5086  }
5087  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
5088  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
5089  if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
5091  CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
5092  return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
5093  }
5094  return false;
5095 }
5096 
5097 // symbol+offset
5098 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5099  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5100  if (Addr.getOpcode() == ISD::ADD) {
5101  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5102  SDValue base = Addr.getOperand(0);
5103  if (SelectDirectAddr(base, Base)) {
5104  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5105  mvt);
5106  return true;
5107  }
5108  }
5109  }
5110  return false;
5111 }
5112 
5113 // symbol+offset
5114 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5115  SDValue &Base, SDValue &Offset) {
5116  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
5117 }
5118 
5119 // symbol+offset
5120 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5121  SDValue &Base, SDValue &Offset) {
5122  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
5123 }
5124 
5125 // register+offset
5126 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5127  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5128  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5129  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5130  Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
5131  return true;
5132  }
5133  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5135  return false; // direct calls.
5136 
5137  if (Addr.getOpcode() == ISD::ADD) {
5138  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5139  return false;
5140  }
5141  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5142  if (FrameIndexSDNode *FIN =
5143  dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5144  // Constant offset from frame ref.
5145  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5146  else
5147  Base = Addr.getOperand(0);
5148  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5149  mvt);
5150  return true;
5151  }
5152  }
5153  return false;
5154 }
5155 
5156 // register+offset
5157 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5158  SDValue &Base, SDValue &Offset) {
5159  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5160 }
5161 
5162 // register+offset
5163 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5164  SDValue &Base, SDValue &Offset) {
5165  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5166 }
5167 
5168 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5169  unsigned int spN) const {
5170  const Value *Src = nullptr;
5171  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5172  if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5173  return true;
5174  Src = mN->getMemOperand()->getValue();
5175  }
5176  if (!Src)
5177  return false;
5178  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
5179  return (PT->getAddressSpace() == spN);
5180  return false;
5181 }
5182 
5183 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5184 /// inline asm expressions.
5186  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5187  SDValue Op0, Op1;
5188  switch (ConstraintID) {
5189  default:
5190  return true;
5191  case InlineAsm::Constraint_m: // memory
5192  if (SelectDirectAddr(Op, Op0)) {
5193  OutOps.push_back(Op0);
5194  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
5195  return false;
5196  }
5197  if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5198  OutOps.push_back(Op0);
5199  OutOps.push_back(Op1);
5200  return false;
5201  }
5202  break;
5203  }
5204  return true;
5205 }
5206 
5207 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
5208 /// conversion from \p SrcTy to \p DestTy.
5209 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
5210  bool IsSigned) {
5211  switch (SrcTy.SimpleTy) {
5212  default:
5213  llvm_unreachable("Unhandled source type");
5214  case MVT::i8:
5215  switch (DestTy.SimpleTy) {
5216  default:
5217  llvm_unreachable("Unhandled dest type");
5218  case MVT::i16:
5219  return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
5220  case MVT::i32:
5221  return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
5222  case MVT::i64:
5223  return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
5224  }
5225  case MVT::i16:
5226  switch (DestTy.SimpleTy) {
5227  default:
5228  llvm_unreachable("Unhandled dest type");
5229  case MVT::i8:
5230  return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
5231  case MVT::i32:
5232  return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
5233  case MVT::i64:
5234  return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
5235  }
5236  case MVT::i32:
5237  switch (DestTy.SimpleTy) {
5238  default:
5239  llvm_unreachable("Unhandled dest type");
5240  case MVT::i8:
5241  return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
5242  case MVT::i16:
5243  return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
5244  case MVT::i64:
5245  return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
5246  }
5247  case MVT::i64:
5248  switch (DestTy.SimpleTy) {
5249  default:
5250  llvm_unreachable("Unhandled dest type");
5251  case MVT::i8:
5252  return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
5253  case MVT::i16:
5254  return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
5255  case MVT::i32:
5256  return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
5257  }
5258  }
5259 }
bool hasLDG() const
LLVM Argument representation.
Definition: Argument.h:34
size_t i
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
unsigned getDestAddressSpace() const
SDVTList getVTList() const
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
unsigned getSrcAddressSpace() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getSizeInBits() const
static unsigned int getCodeAddrSpace(MemSDNode *N)
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned Num) const
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:234
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value * > &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
void setNodeId(int Id)
Set unique node id.
const NVPTXSubtarget * Subtarget
MachineFunction * MF
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel)
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:133
A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Shift and rotation operations.
Definition: ISDOpcodes.h:344
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:452
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
SimpleValueType SimpleTy
MVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:239
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool isKernelFunction(const Function &F)
#define F(x, y, z)
Definition: MD5.cpp:51
constexpr bool isMask_64(uint64_t Value)
isMask_64 - This function returns true if the argument is a non-empty sequence of ones starting at th...
Definition: MathExtras.h:380
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:535
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions...
EVT getMemoryVT() const
Return the type of the in-memory value.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:487
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
This class is used to represent ISD::STORE nodes.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:121
SDNode * getNode() const
get the SDNode which holds the desired result
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
CodeGenOpt::Level OptLevel
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:111
unsigned getVectorNumElements() const
MVT - Machine Value Type.
const SDValue & getOperand(unsigned i) const
bool isVector() const
isVector - Return true if this is a vector value type.
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
const NVPTXTargetLowering * getTargetLowering() const override
static cl::opt< bool > UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true))
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
uint32_t Offset
static ManagedStatic< std::set< EVT, EVT::compareRawBits > > EVTs
unsigned getOpcode() const
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
bool isVolatile() const
EVT - Extended Value Type.
Definition: ValueTypes.h:31
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isInvariant() const
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:504
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const Value * getValue() const
Return the base address of the memory access.
static cl::opt< bool > FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
static cl::opt< int > UsePrecDivF32("nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"" IEEE Compliant F32 div.rnd if available."), cl::init(2))
NVPTXTargetMachine.
constexpr bool isShiftedMask_64(uint64_t Value)
isShiftedMask_64 - This function returns true if the argument contains a non-empty sequence of ones w...
Definition: MathExtras.h:393
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG, ready for instruction scheduling.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:226
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
#define N
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
EVT getValueType() const
Return the ValueType of the referenced return value.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:178
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:71
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
static bool isVolatile(Instruction *Inst)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
uint64_t getZExtValue() const
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:248
This class is used to represent ISD::LOAD nodes.