// NOTE: This file was recovered from the LLVM 6.0.0svn doxygen page for
// NVPTXISelDAGToDAG.cpp; doxygen line numbers and navigation text have been
// stripped, and lines dropped by the extraction have been restored.
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//
13 
#include "NVPTXISelDAGToDAG.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "nvptx-isel"
28 
29 /// createNVPTXISelDag - This pass converts a legalized DAG into a
30 /// NVPTX-specific DAG, ready for instruction scheduling.
32  llvm::CodeGenOpt::Level OptLevel) {
33  return new NVPTXDAGToDAGISel(TM, OptLevel);
34 }
35 
37  CodeGenOpt::Level OptLevel)
38  : SelectionDAGISel(tm, OptLevel), TM(tm) {
39  doMulWide = (OptLevel > 0);
40 }
41 
43  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
45 }
46 
47 int NVPTXDAGToDAGISel::getDivF32Level() const {
49 }
50 
51 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
53 }
54 
55 bool NVPTXDAGToDAGISel::useF32FTZ() const {
57 }
58 
59 bool NVPTXDAGToDAGISel::allowFMA() const {
61  return TL->allowFMA(*MF, OptLevel);
62 }
63 
64 bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
66  return TL->allowUnsafeFPMath(*MF);
67 }
68 
69 /// Select - Select instructions not customized! Used for
70 /// expanded, promoted and normal instructions.
71 void NVPTXDAGToDAGISel::Select(SDNode *N) {
72 
73  if (N->isMachineOpcode()) {
74  N->setNodeId(-1);
75  return; // Already selected.
76  }
77 
78  switch (N->getOpcode()) {
79  case ISD::LOAD:
80  if (tryLoad(N))
81  return;
82  break;
83  case ISD::STORE:
84  if (tryStore(N))
85  return;
86  break;
88  if (tryEXTRACT_VECTOR_ELEMENT(N))
89  return;
90  break;
92  SelectSETP_F16X2(N);
93  return;
94 
95  case NVPTXISD::LoadV2:
96  case NVPTXISD::LoadV4:
97  if (tryLoadVector(N))
98  return;
99  break;
100  case NVPTXISD::LDGV2:
101  case NVPTXISD::LDGV4:
102  case NVPTXISD::LDUV2:
103  case NVPTXISD::LDUV4:
104  if (tryLDGLDU(N))
105  return;
106  break;
107  case NVPTXISD::StoreV2:
108  case NVPTXISD::StoreV4:
109  if (tryStoreVector(N))
110  return;
111  break;
112  case NVPTXISD::LoadParam:
115  if (tryLoadParam(N))
116  return;
117  break;
121  if (tryStoreRetval(N))
122  return;
123  break;
129  if (tryStoreParam(N))
130  return;
131  break;
133  if (tryIntrinsicNoChain(N))
134  return;
135  break;
137  if (tryIntrinsicChain(N))
138  return;
139  break;
308  if (tryTextureIntrinsic(N))
309  return;
310  break;
476  if (trySurfaceIntrinsic(N))
477  return;
478  break;
479  case ISD::AND:
480  case ISD::SRA:
481  case ISD::SRL:
482  // Try to select BFE
483  if (tryBFE(N))
484  return;
485  break;
486  case ISD::ADDRSPACECAST:
487  SelectAddrSpaceCast(N);
488  return;
489  case ISD::ConstantFP:
490  if (tryConstantFP16(N))
491  return;
492  break;
493  default:
494  break;
495  }
496  SelectCode(N);
497 }
498 
499 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
500  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
501  switch (IID) {
502  default:
503  return false;
504  case Intrinsic::nvvm_ldg_global_f:
505  case Intrinsic::nvvm_ldg_global_i:
506  case Intrinsic::nvvm_ldg_global_p:
507  case Intrinsic::nvvm_ldu_global_f:
508  case Intrinsic::nvvm_ldu_global_i:
509  case Intrinsic::nvvm_ldu_global_p:
510  return tryLDGLDU(N);
511  }
512 }
513 
514 // There's no way to specify FP16 immediates in .f16 ops, so we have to
515 // load them into an .f16 register first.
516 bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
517  if (N->getValueType(0) != MVT::f16)
518  return false;
520  cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
521  SDNode *LoadConstF16 =
522  CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
523  ReplaceNode(N, LoadConstF16);
524  return true;
525 }
526 
527 // Map ISD:CONDCODE value to appropriate CmpMode expected by
528 // NVPTXInstPrinter::printCmpMode()
529 static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
531  unsigned PTXCmpMode = [](ISD::CondCode CC) {
532  switch (CC) {
533  default:
534  llvm_unreachable("Unexpected condition code.");
535  case ISD::SETOEQ:
536  return CmpMode::EQ;
537  case ISD::SETOGT:
538  return CmpMode::GT;
539  case ISD::SETOGE:
540  return CmpMode::GE;
541  case ISD::SETOLT:
542  return CmpMode::LT;
543  case ISD::SETOLE:
544  return CmpMode::LE;
545  case ISD::SETONE:
546  return CmpMode::NE;
547  case ISD::SETO:
548  return CmpMode::NUM;
549  case ISD::SETUO:
550  return CmpMode::NotANumber;
551  case ISD::SETUEQ:
552  return CmpMode::EQU;
553  case ISD::SETUGT:
554  return CmpMode::GTU;
555  case ISD::SETUGE:
556  return CmpMode::GEU;
557  case ISD::SETULT:
558  return CmpMode::LTU;
559  case ISD::SETULE:
560  return CmpMode::LEU;
561  case ISD::SETUNE:
562  return CmpMode::NEU;
563  case ISD::SETEQ:
564  return CmpMode::EQ;
565  case ISD::SETGT:
566  return CmpMode::GT;
567  case ISD::SETGE:
568  return CmpMode::GE;
569  case ISD::SETLT:
570  return CmpMode::LT;
571  case ISD::SETLE:
572  return CmpMode::LE;
573  case ISD::SETNE:
574  return CmpMode::NE;
575  }
576  }(CondCode.get());
577 
578  if (FTZ)
579  PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
580 
581  return PTXCmpMode;
582 }
583 
584 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
585  unsigned PTXCmpMode =
586  getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
587  SDLoc DL(N);
588  SDNode *SetP = CurDAG->getMachineNode(
589  NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
590  N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
591  ReplaceNode(N, SetP);
592  return true;
593 }
594 
595 // Find all instances of extract_vector_elt that use this v2f16 vector
596 // and coalesce them into a scattering move instruction.
597 bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
598  SDValue Vector = N->getOperand(0);
599 
600  // We only care about f16x2 as it's the only real vector type we
601  // need to deal with.
602  if (Vector.getSimpleValueType() != MVT::v2f16)
603  return false;
604 
605  // Find and record all uses of this vector that extract element 0 or 1.
607  for (const auto &U : Vector.getNode()->uses()) {
608  if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
609  continue;
610  if (U->getOperand(0) != Vector)
611  continue;
612  if (const ConstantSDNode *IdxConst =
613  dyn_cast<ConstantSDNode>(U->getOperand(1))) {
614  if (IdxConst->getZExtValue() == 0)
615  E0.push_back(U);
616  else if (IdxConst->getZExtValue() == 1)
617  E1.push_back(U);
618  else
619  llvm_unreachable("Invalid vector index.");
620  }
621  }
622 
623  // There's no point scattering f16x2 if we only ever access one
624  // element of it.
625  if (E0.empty() || E1.empty())
626  return false;
627 
628  unsigned Op = NVPTX::SplitF16x2;
629  // If the vector has been BITCAST'ed from i32, we can use original
630  // value directly and avoid register-to-register move.
631  SDValue Source = Vector;
632  if (Vector->getOpcode() == ISD::BITCAST) {
633  Op = NVPTX::SplitI32toF16x2;
634  Source = Vector->getOperand(0);
635  }
636  // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
637  // into f16,f16 SplitF16x2(V)
638  SDNode *ScatterOp =
639  CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
640  for (auto *Node : E0)
641  ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
642  for (auto *Node : E1)
643  ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
644 
645  return true;
646 }
647 
648 static unsigned int getCodeAddrSpace(MemSDNode *N) {
649  const Value *Src = N->getMemOperand()->getValue();
650 
651  if (!Src)
653 
654  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
655  switch (PT->getAddressSpace()) {
662  default: break;
663  }
664  }
666 }
667 
669  unsigned CodeAddrSpace, MachineFunction *F) {
670  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
671  // space.
672  //
673  // We have two ways of identifying invariant loads: Loads may be explicitly
674  // marked as invariant, or we may infer them to be invariant.
675  //
676  // We currently infer invariance only for kernel function pointer params that
677  // are noalias (i.e. __restrict) and never written to.
678  //
679  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
680  // not during the SelectionDAG phase).
681  //
682  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
683  // explicitly invariant loads because these are how clang tells us to use ldg
684  // when the user uses a builtin.
685  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
686  return false;
687 
688  if (N->isInvariant())
689  return true;
690 
691  // Load wasn't explicitly invariant. Attempt to infer invariance.
692  if (!isKernelFunction(*F->getFunction()))
693  return false;
694 
695  // We use GetUnderlyingObjects() here instead of
696  // GetUnderlyingObject() mainly because the former looks through phi
697  // nodes while the latter does not. We need to look through phi
698  // nodes to handle pointer induction variables.
700  GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
701  Objs, F->getDataLayout());
702  for (Value *Obj : Objs) {
703  auto *A = dyn_cast<const Argument>(Obj);
704  if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
705  }
706 
707  return true;
708 }
709 
710 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
711  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
712  switch (IID) {
713  default:
714  return false;
715  case Intrinsic::nvvm_texsurf_handle_internal:
716  SelectTexSurfHandle(N);
717  return true;
718  }
719 }
720 
721 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
722  // Op 0 is the intrinsic ID
723  SDValue Wrapper = N->getOperand(1);
724  SDValue GlobalVal = Wrapper.getOperand(0);
725  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
726  MVT::i64, GlobalVal));
727 }
728 
729 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
730  SDValue Src = N->getOperand(0);
731  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
732  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
733  unsigned DstAddrSpace = CastN->getDestAddressSpace();
734 
735  assert(SrcAddrSpace != DstAddrSpace &&
736  "addrspacecast must be between different address spaces");
737 
738  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
739  // Specific to generic
740  unsigned Opc;
741  switch (SrcAddrSpace) {
742  default: report_fatal_error("Bad address space in addrspacecast");
744  Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
745  break;
747  Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
748  break;
749  case ADDRESS_SPACE_CONST:
750  Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
751  break;
752  case ADDRESS_SPACE_LOCAL:
753  Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
754  break;
755  }
757  Src));
758  return;
759  } else {
760  // Generic to specific
761  if (SrcAddrSpace != 0)
762  report_fatal_error("Cannot cast between two non-generic address spaces");
763  unsigned Opc;
764  switch (DstAddrSpace) {
765  default: report_fatal_error("Bad address space in addrspacecast");
767  Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
768  : NVPTX::cvta_to_global_yes;
769  break;
771  Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
772  : NVPTX::cvta_to_shared_yes;
773  break;
774  case ADDRESS_SPACE_CONST:
775  Opc =
776  TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
777  break;
778  case ADDRESS_SPACE_LOCAL:
779  Opc =
780  TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
781  break;
782  case ADDRESS_SPACE_PARAM:
783  Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
784  : NVPTX::nvvm_ptr_gen_to_param;
785  break;
786  }
788  Src));
789  return;
790  }
791 }
792 
793 // Helper function template to reduce amount of boilerplate code for
794 // opcode selection.
796  MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
797  unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
798  unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
799  switch (VT) {
800  case MVT::i1:
801  case MVT::i8:
802  return Opcode_i8;
803  case MVT::i16:
804  return Opcode_i16;
805  case MVT::i32:
806  return Opcode_i32;
807  case MVT::i64:
808  return Opcode_i64;
809  case MVT::f16:
810  return Opcode_f16;
811  case MVT::v2f16:
812  return Opcode_f16x2;
813  case MVT::f32:
814  return Opcode_f32;
815  case MVT::f64:
816  return Opcode_f64;
817  default:
818  return None;
819  }
820 }
821 
822 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
823  SDLoc dl(N);
824  LoadSDNode *LD = cast<LoadSDNode>(N);
825  EVT LoadedVT = LD->getMemoryVT();
826  SDNode *NVPTXLD = nullptr;
827 
828  // do not support pre/post inc/dec
829  if (LD->isIndexed())
830  return false;
831 
832  if (!LoadedVT.isSimple())
833  return false;
834 
835  // Address Space Setting
836  unsigned int codeAddrSpace = getCodeAddrSpace(LD);
837 
838  if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
839  return tryLDGLDU(N);
840  }
841 
842  // Volatile Setting
843  // - .volatile is only availalble for .global and .shared
844  bool isVolatile = LD->isVolatile();
845  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
846  codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
847  codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
848  isVolatile = false;
849 
850  // Type Setting: fromType + fromTypeWidth
851  //
852  // Sign : ISD::SEXTLOAD
853  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
854  // type is integer
855  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
856  MVT SimpleVT = LoadedVT.getSimpleVT();
857  MVT ScalarVT = SimpleVT.getScalarType();
858  // Read at least 8 bits (predicates are stored as 8-bit values)
859  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
860  unsigned int fromType;
861 
862  // Vector Setting
863  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
864  if (SimpleVT.isVector()) {
865  assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
866  // v2f16 is loaded using ld.b32
867  fromTypeWidth = 32;
868  }
869 
870  if ((LD->getExtensionType() == ISD::SEXTLOAD))
872  else if (ScalarVT.isFloatingPoint())
873  // f16 uses .b16 as its storage type.
874  fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
876  else
878 
879  // Create the machine instruction DAG
880  SDValue Chain = N->getOperand(0);
881  SDValue N1 = N->getOperand(1);
882  SDValue Addr;
884  Optional<unsigned> Opcode;
886 
887  if (SelectDirectAddr(N1, Addr)) {
888  Opcode = pickOpcodeForVT(
889  TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
890  NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
891  NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
892  if (!Opcode)
893  return false;
894  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
895  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
896  getI32Imm(fromTypeWidth, dl), Addr, Chain };
897  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
898  MVT::Other, Ops);
899  } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
900  : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
901  Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
902  NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
903  NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
904  NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
905  if (!Opcode)
906  return false;
907  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
908  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
909  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
910  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
911  MVT::Other, Ops);
912  } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
913  : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
914  if (TM.is64Bit())
915  Opcode = pickOpcodeForVT(
916  TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
917  NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
918  NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
919  else
920  Opcode = pickOpcodeForVT(
921  TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
922  NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
923  NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
924  if (!Opcode)
925  return false;
926  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
927  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
928  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
929  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
930  MVT::Other, Ops);
931  } else {
932  if (TM.is64Bit())
933  Opcode = pickOpcodeForVT(
934  TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
935  NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
936  NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
937  NVPTX::LD_f64_areg_64);
938  else
939  Opcode = pickOpcodeForVT(
940  TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
941  NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
942  NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
943  if (!Opcode)
944  return false;
945  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
946  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
947  getI32Imm(fromTypeWidth, dl), N1, Chain };
948  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
949  MVT::Other, Ops);
950  }
951 
952  if (!NVPTXLD)
953  return false;
954 
956  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
957  cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
958 
959  ReplaceNode(N, NVPTXLD);
960  return true;
961 }
962 
963 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
964 
965  SDValue Chain = N->getOperand(0);
966  SDValue Op1 = N->getOperand(1);
967  SDValue Addr, Offset, Base;
968  Optional<unsigned> Opcode;
969  SDLoc DL(N);
970  SDNode *LD;
971  MemSDNode *MemSD = cast<MemSDNode>(N);
972  EVT LoadedVT = MemSD->getMemoryVT();
973 
974  if (!LoadedVT.isSimple())
975  return false;
976 
977  // Address Space Setting
978  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
979 
980  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
981  return tryLDGLDU(N);
982  }
983 
984  // Volatile Setting
985  // - .volatile is only availalble for .global and .shared
986  bool IsVolatile = MemSD->isVolatile();
987  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
988  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
989  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
990  IsVolatile = false;
991 
992  // Vector Setting
993  MVT SimpleVT = LoadedVT.getSimpleVT();
994 
995  // Type Setting: fromType + fromTypeWidth
996  //
997  // Sign : ISD::SEXTLOAD
998  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
999  // type is integer
1000  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1001  MVT ScalarVT = SimpleVT.getScalarType();
1002  // Read at least 8 bits (predicates are stored as 8-bit values)
1003  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
1004  unsigned int FromType;
1005  // The last operand holds the original LoadSDNode::getExtensionType() value
1006  unsigned ExtensionType = cast<ConstantSDNode>(
1007  N->getOperand(N->getNumOperands() - 1))->getZExtValue();
1008  if (ExtensionType == ISD::SEXTLOAD)
1009  FromType = NVPTX::PTXLdStInstCode::Signed;
1010  else if (ScalarVT.isFloatingPoint())
1011  FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1013  else
1015 
1016  unsigned VecType;
1017 
1018  switch (N->getOpcode()) {
1019  case NVPTXISD::LoadV2:
1021  break;
1022  case NVPTXISD::LoadV4:
1024  break;
1025  default:
1026  return false;
1027  }
1028 
1029  EVT EltVT = N->getValueType(0);
1030 
1031  // v8f16 is a special case. PTX doesn't have ld.v8.f16
1032  // instruction. Instead, we split the vector into v2f16 chunks and
1033  // load them with ld.v4.b32.
1034  if (EltVT == MVT::v2f16) {
1035  assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1036  EltVT = MVT::i32;
1038  FromTypeWidth = 32;
1039  }
1040 
1041  if (SelectDirectAddr(Op1, Addr)) {
1042  switch (N->getOpcode()) {
1043  default:
1044  return false;
1045  case NVPTXISD::LoadV2:
1046  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1047  NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1048  NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1049  NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1050  NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1051  break;
1052  case NVPTXISD::LoadV4:
1053  Opcode =
1054  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1055  NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1056  NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1057  NVPTX::LDV_f32_v4_avar, None);
1058  break;
1059  }
1060  if (!Opcode)
1061  return false;
1062  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1063  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1064  getI32Imm(FromTypeWidth, DL), Addr, Chain };
1065  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1066  } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1067  : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1068  switch (N->getOpcode()) {
1069  default:
1070  return false;
1071  case NVPTXISD::LoadV2:
1072  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1073  NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1074  NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1075  NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1076  NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1077  break;
1078  case NVPTXISD::LoadV4:
1079  Opcode =
1080  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1081  NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1082  NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1083  NVPTX::LDV_f32_v4_asi, None);
1084  break;
1085  }
1086  if (!Opcode)
1087  return false;
1088  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1089  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1090  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1091  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1092  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1093  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1094  if (TM.is64Bit()) {
1095  switch (N->getOpcode()) {
1096  default:
1097  return false;
1098  case NVPTXISD::LoadV2:
1099  Opcode = pickOpcodeForVT(
1100  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1101  NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1102  NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1103  NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1104  NVPTX::LDV_f64_v2_ari_64);
1105  break;
1106  case NVPTXISD::LoadV4:
1107  Opcode = pickOpcodeForVT(
1108  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1109  NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1110  NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1111  NVPTX::LDV_f32_v4_ari_64, None);
1112  break;
1113  }
1114  } else {
1115  switch (N->getOpcode()) {
1116  default:
1117  return false;
1118  case NVPTXISD::LoadV2:
1119  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1120  NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1121  NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1122  NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1123  NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1124  break;
1125  case NVPTXISD::LoadV4:
1126  Opcode =
1127  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1128  NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1129  NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1130  NVPTX::LDV_f32_v4_ari, None);
1131  break;
1132  }
1133  }
1134  if (!Opcode)
1135  return false;
1136  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1137  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1138  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1139 
1140  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1141  } else {
1142  if (TM.is64Bit()) {
1143  switch (N->getOpcode()) {
1144  default:
1145  return false;
1146  case NVPTXISD::LoadV2:
1147  Opcode = pickOpcodeForVT(
1148  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1149  NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1150  NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1151  NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1152  NVPTX::LDV_f64_v2_areg_64);
1153  break;
1154  case NVPTXISD::LoadV4:
1155  Opcode = pickOpcodeForVT(
1156  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1157  NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1158  NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1159  NVPTX::LDV_f32_v4_areg_64, None);
1160  break;
1161  }
1162  } else {
1163  switch (N->getOpcode()) {
1164  default:
1165  return false;
1166  case NVPTXISD::LoadV2:
1167  Opcode =
1168  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1169  NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1170  NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1171  NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1172  NVPTX::LDV_f64_v2_areg);
1173  break;
1174  case NVPTXISD::LoadV4:
1175  Opcode = pickOpcodeForVT(
1176  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1177  NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1178  NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1179  NVPTX::LDV_f32_v4_areg, None);
1180  break;
1181  }
1182  }
1183  if (!Opcode)
1184  return false;
1185  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1186  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1187  getI32Imm(FromTypeWidth, DL), Op1, Chain };
1188  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1189  }
1190 
1192  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1193  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1194 
1195  ReplaceNode(N, LD);
1196  return true;
1197 }
1198 
1199 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1200 
1201  SDValue Chain = N->getOperand(0);
1202  SDValue Op1;
1203  MemSDNode *Mem;
1204  bool IsLDG = true;
1205 
1206  // If this is an LDG intrinsic, the address is the third operand. If its an
1207  // LDG/LDU SD node (from custom vector handling), then its the second operand
1208  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1209  Op1 = N->getOperand(2);
1210  Mem = cast<MemIntrinsicSDNode>(N);
1211  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1212  switch (IID) {
1213  default:
1214  return false;
1215  case Intrinsic::nvvm_ldg_global_f:
1216  case Intrinsic::nvvm_ldg_global_i:
1217  case Intrinsic::nvvm_ldg_global_p:
1218  IsLDG = true;
1219  break;
1220  case Intrinsic::nvvm_ldu_global_f:
1221  case Intrinsic::nvvm_ldu_global_i:
1222  case Intrinsic::nvvm_ldu_global_p:
1223  IsLDG = false;
1224  break;
1225  }
1226  } else {
1227  Op1 = N->getOperand(1);
1228  Mem = cast<MemSDNode>(N);
1229  }
1230 
1231  Optional<unsigned> Opcode;
1232  SDLoc DL(N);
1233  SDNode *LD;
1234  SDValue Base, Offset, Addr;
1235 
1236  EVT EltVT = Mem->getMemoryVT();
1237  unsigned NumElts = 1;
1238  if (EltVT.isVector()) {
1239  NumElts = EltVT.getVectorNumElements();
1240  EltVT = EltVT.getVectorElementType();
1241  }
1242 
1243  // Build the "promoted" result VTList for the load. If we are really loading
1244  // i8s, then the return type will be promoted to i16 since we do not expose
1245  // 8-bit registers in NVPTX.
1246  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1247  SmallVector<EVT, 5> InstVTs;
1248  for (unsigned i = 0; i != NumElts; ++i) {
1249  InstVTs.push_back(NodeVT);
1250  }
1251  InstVTs.push_back(MVT::Other);
1252  SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1253 
1254  if (SelectDirectAddr(Op1, Addr)) {
1255  switch (N->getOpcode()) {
1256  default:
1257  return false;
1259  if (IsLDG)
1260  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1261  NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1262  NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1263  NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1264  NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1265  NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1266  NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1267  NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1268  NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1269  else
1270  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1271  NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1272  NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1273  NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1274  NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1275  NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1276  NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1277  NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1278  NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1279  break;
1280  case NVPTXISD::LDGV2:
1281  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1282  NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1283  NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1284  NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1285  NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1286  NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1287  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1288  NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1289  NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1290  break;
1291  case NVPTXISD::LDUV2:
1292  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1293  NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1294  NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1295  NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1296  NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1297  NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1298  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1299  NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1300  NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1301  break;
1302  case NVPTXISD::LDGV4:
1303  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1304  NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1305  NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1306  NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1307  NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1308  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1309  NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
1310  break;
1311  case NVPTXISD::LDUV4:
1312  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1313  NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1314  NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1315  NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1316  NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1317  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1318  NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
1319  break;
1320  }
1321  if (!Opcode)
1322  return false;
1323  SDValue Ops[] = { Addr, Chain };
1324  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1325  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1326  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1327  if (TM.is64Bit()) {
1328  switch (N->getOpcode()) {
1329  default:
1330  return false;
1331  case ISD::LOAD:
1333  if (IsLDG)
1334  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1335  NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1336  NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1337  NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1338  NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1339  NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1340  NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1341  NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1342  NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1343  else
1344  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1345  NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1346  NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1347  NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1348  NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1349  NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1350  NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1351  NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1352  NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1353  break;
1354  case NVPTXISD::LoadV2:
1355  case NVPTXISD::LDGV2:
1356  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1357  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1358  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1359  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1360  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1361  NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1362  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1363  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1364  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1365  break;
1366  case NVPTXISD::LDUV2:
1367  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1368  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1369  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1370  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1371  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1372  NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1373  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1374  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1375  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1376  break;
1377  case NVPTXISD::LoadV4:
1378  case NVPTXISD::LDGV4:
1379  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1380  NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1381  NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1382  NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1383  NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1384  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1385  NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
1386  break;
1387  case NVPTXISD::LDUV4:
1388  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1389  NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1390  NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1391  NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1392  NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1393  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1394  NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
1395  break;
1396  }
1397  } else {
1398  switch (N->getOpcode()) {
1399  default:
1400  return false;
1401  case ISD::LOAD:
1403  if (IsLDG)
1404  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1405  NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1406  NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1407  NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1408  NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1409  NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1410  NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1411  NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1412  NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1413  else
1414  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1415  NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1416  NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1417  NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1418  NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1419  NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1420  NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1421  NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1422  NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1423  break;
1424  case NVPTXISD::LoadV2:
1425  case NVPTXISD::LDGV2:
1426  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1427  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1428  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1429  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1430  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1431  NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1432  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1433  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1434  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1435  break;
1436  case NVPTXISD::LDUV2:
1437  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1438  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1439  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1440  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1441  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1442  NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1443  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1444  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1445  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1446  break;
1447  case NVPTXISD::LoadV4:
1448  case NVPTXISD::LDGV4:
1449  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1450  NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1451  NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1452  NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1453  NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1454  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1455  NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
1456  break;
1457  case NVPTXISD::LDUV4:
1458  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1459  NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1460  NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1461  NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1462  NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1463  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1464  NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
1465  break;
1466  }
1467  }
1468  if (!Opcode)
1469  return false;
1470  SDValue Ops[] = {Base, Offset, Chain};
1471  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1472  } else {
1473  if (TM.is64Bit()) {
1474  switch (N->getOpcode()) {
1475  default:
1476  return false;
1477  case ISD::LOAD:
1479  if (IsLDG)
1480  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1481  NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1482  NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1483  NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1484  NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1485  NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1486  NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1487  NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1488  NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1489  else
1490  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1491  NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1492  NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1493  NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1494  NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1495  NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1496  NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1497  NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1498  NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1499  break;
1500  case NVPTXISD::LoadV2:
1501  case NVPTXISD::LDGV2:
1502  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1503  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1504  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1505  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1506  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1507  NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1508  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1509  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1510  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1511  break;
1512  case NVPTXISD::LDUV2:
1513  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1514  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1515  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1516  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1517  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1518  NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1519  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1520  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1521  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1522  break;
1523  case NVPTXISD::LoadV4:
1524  case NVPTXISD::LDGV4:
1525  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1526  NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1527  NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1528  NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1529  NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1530  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1531  NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
1532  break;
1533  case NVPTXISD::LDUV4:
1534  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1535  NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1536  NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1537  NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1538  NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1539  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1540  NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
1541  break;
1542  }
1543  } else {
1544  switch (N->getOpcode()) {
1545  default:
1546  return false;
1547  case ISD::LOAD:
1549  if (IsLDG)
1550  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1551  NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1552  NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1553  NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1554  NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1555  NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1556  NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1557  NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1558  NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1559  else
1560  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1561  NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1562  NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1563  NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1564  NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1565  NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1566  NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1567  NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1568  NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1569  break;
1570  case NVPTXISD::LoadV2:
1571  case NVPTXISD::LDGV2:
1572  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1573  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1574  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1575  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1576  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1577  NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1578  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1579  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1580  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1581  break;
1582  case NVPTXISD::LDUV2:
1583  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1584  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1585  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1586  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1587  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1588  NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1589  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1590  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1591  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1592  break;
1593  case NVPTXISD::LoadV4:
1594  case NVPTXISD::LDGV4:
1595  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1596  NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1597  NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1598  NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1599  NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1600  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1601  NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
1602  break;
1603  case NVPTXISD::LDUV4:
1604  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1605  NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1606  NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1607  NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1608  NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1609  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1610  NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
1611  break;
1612  }
1613  }
1614  if (!Opcode)
1615  return false;
1616  SDValue Ops[] = { Op1, Chain };
1617  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1618  }
1619 
1621  MemRefs0[0] = Mem->getMemOperand();
1622  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1623 
1624  // For automatic generation of LDG (through SelectLoad[Vector], not the
1625  // intrinsics), we may have an extending load like:
1626  //
1627  // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1628  //
1629  // In this case, the matching logic above will select a load for the original
1630  // memory type (in this case, i8) and our types will not match (the node needs
1631  // to return an i32 in this case). Our LDG/LDU nodes do not support the
1632  // concept of sign-/zero-extension, so emulate it here by adding an explicit
1633  // CVT instruction. Ptxas should clean up any redundancies here.
1634 
1635  EVT OrigType = N->getValueType(0);
1636  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
1637 
1638  if (OrigType != EltVT && LdNode) {
1639  // We have an extending-load. The instruction we selected operates on the
1640  // smaller type, but the SDNode we are replacing has the larger type. We
1641  // need to emit a CVT to make the types match.
1642  bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1643  unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1644  EltVT.getSimpleVT(), IsSigned);
1645 
1646  // For each output value, apply the manual sign/zero-extension and make sure
1647  // all users of the load go through that CVT.
1648  for (unsigned i = 0; i != NumElts; ++i) {
1649  SDValue Res(LD, i);
1650  SDValue OrigVal(N, i);
1651 
1652  SDNode *CvtNode =
1653  CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
1655  DL, MVT::i32));
1656  ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1657  }
1658  }
1659 
1660  ReplaceNode(N, LD);
1661  return true;
1662 }
1663 
/// Try to lower an ISD::STORE node to a scalar NVPTX "st" machine
/// instruction. On success, replaces N with the selected MachineSDNode and
/// returns true; returns false to fall back to default instruction selection.
1664 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1665  SDLoc dl(N);
1666  StoreSDNode *ST = cast<StoreSDNode>(N);
1667  EVT StoreVT = ST->getMemoryVT();
1668  SDNode *NVPTXST = nullptr;
1669 
1670  // do not support pre/post inc/dec
1671  if (ST->isIndexed())
1672  return false;
1673 
1674  if (!StoreVT.isSimple())
1675  return false;
1676 
1677  // Address Space Setting
1678  unsigned int codeAddrSpace = getCodeAddrSpace(ST);
1679 
1680  // Volatile Setting
1681  // - .volatile is only available for .global and .shared, so drop the
1682  //   flag for any other address space.
1682  bool isVolatile = ST->isVolatile();
1683  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1684  codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1685  codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1686  isVolatile = false;
1687 
1688  // Vector Setting
1689  MVT SimpleVT = StoreVT.getSimpleVT();
1690  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1691 
1692  // Type Setting: toType + toTypeWidth
1693  // - for integer type, always use 'u'
1694  //
1695  MVT ScalarVT = SimpleVT.getScalarType();
1696  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1697  if (SimpleVT.isVector()) {
1698  assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
1699  // v2f16 is stored using st.b32
1700  toTypeWidth = 32;
1701  }
1702 
1703  unsigned int toType;
1704  if (ScalarVT.isFloatingPoint())
1705  // f16 uses .b16 as its storage type.
1706  toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
// NOTE(review): the ':' alternative of the ternary above and the body of the
// 'else' below (original lines 1707-1710) appear truncated in this extract —
// confirm against the upstream source before relying on this region.
1708  else
1710 
1711  // Create the machine instruction DAG
1712  SDValue Chain = N->getOperand(0);
1713  SDValue N1 = N->getOperand(1);
1714  SDValue N2 = N->getOperand(2);
1715  SDValue Addr;
1716  SDValue Offset, Base;
1717  Optional<unsigned> Opcode;
// NOTE(review): the declaration of SourceVT (original line 1718) is missing
// from this extract; it is used by every pickOpcodeForVT call below.
1719 
// Four addressing forms are tried in order: direct address (avar),
// symbol+immediate (asi), register+immediate (ari/ari_64), and plain
// register (areg/areg_64). The 64-bit variants are used on 64-bit targets.
1720  if (SelectDirectAddr(N2, Addr)) {
1721  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1722  NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1723  NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1724  NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1725  if (!Opcode)
1726  return false;
1727  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1728  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1729  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
1730  Chain };
1731  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1732  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1733  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1734  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1735  NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1736  NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1737  NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1738  if (!Opcode)
1739  return false;
1740  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1741  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1742  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
1743  Offset, Chain };
1744  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1745  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1746  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1747  if (TM.is64Bit())
1748  Opcode = pickOpcodeForVT(
1749  SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1750  NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1751  NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1752  else
1753  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1754  NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1755  NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1756  NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1757  if (!Opcode)
1758  return false;
1759 
1760  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1761  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1762  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
1763  Offset, Chain };
1764  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1765  } else {
1766  if (TM.is64Bit())
1767  Opcode =
1768  pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1769  NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1770  NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1771  NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1772  else
1773  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1774  NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1775  NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1776  NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1777  if (!Opcode)
1778  return false;
1779  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1780  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1781  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
1782  Chain };
1783  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1784  }
1785 
1786  if (!NVPTXST)
1787  return false;
1788 
// Attach the original memory operand to the new machine node so later passes
// keep the correct aliasing/volatility information.
// NOTE(review): the MemRefs0 declaration (original line 1789) appears
// truncated in this extract.
1790  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1791  cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1792  ReplaceNode(N, NVPTXST);
1793  return true;
1794 }
1795 
/// Try to lower an NVPTXISD::StoreV2/StoreV4 node to a vector NVPTX "st.v2"/
/// "st.v4" machine instruction. On success, replaces N and returns true;
/// returns false to fall back to default instruction selection.
1796 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1797  SDValue Chain = N->getOperand(0);
1798  SDValue Op1 = N->getOperand(1);
1799  SDValue Addr, Offset, Base;
1800  Optional<unsigned> Opcode;
1801  SDLoc DL(N);
1802  SDNode *ST;
1803  EVT EltVT = Op1.getValueType();
1804  MemSDNode *MemSD = cast<MemSDNode>(N);
1805  EVT StoreVT = MemSD->getMemoryVT();
1806 
1807  // Address Space Setting
1808  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1809 
// Stores into the constant address space are invalid by construction; fail
// hard rather than silently selecting a bogus instruction.
1810  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1811  report_fatal_error("Cannot store to pointer that points to constant "
1812  "memory space");
1813  }
1814 
1815  // Volatile Setting
1816  // - .volatile is only available for .global and .shared, so drop the
1817  //   flag for any other address space.
1817  bool IsVolatile = MemSD->isVolatile();
1818  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1819  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1820  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1821  IsVolatile = false;
1822 
1823  // Type Setting: toType + toTypeWidth
1824  // - for integer type, always use 'u'
1825  assert(StoreVT.isSimple() && "Store value is not simple");
1826  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1827  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1828  unsigned ToType;
1829  if (ScalarVT.isFloatingPoint())
1830  ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
// NOTE(review): the ':' alternative of the ternary above, the 'else' body
// below, and the StOps declaration (original lines 1831, 1833, 1835) appear
// truncated in this extract — confirm against the upstream source.
1832  else
1834 
1836  SDValue N2;
1837  unsigned VecType;
1838 
// Collect the value operands; the last operand before the flags is the
// address (N2). The VecType assignments (original lines 1841/1847) appear
// truncated in this extract.
1839  switch (N->getOpcode()) {
1840  case NVPTXISD::StoreV2:
1842  StOps.push_back(N->getOperand(1));
1843  StOps.push_back(N->getOperand(2));
1844  N2 = N->getOperand(3);
1845  break;
1846  case NVPTXISD::StoreV4:
1848  StOps.push_back(N->getOperand(1));
1849  StOps.push_back(N->getOperand(2));
1850  StOps.push_back(N->getOperand(3));
1851  StOps.push_back(N->getOperand(4));
1852  N2 = N->getOperand(5);
1853  break;
1854  default:
1855  return false;
1856  }
1857 
1858  // v8f16 is a special case. PTX doesn't have st.v8.f16
1859  // instruction. Instead, we split the vector into v2f16 chunks and
1860  // store them with st.v4.b32.
1861  if (EltVT == MVT::v2f16) {
1862  assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1863  EltVT = MVT::i32;
1865  ToTypeWidth = 32;
1866  }
1867 
// Common immediate operands shared by every addressing form below.
1868  StOps.push_back(getI32Imm(IsVolatile, DL));
1869  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1870  StOps.push_back(getI32Imm(VecType, DL));
1871  StOps.push_back(getI32Imm(ToType, DL));
1872  StOps.push_back(getI32Imm(ToTypeWidth, DL));
1873 
// Addressing forms, tried in order: direct (avar), symbol+imm (asi),
// register+imm (ari/ari_64), plain register (areg/areg_64).
1874  if (SelectDirectAddr(N2, Addr)) {
1875  switch (N->getOpcode()) {
1876  default:
1877  return false;
1878  case NVPTXISD::StoreV2:
1879  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1880  NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1881  NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1882  NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1883  NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1884  break;
1885  case NVPTXISD::StoreV4:
1886  Opcode =
1887  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1888  NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1889  NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1890  NVPTX::STV_f32_v4_avar, None);
1891  break;
1892  }
1893  StOps.push_back(Addr);
1894  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1895  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1896  switch (N->getOpcode()) {
1897  default:
1898  return false;
1899  case NVPTXISD::StoreV2:
1900  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1901  NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1902  NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1903  NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1904  NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1905  break;
1906  case NVPTXISD::StoreV4:
1907  Opcode =
1908  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1909  NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1910  NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1911  NVPTX::STV_f32_v4_asi, None);
1912  break;
1913  }
1914  StOps.push_back(Base);
1915  StOps.push_back(Offset);
1916  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1917  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1918  if (TM.is64Bit()) {
1919  switch (N->getOpcode()) {
1920  default:
1921  return false;
1922  case NVPTXISD::StoreV2:
1923  Opcode = pickOpcodeForVT(
1924  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
1925  NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
1926  NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
1927  NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
1928  NVPTX::STV_f64_v2_ari_64);
1929  break;
1930  case NVPTXISD::StoreV4:
1931  Opcode = pickOpcodeForVT(
1932  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
1933  NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
1934  NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
1935  NVPTX::STV_f32_v4_ari_64, None);
1936  break;
1937  }
1938  } else {
1939  switch (N->getOpcode()) {
1940  default:
1941  return false;
1942  case NVPTXISD::StoreV2:
1943  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1944  NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1945  NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1946  NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
1947  NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1948  break;
1949  case NVPTXISD::StoreV4:
1950  Opcode =
1951  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
1952  NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
1953  NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
1954  NVPTX::STV_f32_v4_ari, None);
1955  break;
1956  }
1957  }
1958  StOps.push_back(Base);
1959  StOps.push_back(Offset);
1960  } else {
1961  if (TM.is64Bit()) {
1962  switch (N->getOpcode()) {
1963  default:
1964  return false;
1965  case NVPTXISD::StoreV2:
1966  Opcode = pickOpcodeForVT(
1967  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
1968  NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1969  NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
1970  NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1971  NVPTX::STV_f64_v2_areg_64);
1972  break;
1973  case NVPTXISD::StoreV4:
1974  Opcode = pickOpcodeForVT(
1975  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
1976  NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
1977  NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
1978  NVPTX::STV_f32_v4_areg_64, None);
1979  break;
1980  }
1981  } else {
1982  switch (N->getOpcode()) {
1983  default:
1984  return false;
1985  case NVPTXISD::StoreV2:
1986  Opcode =
1987  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
1988  NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
1989  NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
1990  NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
1991  NVPTX::STV_f64_v2_areg);
1992  break;
1993  case NVPTXISD::StoreV4:
1994  Opcode =
1995  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
1996  NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
1997  NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
1998  NVPTX::STV_f32_v4_areg, None);
1999  break;
2000  }
2001  }
2002  StOps.push_back(N2);
2003  }
2004 
2005  if (!Opcode)
2006  return false;
2007 
2008  StOps.push_back(Chain);
2009 
2010  ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
2011 
// Propagate the original memory operand to the selected machine node.
// NOTE(review): the MemRefs0 declaration (original line 2012) appears
// truncated in this extract.
2013  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2014  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2015 
2016  ReplaceNode(N, ST);
2017  return true;
2018 }
2019 
/// Try to lower an NVPTXISD::LoadParam/LoadParamV2/LoadParamV4 node to the
/// corresponding LoadParamMem* machine instruction. On success, replaces
/// Node and returns true; returns false otherwise.
2020 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2021  SDValue Chain = Node->getOperand(0);
2022  SDValue Offset = Node->getOperand(2);
2023  SDValue Flag = Node->getOperand(3);
2024  SDLoc DL(Node);
2025  MemSDNode *Mem = cast<MemSDNode>(Node);
2026 
// How many elements does this load produce?
2027  unsigned VecSize;
2028  switch (Node->getOpcode()) {
2029  default:
2030  return false;
2031  case NVPTXISD::LoadParam:
2032  VecSize = 1;
2033  break;
2034  case NVPTXISD::LoadParamV2:
2035  VecSize = 2;
2036  break;
2037  case NVPTXISD::LoadParamV4:
2038  VecSize = 4;
2039  break;
2040  }
2041 
// Opcode choice is driven by the in-memory type; the node's result type
// (EltVT) only determines the value-type list of the machine node.
2042  EVT EltVT = Node->getValueType(0);
2043  EVT MemVT = Mem->getMemoryVT();
2044 
2045  Optional<unsigned> Opcode;
2046 
2047  switch (VecSize) {
2048  default:
2049  return false;
2050  case 1:
2051  Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2052  NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2053  NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2054  NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2055  NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2056  break;
2057  case 2:
2058  Opcode =
2059  pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2060  NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2061  NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2062  NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2063  NVPTX::LoadParamMemV2F64);
2064  break;
2065  case 4:
2066  Opcode = pickOpcodeForVT(
2067  MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2068  NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2069  NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2070  NVPTX::LoadParamMemV4F32, None);
2071  break;
2072  }
2073  if (!Opcode)
2074  return false;
2075 
// One result value per element, plus chain and glue outputs.
2076  SDVTList VTs;
2077  if (VecSize == 1) {
2078  VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2079  } else if (VecSize == 2) {
2080  VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2081  } else {
2082  EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2083  VTs = CurDAG->getVTList(EVTs);
2084  }
2085 
2086  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2087 
// NOTE(review): the declaration of Ops (original line 2088) appears
// truncated in this extract.
2089  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2090  Ops.push_back(Chain);
2091  Ops.push_back(Flag);
2092 
2093  ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
2094  return true;
2095 }
2096 
/// Try to lower an NVPTXISD::StoreRetval (and its V2/V4 variants) node to a
/// StoreRetval* machine instruction. On success, replaces N and returns
/// true; returns false otherwise.
2097 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2098  SDLoc DL(N);
2099  SDValue Chain = N->getOperand(0);
2100  SDValue Offset = N->getOperand(1);
2101  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2102  MemSDNode *Mem = cast<MemSDNode>(N);
2103 
2104  // How many elements do we have?
// NOTE(review): the case labels for the V2/V4 variants (original lines
// 2112 and 2115) appear truncated in this extract — confirm upstream.
2105  unsigned NumElts = 1;
2106  switch (N->getOpcode()) {
2107  default:
2108  return false;
2109  case NVPTXISD::StoreRetval:
2110  NumElts = 1;
2111  break;
2113  NumElts = 2;
2114  break;
2116  NumElts = 4;
2117  break;
2118  }
2119 
2120  // Build vector of operands: the values to store start at operand 2,
2120  // followed by the byte offset and the chain.
// NOTE(review): the declaration of Ops (original line 2121) appears
// truncated in this extract.
2122  for (unsigned i = 0; i < NumElts; ++i)
2123  Ops.push_back(N->getOperand(i + 2));
2124  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2125  Ops.push_back(Chain);
2126 
2127  // Determine target opcode
2128  // If we have an i1, use an 8-bit store. The lowering code in
2129  // NVPTXISelLowering will have already emitted an upcast.
2130  Optional<unsigned> Opcode = 0;
2131  switch (NumElts) {
2132  default:
2133  return false;
2134  case 1:
2135  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2136  NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2137  NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2138  NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2139  NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2140  break;
2141  case 2:
2142  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2143  NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2144  NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2145  NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2146  NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2147  break;
2148  case 4:
2149  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2150  NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2151  NVPTX::StoreRetvalV4I32, None,
2152  NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2153  NVPTX::StoreRetvalV4F32, None);
2154  break;
2155  }
2156  if (!Opcode)
2157  return false;
2158 
// Propagate the original memory operand to the selected machine node.
// NOTE(review): the MemRefs0 declaration (original line 2160) appears
// truncated in this extract.
2159  SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
2161  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2162  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2163 
2164  ReplaceNode(N, Ret);
2165  return true;
2166 }
2167 
/// Try to lower an NVPTXISD::StoreParam (and its V2/V4/U32/S32 variants)
/// node to a StoreParam* machine instruction. The U32/S32 variants insert an
/// explicit CVT before the store. On success, replaces N and returns true.
2168 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2169  SDLoc DL(N);
2170  SDValue Chain = N->getOperand(0);
2171  SDValue Param = N->getOperand(1);
2172  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2173  SDValue Offset = N->getOperand(2);
2174  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2175  MemSDNode *Mem = cast<MemSDNode>(N);
2176  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2177 
2178  // How many elements do we have?
// NOTE(review): several case labels of this switch (original lines 2183-84,
// 2188, 2191) appear truncated in this extract — confirm upstream.
2179  unsigned NumElts = 1;
2180  switch (N->getOpcode()) {
2181  default:
2182  return false;
2185  case NVPTXISD::StoreParam:
2186  NumElts = 1;
2187  break;
2189  NumElts = 2;
2190  break;
2192  NumElts = 4;
2193  break;
2194  }
2195 
2196  // Build vector of operands: the values to store start at operand 3,
2196  // followed by the param number, the byte offset, the chain and the glue.
// NOTE(review): the declaration of Ops (original line 2197) appears
// truncated in this extract.
2198  for (unsigned i = 0; i < NumElts; ++i)
2199  Ops.push_back(N->getOperand(i + 3));
2200  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2201  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2202  Ops.push_back(Chain);
2203  Ops.push_back(Flag);
2204 
2205  // Determine target opcode
2206  // If we have an i1, use an 8-bit store. The lowering code in
2207  // NVPTXISelLowering will have already emitted an upcast.
2208  Optional<unsigned> Opcode = 0;
2209  switch (N->getOpcode()) {
2210  default:
2211  switch (NumElts) {
2212  default:
2213  return false;
2214  case 1:
2215  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2216  NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2217  NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2218  NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2219  NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2220  break;
2221  case 2:
2222  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2223  NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2224  NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2225  NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2226  NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2227  break;
2228  case 4:
2229  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2230  NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2231  NVPTX::StoreParamV4I32, None,
2232  NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2233  NVPTX::StoreParamV4F32, None);
2234  break;
2235  }
2236  if (!Opcode)
2237  return false;
2238  break;
2239  // Special case: if we have a sign-extend/zero-extend node, insert the
2240  // conversion instruction first, and use that as the value operand to
2241  // the selected StoreParam node.
// NOTE(review): the CvtNone initializers (original lines 2244 and 2253)
// appear truncated in this extract.
2242  case NVPTXISD::StoreParamU32: {
2243  Opcode = NVPTX::StoreParamI32;
2245  MVT::i32);
2246  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2247  MVT::i32, Ops[0], CvtNone);
2248  Ops[0] = SDValue(Cvt, 0);
2249  break;
2250  }
2251  case NVPTXISD::StoreParamS32: {
2252  Opcode = NVPTX::StoreParamI32;
2254  MVT::i32);
2255  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2256  MVT::i32, Ops[0], CvtNone);
2257  Ops[0] = SDValue(Cvt, 0);
2258  break;
2259  }
2260  }
2261 
// Build the machine node and propagate the original memory operand.
// NOTE(review): the RetVTs and MemRefs0 declarations (original lines 2262
// and 2265) appear truncated in this extract.
2263  SDNode *Ret =
2264  CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
2266  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2267  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2268 
2269  ReplaceNode(N, Ret);
2270  return true;
2271 }
2272 
2273 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2274  SDValue Chain = N->getOperand(0);
2275  unsigned Opc = 0;
2277 
2278  switch (N->getOpcode()) {
2279  default: return false;
2281  Opc = NVPTX::TEX_1D_F32_S32;
2282  break;
2284  Opc = NVPTX::TEX_1D_F32_F32;
2285  break;
2287  Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2288  break;
2290  Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2291  break;
2292  case NVPTXISD::Tex1DS32S32:
2293  Opc = NVPTX::TEX_1D_S32_S32;
2294  break;
2296  Opc = NVPTX::TEX_1D_S32_F32;
2297  break;
2299  Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
2300  break;
2302  Opc = NVPTX::TEX_1D_S32_F32_GRAD;
2303  break;
2304  case NVPTXISD::Tex1DU32S32:
2305  Opc = NVPTX::TEX_1D_U32_S32;
2306  break;
2308  Opc = NVPTX::TEX_1D_U32_F32;
2309  break;
2311  Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2312  break;
2314  Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2315  break;
2317  Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
2318  break;
2320  Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2321  break;
2323  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2324  break;
2326  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2327  break;
2329  Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
2330  break;
2332  Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
2333  break;
2335  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
2336  break;
2338  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
2339  break;
2341  Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2342  break;
2344  Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2345  break;
2347  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2348  break;
2350  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2351  break;
2353  Opc = NVPTX::TEX_2D_F32_S32;
2354  break;
2356  Opc = NVPTX::TEX_2D_F32_F32;
2357  break;
2359  Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2360  break;
2362  Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2363  break;
2364  case NVPTXISD::Tex2DS32S32:
2365  Opc = NVPTX::TEX_2D_S32_S32;
2366  break;
2368  Opc = NVPTX::TEX_2D_S32_F32;
2369  break;
2371  Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
2372  break;
2374  Opc = NVPTX::TEX_2D_S32_F32_GRAD;
2375  break;
2376  case NVPTXISD::Tex2DU32S32:
2377  Opc = NVPTX::TEX_2D_U32_S32;
2378  break;
2380  Opc = NVPTX::TEX_2D_U32_F32;
2381  break;
2383  Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2384  break;
2386  Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2387  break;
2389  Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
2390  break;
2392  Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2393  break;
2395  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2396  break;
2398  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2399  break;
2401  Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
2402  break;
2404  Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
2405  break;
2407  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
2408  break;
2410  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
2411  break;
2413  Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2414  break;
2416  Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2417  break;
2419  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2420  break;
2422  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2423  break;
2425  Opc = NVPTX::TEX_3D_F32_S32;
2426  break;
2428  Opc = NVPTX::TEX_3D_F32_F32;
2429  break;
2431  Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2432  break;
2434  Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2435  break;
2436  case NVPTXISD::Tex3DS32S32:
2437  Opc = NVPTX::TEX_3D_S32_S32;
2438  break;
2440  Opc = NVPTX::TEX_3D_S32_F32;
2441  break;
2443  Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
2444  break;
2446  Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2447  break;
2448  case NVPTXISD::Tex3DU32S32:
2449  Opc = NVPTX::TEX_3D_U32_S32;
2450  break;
2452  Opc = NVPTX::TEX_3D_U32_F32;
2453  break;
2455  Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2456  break;
2458  Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2459  break;
2461  Opc = NVPTX::TEX_CUBE_F32_F32;
2462  break;
2464  Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2465  break;
2467  Opc = NVPTX::TEX_CUBE_S32_F32;
2468  break;
2470  Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2471  break;
2473  Opc = NVPTX::TEX_CUBE_U32_F32;
2474  break;
2476  Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2477  break;
2479  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2480  break;
2482  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2483  break;
2485  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2486  break;
2488  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2489  break;
2491  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2492  break;
2494  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2495  break;
2497  Opc = NVPTX::TLD4_R_2D_F32_F32;
2498  break;
2500  Opc = NVPTX::TLD4_G_2D_F32_F32;
2501  break;
2503  Opc = NVPTX::TLD4_B_2D_F32_F32;
2504  break;
2506  Opc = NVPTX::TLD4_A_2D_F32_F32;
2507  break;
2509  Opc = NVPTX::TLD4_R_2D_S32_F32;
2510  break;
2512  Opc = NVPTX::TLD4_G_2D_S32_F32;
2513  break;
2515  Opc = NVPTX::TLD4_B_2D_S32_F32;
2516  break;
2518  Opc = NVPTX::TLD4_A_2D_S32_F32;
2519  break;
2521  Opc = NVPTX::TLD4_R_2D_U32_F32;
2522  break;
2524  Opc = NVPTX::TLD4_G_2D_U32_F32;
2525  break;
2527  Opc = NVPTX::TLD4_B_2D_U32_F32;
2528  break;
2530  Opc = NVPTX::TLD4_A_2D_U32_F32;
2531  break;
2533  Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2534  break;
2536  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2537  break;
2539  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2540  break;
2542  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2543  break;
2545  Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2546  break;
2548  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2549  break;
2551  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2552  break;
2554  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2555  break;
2557  Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2558  break;
2560  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2561  break;
2563  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2564  break;
2566  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2567  break;
2569  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2570  break;
2572  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2573  break;
2575  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2576  break;
2578  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2579  break;
2581  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2582  break;
2584  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2585  break;
2587  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2588  break;
2590  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2591  break;
2593  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2594  break;
2596  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2597  break;
2599  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2600  break;
2602  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2603  break;
2605  Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2606  break;
2608  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2609  break;
2611  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2612  break;
2614  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2615  break;
2617  Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2618  break;
2620  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2621  break;
2623  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2624  break;
2626  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2627  break;
2629  Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2630  break;
2632  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2633  break;
2635  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2636  break;
2638  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2639  break;
2641  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2642  break;
2644  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2645  break;
2647  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2648  break;
2650  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2651  break;
2653  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2654  break;
2656  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2657  break;
2659  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2660  break;
2662  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2663  break;
2665  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2666  break;
2668  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2669  break;
2671  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2672  break;
2674  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2675  break;
2677  Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2678  break;
2680  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2681  break;
2683  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2684  break;
2686  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2687  break;
2689  Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2690  break;
2692  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2693  break;
2695  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2696  break;
2698  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2699  break;
2701  Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2702  break;
2704  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2705  break;
2707  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2708  break;
2710  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2711  break;
2713  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2714  break;
2716  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2717  break;
2719  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2720  break;
2722  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2723  break;
2725  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2726  break;
2728  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2729  break;
2731  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2732  break;
2734  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2735  break;
2737  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2738  break;
2740  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2741  break;
2743  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2744  break;
2746  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2747  break;
2749  Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2750  break;
2752  Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2753  break;
2755  Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2756  break;
2758  Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2759  break;
2761  Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2762  break;
2764  Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2765  break;
2767  Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2768  break;
2770  Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2771  break;
2773  Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2774  break;
2776  Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2777  break;
2779  Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2780  break;
2782  Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
2783  break;
2784  }
2785 
2786  // Copy over operands
2787  for (unsigned i = 1; i < N->getNumOperands(); ++i) {
2788  Ops.push_back(N->getOperand(i));
2789  }
2790 
2791  Ops.push_back(Chain);
2792  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2793  return true;
2794 }
2795 
2796 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2797  SDValue Chain = N->getOperand(0);
2798  SDValue TexHandle = N->getOperand(1);
2799  unsigned Opc = 0;
2801  switch (N->getOpcode()) {
2802  default: return false;
2804  Opc = NVPTX::SULD_1D_I8_CLAMP;
2805  Ops.push_back(TexHandle);
2806  Ops.push_back(N->getOperand(2));
2807  Ops.push_back(Chain);
2808  break;
2810  Opc = NVPTX::SULD_1D_I16_CLAMP;
2811  Ops.push_back(TexHandle);
2812  Ops.push_back(N->getOperand(2));
2813  Ops.push_back(Chain);
2814  break;
2816  Opc = NVPTX::SULD_1D_I32_CLAMP;
2817  Ops.push_back(TexHandle);
2818  Ops.push_back(N->getOperand(2));
2819  Ops.push_back(Chain);
2820  break;
2822  Opc = NVPTX::SULD_1D_I64_CLAMP;
2823  Ops.push_back(TexHandle);
2824  Ops.push_back(N->getOperand(2));
2825  Ops.push_back(Chain);
2826  break;
2828  Opc = NVPTX::SULD_1D_V2I8_CLAMP;
2829  Ops.push_back(TexHandle);
2830  Ops.push_back(N->getOperand(2));
2831  Ops.push_back(Chain);
2832  break;
2834  Opc = NVPTX::SULD_1D_V2I16_CLAMP;
2835  Ops.push_back(TexHandle);
2836  Ops.push_back(N->getOperand(2));
2837  Ops.push_back(Chain);
2838  break;
2840  Opc = NVPTX::SULD_1D_V2I32_CLAMP;
2841  Ops.push_back(TexHandle);
2842  Ops.push_back(N->getOperand(2));
2843  Ops.push_back(Chain);
2844  break;
2846  Opc = NVPTX::SULD_1D_V2I64_CLAMP;
2847  Ops.push_back(TexHandle);
2848  Ops.push_back(N->getOperand(2));
2849  Ops.push_back(Chain);
2850  break;
2852  Opc = NVPTX::SULD_1D_V4I8_CLAMP;
2853  Ops.push_back(TexHandle);
2854  Ops.push_back(N->getOperand(2));
2855  Ops.push_back(Chain);
2856  break;
2858  Opc = NVPTX::SULD_1D_V4I16_CLAMP;
2859  Ops.push_back(TexHandle);
2860  Ops.push_back(N->getOperand(2));
2861  Ops.push_back(Chain);
2862  break;
2864  Opc = NVPTX::SULD_1D_V4I32_CLAMP;
2865  Ops.push_back(TexHandle);
2866  Ops.push_back(N->getOperand(2));
2867  Ops.push_back(Chain);
2868  break;
2870  Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
2871  Ops.push_back(TexHandle);
2872  Ops.push_back(N->getOperand(2));
2873  Ops.push_back(N->getOperand(3));
2874  Ops.push_back(Chain);
2875  break;
2877  Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
2878  Ops.push_back(TexHandle);
2879  Ops.push_back(N->getOperand(2));
2880  Ops.push_back(N->getOperand(3));
2881  Ops.push_back(Chain);
2882  break;
2884  Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
2885  Ops.push_back(TexHandle);
2886  Ops.push_back(N->getOperand(2));
2887  Ops.push_back(N->getOperand(3));
2888  Ops.push_back(Chain);
2889  break;
2891  Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
2892  Ops.push_back(TexHandle);
2893  Ops.push_back(N->getOperand(2));
2894  Ops.push_back(N->getOperand(3));
2895  Ops.push_back(Chain);
2896  break;
2898  Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
2899  Ops.push_back(TexHandle);
2900  Ops.push_back(N->getOperand(2));
2901  Ops.push_back(N->getOperand(3));
2902  Ops.push_back(Chain);
2903  break;
2905  Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
2906  Ops.push_back(TexHandle);
2907  Ops.push_back(N->getOperand(2));
2908  Ops.push_back(N->getOperand(3));
2909  Ops.push_back(Chain);
2910  break;
2912  Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
2913  Ops.push_back(TexHandle);
2914  Ops.push_back(N->getOperand(2));
2915  Ops.push_back(N->getOperand(3));
2916  Ops.push_back(Chain);
2917  break;
2919  Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
2920  Ops.push_back(TexHandle);
2921  Ops.push_back(N->getOperand(2));
2922  Ops.push_back(N->getOperand(3));
2923  Ops.push_back(Chain);
2924  break;
2926  Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
2927  Ops.push_back(TexHandle);
2928  Ops.push_back(N->getOperand(2));
2929  Ops.push_back(N->getOperand(3));
2930  Ops.push_back(Chain);
2931  break;
2933  Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
2934  Ops.push_back(TexHandle);
2935  Ops.push_back(N->getOperand(2));
2936  Ops.push_back(N->getOperand(3));
2937  Ops.push_back(Chain);
2938  break;
2940  Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
2941  Ops.push_back(TexHandle);
2942  Ops.push_back(N->getOperand(2));
2943  Ops.push_back(N->getOperand(3));
2944  Ops.push_back(Chain);
2945  break;
2947  Opc = NVPTX::SULD_2D_I8_CLAMP;
2948  Ops.push_back(TexHandle);
2949  Ops.push_back(N->getOperand(2));
2950  Ops.push_back(N->getOperand(3));
2951  Ops.push_back(Chain);
2952  break;
2954  Opc = NVPTX::SULD_2D_I16_CLAMP;
2955  Ops.push_back(TexHandle);
2956  Ops.push_back(N->getOperand(2));
2957  Ops.push_back(N->getOperand(3));
2958  Ops.push_back(Chain);
2959  break;
2961  Opc = NVPTX::SULD_2D_I32_CLAMP;
2962  Ops.push_back(TexHandle);
2963  Ops.push_back(N->getOperand(2));
2964  Ops.push_back(N->getOperand(3));
2965  Ops.push_back(Chain);
2966  break;
2968  Opc = NVPTX::SULD_2D_I64_CLAMP;
2969  Ops.push_back(TexHandle);
2970  Ops.push_back(N->getOperand(2));
2971  Ops.push_back(N->getOperand(3));
2972  Ops.push_back(Chain);
2973  break;
2975  Opc = NVPTX::SULD_2D_V2I8_CLAMP;
2976  Ops.push_back(TexHandle);
2977  Ops.push_back(N->getOperand(2));
2978  Ops.push_back(N->getOperand(3));
2979  Ops.push_back(Chain);
2980  break;
2982  Opc = NVPTX::SULD_2D_V2I16_CLAMP;
2983  Ops.push_back(TexHandle);
2984  Ops.push_back(N->getOperand(2));
2985  Ops.push_back(N->getOperand(3));
2986  Ops.push_back(Chain);
2987  break;
2989  Opc = NVPTX::SULD_2D_V2I32_CLAMP;
2990  Ops.push_back(TexHandle);
2991  Ops.push_back(N->getOperand(2));
2992  Ops.push_back(N->getOperand(3));
2993  Ops.push_back(Chain);
2994  break;
2996  Opc = NVPTX::SULD_2D_V2I64_CLAMP;
2997  Ops.push_back(TexHandle);
2998  Ops.push_back(N->getOperand(2));
2999  Ops.push_back(N->getOperand(3));
3000  Ops.push_back(Chain);
3001  break;
3003  Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3004  Ops.push_back(TexHandle);
3005  Ops.push_back(N->getOperand(2));
3006  Ops.push_back(N->getOperand(3));
3007  Ops.push_back(Chain);
3008  break;
3010  Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3011  Ops.push_back(TexHandle);
3012  Ops.push_back(N->getOperand(2));
3013  Ops.push_back(N->getOperand(3));
3014  Ops.push_back(Chain);
3015  break;
3017  Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3018  Ops.push_back(TexHandle);
3019  Ops.push_back(N->getOperand(2));
3020  Ops.push_back(N->getOperand(3));
3021  Ops.push_back(Chain);
3022  break;
3024  Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3025  Ops.push_back(TexHandle);
3026  Ops.push_back(N->getOperand(2));
3027  Ops.push_back(N->getOperand(3));
3028  Ops.push_back(N->getOperand(4));
3029  Ops.push_back(Chain);
3030  break;
3032  Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3033  Ops.push_back(TexHandle);
3034  Ops.push_back(N->getOperand(2));
3035  Ops.push_back(N->getOperand(3));
3036  Ops.push_back(N->getOperand(4));
3037  Ops.push_back(Chain);
3038  break;
3040  Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3041  Ops.push_back(TexHandle);
3042  Ops.push_back(N->getOperand(2));
3043  Ops.push_back(N->getOperand(3));
3044  Ops.push_back(N->getOperand(4));
3045  Ops.push_back(Chain);
3046  break;
3048  Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3049  Ops.push_back(TexHandle);
3050  Ops.push_back(N->getOperand(2));
3051  Ops.push_back(N->getOperand(3));
3052  Ops.push_back(N->getOperand(4));
3053  Ops.push_back(Chain);
3054  break;
3056  Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3057  Ops.push_back(TexHandle);
3058  Ops.push_back(N->getOperand(2));
3059  Ops.push_back(N->getOperand(3));
3060  Ops.push_back(N->getOperand(4));
3061  Ops.push_back(Chain);
3062  break;
3064  Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3065  Ops.push_back(TexHandle);
3066  Ops.push_back(N->getOperand(2));
3067  Ops.push_back(N->getOperand(3));
3068  Ops.push_back(N->getOperand(4));
3069  Ops.push_back(Chain);
3070  break;
3072  Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3073  Ops.push_back(TexHandle);
3074  Ops.push_back(N->getOperand(2));
3075  Ops.push_back(N->getOperand(3));
3076  Ops.push_back(N->getOperand(4));
3077  Ops.push_back(Chain);
3078  break;
3080  Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3081  Ops.push_back(TexHandle);
3082  Ops.push_back(N->getOperand(2));
3083  Ops.push_back(N->getOperand(3));
3084  Ops.push_back(N->getOperand(4));
3085  Ops.push_back(Chain);
3086  break;
3088  Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3089  Ops.push_back(TexHandle);
3090  Ops.push_back(N->getOperand(2));
3091  Ops.push_back(N->getOperand(3));
3092  Ops.push_back(N->getOperand(4));
3093  Ops.push_back(Chain);
3094  break;
3096  Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3097  Ops.push_back(TexHandle);
3098  Ops.push_back(N->getOperand(2));
3099  Ops.push_back(N->getOperand(3));
3100  Ops.push_back(N->getOperand(4));
3101  Ops.push_back(Chain);
3102  break;
3104  Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3105  Ops.push_back(TexHandle);
3106  Ops.push_back(N->getOperand(2));
3107  Ops.push_back(N->getOperand(3));
3108  Ops.push_back(N->getOperand(4));
3109  Ops.push_back(Chain);
3110  break;
3112  Opc = NVPTX::SULD_3D_I8_CLAMP;
3113  Ops.push_back(TexHandle);
3114  Ops.push_back(N->getOperand(2));
3115  Ops.push_back(N->getOperand(3));
3116  Ops.push_back(N->getOperand(4));
3117  Ops.push_back(Chain);
3118  break;
3120  Opc = NVPTX::SULD_3D_I16_CLAMP;
3121  Ops.push_back(TexHandle);
3122  Ops.push_back(N->getOperand(2));
3123  Ops.push_back(N->getOperand(3));
3124  Ops.push_back(N->getOperand(4));
3125  Ops.push_back(Chain);
3126  break;
3128  Opc = NVPTX::SULD_3D_I32_CLAMP;
3129  Ops.push_back(TexHandle);
3130  Ops.push_back(N->getOperand(2));
3131  Ops.push_back(N->getOperand(3));
3132  Ops.push_back(N->getOperand(4));
3133  Ops.push_back(Chain);
3134  break;
3136  Opc = NVPTX::SULD_3D_I64_CLAMP;
3137  Ops.push_back(TexHandle);
3138  Ops.push_back(N->getOperand(2));
3139  Ops.push_back(N->getOperand(3));
3140  Ops.push_back(N->getOperand(4));
3141  Ops.push_back(Chain);
3142  break;
3144  Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3145  Ops.push_back(TexHandle);
3146  Ops.push_back(N->getOperand(2));
3147  Ops.push_back(N->getOperand(3));
3148  Ops.push_back(N->getOperand(4));
3149  Ops.push_back(Chain);
3150  break;
3152  Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3153  Ops.push_back(TexHandle);
3154  Ops.push_back(N->getOperand(2));
3155  Ops.push_back(N->getOperand(3));
3156  Ops.push_back(N->getOperand(4));
3157  Ops.push_back(Chain);
3158  break;
3160  Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3161  Ops.push_back(TexHandle);
3162  Ops.push_back(N->getOperand(2));
3163  Ops.push_back(N->getOperand(3));
3164  Ops.push_back(N->getOperand(4));
3165  Ops.push_back(Chain);
3166  break;
3168  Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3169  Ops.push_back(TexHandle);
3170  Ops.push_back(N->getOperand(2));
3171  Ops.push_back(N->getOperand(3));
3172  Ops.push_back(N->getOperand(4));
3173  Ops.push_back(Chain);
3174  break;
3176  Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3177  Ops.push_back(TexHandle);
3178  Ops.push_back(N->getOperand(2));
3179  Ops.push_back(N->getOperand(3));
3180  Ops.push_back(N->getOperand(4));
3181  Ops.push_back(Chain);
3182  break;
3184  Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3185  Ops.push_back(TexHandle);
3186  Ops.push_back(N->getOperand(2));
3187  Ops.push_back(N->getOperand(3));
3188  Ops.push_back(N->getOperand(4));
3189  Ops.push_back(Chain);
3190  break;
3192  Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3193  Ops.push_back(TexHandle);
3194  Ops.push_back(N->getOperand(2));
3195  Ops.push_back(N->getOperand(3));
3196  Ops.push_back(N->getOperand(4));
3197  Ops.push_back(Chain);
3198  break;
3200  Opc = NVPTX::SULD_1D_I8_TRAP;
3201  Ops.push_back(TexHandle);
3202  Ops.push_back(N->getOperand(2));
3203  Ops.push_back(Chain);
3204  break;
3206  Opc = NVPTX::SULD_1D_I16_TRAP;
3207  Ops.push_back(TexHandle);
3208  Ops.push_back(N->getOperand(2));
3209  Ops.push_back(Chain);
3210  break;
3212  Opc = NVPTX::SULD_1D_I32_TRAP;
3213  Ops.push_back(TexHandle);
3214  Ops.push_back(N->getOperand(2));
3215  Ops.push_back(Chain);
3216  break;
3218  Opc = NVPTX::SULD_1D_I64_TRAP;
3219  Ops.push_back(TexHandle);
3220  Ops.push_back(N->getOperand(2));
3221  Ops.push_back(Chain);
3222  break;
3224  Opc = NVPTX::SULD_1D_V2I8_TRAP;
3225  Ops.push_back(TexHandle);
3226  Ops.push_back(N->getOperand(2));
3227  Ops.push_back(Chain);
3228  break;
3230  Opc = NVPTX::SULD_1D_V2I16_TRAP;
3231  Ops.push_back(TexHandle);
3232  Ops.push_back(N->getOperand(2));
3233  Ops.push_back(Chain);
3234  break;
3236  Opc = NVPTX::SULD_1D_V2I32_TRAP;
3237  Ops.push_back(TexHandle);
3238  Ops.push_back(N->getOperand(2));
3239  Ops.push_back(Chain);
3240  break;
3242  Opc = NVPTX::SULD_1D_V2I64_TRAP;
3243  Ops.push_back(TexHandle);
3244  Ops.push_back(N->getOperand(2));
3245  Ops.push_back(Chain);
3246  break;
3248  Opc = NVPTX::SULD_1D_V4I8_TRAP;
3249  Ops.push_back(TexHandle);
3250  Ops.push_back(N->getOperand(2));
3251  Ops.push_back(Chain);
3252  break;
3254  Opc = NVPTX::SULD_1D_V4I16_TRAP;
3255  Ops.push_back(TexHandle);
3256  Ops.push_back(N->getOperand(2));
3257  Ops.push_back(Chain);
3258  break;
3260  Opc = NVPTX::SULD_1D_V4I32_TRAP;
3261  Ops.push_back(TexHandle);
3262  Ops.push_back(N->getOperand(2));
3263  Ops.push_back(Chain);
3264  break;
3266  Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3267  Ops.push_back(TexHandle);
3268  Ops.push_back(N->getOperand(2));
3269  Ops.push_back(N->getOperand(3));
3270  Ops.push_back(Chain);
3271  break;
3273  Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
3274  Ops.push_back(TexHandle);
3275  Ops.push_back(N->getOperand(2));
3276  Ops.push_back(N->getOperand(3));
3277  Ops.push_back(Chain);
3278  break;
3280  Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3281  Ops.push_back(TexHandle);
3282  Ops.push_back(N->getOperand(2));
3283  Ops.push_back(N->getOperand(3));
3284  Ops.push_back(Chain);
3285  break;
3287  Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
3288  Ops.push_back(TexHandle);
3289  Ops.push_back(N->getOperand(2));
3290  Ops.push_back(N->getOperand(3));
3291  Ops.push_back(Chain);
3292  break;
3294  Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3295  Ops.push_back(TexHandle);
3296  Ops.push_back(N->getOperand(2));
3297  Ops.push_back(N->getOperand(3));
3298  Ops.push_back(Chain);
3299  break;
3301  Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3302  Ops.push_back(TexHandle);
3303  Ops.push_back(N->getOperand(2));
3304  Ops.push_back(N->getOperand(3));
3305  Ops.push_back(Chain);
3306  break;
3308  Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3309  Ops.push_back(TexHandle);
3310  Ops.push_back(N->getOperand(2));
3311  Ops.push_back(N->getOperand(3));
3312  Ops.push_back(Chain);
3313  break;
3315  Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
3316  Ops.push_back(TexHandle);
3317  Ops.push_back(N->getOperand(2));
3318  Ops.push_back(N->getOperand(3));
3319  Ops.push_back(Chain);
3320  break;
3322  Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3323  Ops.push_back(TexHandle);
3324  Ops.push_back(N->getOperand(2));
3325  Ops.push_back(N->getOperand(3));
3326  Ops.push_back(Chain);
3327  break;
3329  Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3330  Ops.push_back(TexHandle);
3331  Ops.push_back(N->getOperand(2));
3332  Ops.push_back(N->getOperand(3));
3333  Ops.push_back(Chain);
3334  break;
3336  Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3337  Ops.push_back(TexHandle);
3338  Ops.push_back(N->getOperand(2));
3339  Ops.push_back(N->getOperand(3));
3340  Ops.push_back(Chain);
3341  break;
3343  Opc = NVPTX::SULD_2D_I8_TRAP;
3344  Ops.push_back(TexHandle);
3345  Ops.push_back(N->getOperand(2));
3346  Ops.push_back(N->getOperand(3));
3347  Ops.push_back(Chain);
3348  break;
3350  Opc = NVPTX::SULD_2D_I16_TRAP;
3351  Ops.push_back(TexHandle);
3352  Ops.push_back(N->getOperand(2));
3353  Ops.push_back(N->getOperand(3));
3354  Ops.push_back(Chain);
3355  break;
3357  Opc = NVPTX::SULD_2D_I32_TRAP;
3358  Ops.push_back(TexHandle);
3359  Ops.push_back(N->getOperand(2));
3360  Ops.push_back(N->getOperand(3));
3361  Ops.push_back(Chain);
3362  break;
3364  Opc = NVPTX::SULD_2D_I64_TRAP;
3365  Ops.push_back(TexHandle);
3366  Ops.push_back(N->getOperand(2));
3367  Ops.push_back(N->getOperand(3));
3368  Ops.push_back(Chain);
3369  break;
3371  Opc = NVPTX::SULD_2D_V2I8_TRAP;
3372  Ops.push_back(TexHandle);
3373  Ops.push_back(N->getOperand(2));
3374  Ops.push_back(N->getOperand(3));
3375  Ops.push_back(Chain);
3376  break;
3378  Opc = NVPTX::SULD_2D_V2I16_TRAP;
3379  Ops.push_back(TexHandle);
3380  Ops.push_back(N->getOperand(2));
3381  Ops.push_back(N->getOperand(3));
3382  Ops.push_back(Chain);
3383  break;
3385  Opc = NVPTX::SULD_2D_V2I32_TRAP;
3386  Ops.push_back(TexHandle);
3387  Ops.push_back(N->getOperand(2));
3388  Ops.push_back(N->getOperand(3));
3389  Ops.push_back(Chain);
3390  break;
3392  Opc = NVPTX::SULD_2D_V2I64_TRAP;
3393  Ops.push_back(TexHandle);
3394  Ops.push_back(N->getOperand(2));
3395  Ops.push_back(N->getOperand(3));
3396  Ops.push_back(Chain);
3397  break;
3399  Opc = NVPTX::SULD_2D_V4I8_TRAP;
3400  Ops.push_back(TexHandle);
3401  Ops.push_back(N->getOperand(2));
3402  Ops.push_back(N->getOperand(3));
3403  Ops.push_back(Chain);
3404  break;
3406  Opc = NVPTX::SULD_2D_V4I16_TRAP;
3407  Ops.push_back(TexHandle);
3408  Ops.push_back(N->getOperand(2));
3409  Ops.push_back(N->getOperand(3));
3410  Ops.push_back(Chain);
3411  break;
3413  Opc = NVPTX::SULD_2D_V4I32_TRAP;
3414  Ops.push_back(TexHandle);
3415  Ops.push_back(N->getOperand(2));
3416  Ops.push_back(N->getOperand(3));
3417  Ops.push_back(Chain);
3418  break;
3420  Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3421  Ops.push_back(TexHandle);
3422  Ops.push_back(N->getOperand(2));
3423  Ops.push_back(N->getOperand(3));
3424  Ops.push_back(N->getOperand(4));
3425  Ops.push_back(Chain);
3426  break;
3428  Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3429  Ops.push_back(TexHandle);
3430  Ops.push_back(N->getOperand(2));
3431  Ops.push_back(N->getOperand(3));
3432  Ops.push_back(N->getOperand(4));
3433  Ops.push_back(Chain);
3434  break;
3436  Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3437  Ops.push_back(TexHandle);
3438  Ops.push_back(N->getOperand(2));
3439  Ops.push_back(N->getOperand(3));
3440  Ops.push_back(N->getOperand(4));
3441  Ops.push_back(Chain);
3442  break;
3444  Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
3445  Ops.push_back(TexHandle);
3446  Ops.push_back(N->getOperand(2));
3447  Ops.push_back(N->getOperand(3));
3448  Ops.push_back(N->getOperand(4));
3449  Ops.push_back(Chain);
3450  break;
3452  Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3453  Ops.push_back(TexHandle);
3454  Ops.push_back(N->getOperand(2));
3455  Ops.push_back(N->getOperand(3));
3456  Ops.push_back(N->getOperand(4));
3457  Ops.push_back(Chain);
3458  break;
3460  Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3461  Ops.push_back(TexHandle);
3462  Ops.push_back(N->getOperand(2));
3463  Ops.push_back(N->getOperand(3));
3464  Ops.push_back(N->getOperand(4));
3465  Ops.push_back(Chain);
3466  break;
3468  Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3469  Ops.push_back(TexHandle);
3470  Ops.push_back(N->getOperand(2));
3471  Ops.push_back(N->getOperand(3));
3472  Ops.push_back(N->getOperand(4));
3473  Ops.push_back(Chain);
3474  break;
3476  Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
3477  Ops.push_back(TexHandle);
3478  Ops.push_back(N->getOperand(2));
3479  Ops.push_back(N->getOperand(3));
3480  Ops.push_back(N->getOperand(4));
3481  Ops.push_back(Chain);
3482  break;
3484  Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3485  Ops.push_back(TexHandle);
3486  Ops.push_back(N->getOperand(2));
3487  Ops.push_back(N->getOperand(3));
3488  Ops.push_back(N->getOperand(4));
3489  Ops.push_back(Chain);
3490  break;
3492  Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3493  Ops.push_back(TexHandle);
3494  Ops.push_back(N->getOperand(2));
3495  Ops.push_back(N->getOperand(3));
3496  Ops.push_back(N->getOperand(4));
3497  Ops.push_back(Chain);
3498  break;
3500  Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3501  Ops.push_back(TexHandle);
3502  Ops.push_back(N->getOperand(2));
3503  Ops.push_back(N->getOperand(3));
3504  Ops.push_back(N->getOperand(4));
3505  Ops.push_back(Chain);
3506  break;
3508  Opc = NVPTX::SULD_3D_I8_TRAP;
3509  Ops.push_back(TexHandle);
3510  Ops.push_back(N->getOperand(2));
3511  Ops.push_back(N->getOperand(3));
3512  Ops.push_back(N->getOperand(4));
3513  Ops.push_back(Chain);
3514  break;
3516  Opc = NVPTX::SULD_3D_I16_TRAP;
3517  Ops.push_back(TexHandle);
3518  Ops.push_back(N->getOperand(2));
3519  Ops.push_back(N->getOperand(3));
3520  Ops.push_back(N->getOperand(4));
3521  Ops.push_back(Chain);
3522  break;
3524  Opc = NVPTX::SULD_3D_I32_TRAP;
3525  Ops.push_back(TexHandle);
3526  Ops.push_back(N->getOperand(2));
3527  Ops.push_back(N->getOperand(3));
3528  Ops.push_back(N->getOperand(4));
3529  Ops.push_back(Chain);
3530  break;
3532  Opc = NVPTX::SULD_3D_I64_TRAP;
3533  Ops.push_back(TexHandle);
3534  Ops.push_back(N->getOperand(2));
3535  Ops.push_back(N->getOperand(3));
3536  Ops.push_back(N->getOperand(4));
3537  Ops.push_back(Chain);
3538  break;
3540  Opc = NVPTX::SULD_3D_V2I8_TRAP;
3541  Ops.push_back(TexHandle);
3542  Ops.push_back(N->getOperand(2));
3543  Ops.push_back(N->getOperand(3));
3544  Ops.push_back(N->getOperand(4));
3545  Ops.push_back(Chain);
3546  break;
3548  Opc = NVPTX::SULD_3D_V2I16_TRAP;
3549  Ops.push_back(TexHandle);
3550  Ops.push_back(N->getOperand(2));
3551  Ops.push_back(N->getOperand(3));
3552  Ops.push_back(N->getOperand(4));
3553  Ops.push_back(Chain);
3554  break;
3556  Opc = NVPTX::SULD_3D_V2I32_TRAP;
3557  Ops.push_back(TexHandle);
3558  Ops.push_back(N->getOperand(2));
3559  Ops.push_back(N->getOperand(3));
3560  Ops.push_back(N->getOperand(4));
3561  Ops.push_back(Chain);
3562  break;
3564  Opc = NVPTX::SULD_3D_V2I64_TRAP;
3565  Ops.push_back(TexHandle);
3566  Ops.push_back(N->getOperand(2));
3567  Ops.push_back(N->getOperand(3));
3568  Ops.push_back(N->getOperand(4));
3569  Ops.push_back(Chain);
3570  break;
3572  Opc = NVPTX::SULD_3D_V4I8_TRAP;
3573  Ops.push_back(TexHandle);
3574  Ops.push_back(N->getOperand(2));
3575  Ops.push_back(N->getOperand(3));
3576  Ops.push_back(N->getOperand(4));
3577  Ops.push_back(Chain);
3578  break;
3580  Opc = NVPTX::SULD_3D_V4I16_TRAP;
3581  Ops.push_back(TexHandle);
3582  Ops.push_back(N->getOperand(2));
3583  Ops.push_back(N->getOperand(3));
3584  Ops.push_back(N->getOperand(4));
3585  Ops.push_back(Chain);
3586  break;
3588  Opc = NVPTX::SULD_3D_V4I32_TRAP;
3589  Ops.push_back(TexHandle);
3590  Ops.push_back(N->getOperand(2));
3591  Ops.push_back(N->getOperand(3));
3592  Ops.push_back(N->getOperand(4));
3593  Ops.push_back(Chain);
3594  break;
3596  Opc = NVPTX::SULD_1D_I8_ZERO;
3597  Ops.push_back(TexHandle);
3598  Ops.push_back(N->getOperand(2));
3599  Ops.push_back(Chain);
3600  break;
3602  Opc = NVPTX::SULD_1D_I16_ZERO;
3603  Ops.push_back(TexHandle);
3604  Ops.push_back(N->getOperand(2));
3605  Ops.push_back(Chain);
3606  break;
3608  Opc = NVPTX::SULD_1D_I32_ZERO;
3609  Ops.push_back(TexHandle);
3610  Ops.push_back(N->getOperand(2));
3611  Ops.push_back(Chain);
3612  break;
3614  Opc = NVPTX::SULD_1D_I64_ZERO;
3615  Ops.push_back(TexHandle);
3616  Ops.push_back(N->getOperand(2));
3617  Ops.push_back(Chain);
3618  break;
3620  Opc = NVPTX::SULD_1D_V2I8_ZERO;
3621  Ops.push_back(TexHandle);
3622  Ops.push_back(N->getOperand(2));
3623  Ops.push_back(Chain);
3624  break;
3626  Opc = NVPTX::SULD_1D_V2I16_ZERO;
3627  Ops.push_back(TexHandle);
3628  Ops.push_back(N->getOperand(2));
3629  Ops.push_back(Chain);
3630  break;
3632  Opc = NVPTX::SULD_1D_V2I32_ZERO;
3633  Ops.push_back(TexHandle);
3634  Ops.push_back(N->getOperand(2));
3635  Ops.push_back(Chain);
3636  break;
3638  Opc = NVPTX::SULD_1D_V2I64_ZERO;
3639  Ops.push_back(TexHandle);
3640  Ops.push_back(N->getOperand(2));
3641  Ops.push_back(Chain);
3642  break;
3644  Opc = NVPTX::SULD_1D_V4I8_ZERO;
3645  Ops.push_back(TexHandle);
3646  Ops.push_back(N->getOperand(2));
3647  Ops.push_back(Chain);
3648  break;
3650  Opc = NVPTX::SULD_1D_V4I16_ZERO;
3651  Ops.push_back(TexHandle);
3652  Ops.push_back(N->getOperand(2));
3653  Ops.push_back(Chain);
3654  break;
3656  Opc = NVPTX::SULD_1D_V4I32_ZERO;
3657  Ops.push_back(TexHandle);
3658  Ops.push_back(N->getOperand(2));
3659  Ops.push_back(Chain);
3660  break;
3662  Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
3663  Ops.push_back(TexHandle);
3664  Ops.push_back(N->getOperand(2));
3665  Ops.push_back(N->getOperand(3));
3666  Ops.push_back(Chain);
3667  break;
3669  Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
3670  Ops.push_back(TexHandle);
3671  Ops.push_back(N->getOperand(2));
3672  Ops.push_back(N->getOperand(3));
3673  Ops.push_back(Chain);
3674  break;
3676  Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
3677  Ops.push_back(TexHandle);
3678  Ops.push_back(N->getOperand(2));
3679  Ops.push_back(N->getOperand(3));
3680  Ops.push_back(Chain);
3681  break;
3683  Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
3684  Ops.push_back(TexHandle);
3685  Ops.push_back(N->getOperand(2));
3686  Ops.push_back(N->getOperand(3));
3687  Ops.push_back(Chain);
3688  break;
3690  Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
3691  Ops.push_back(TexHandle);
3692  Ops.push_back(N->getOperand(2));
3693  Ops.push_back(N->getOperand(3));
3694  Ops.push_back(Chain);
3695  break;
3697  Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
3698  Ops.push_back(TexHandle);
3699  Ops.push_back(N->getOperand(2));
3700  Ops.push_back(N->getOperand(3));
3701  Ops.push_back(Chain);
3702  break;
3704  Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
3705  Ops.push_back(TexHandle);
3706  Ops.push_back(N->getOperand(2));
3707  Ops.push_back(N->getOperand(3));
3708  Ops.push_back(Chain);
3709  break;
3711  Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
3712  Ops.push_back(TexHandle);
3713  Ops.push_back(N->getOperand(2));
3714  Ops.push_back(N->getOperand(3));
3715  Ops.push_back(Chain);
3716  break;
3718  Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
3719  Ops.push_back(TexHandle);
3720  Ops.push_back(N->getOperand(2));
3721  Ops.push_back(N->getOperand(3));
3722  Ops.push_back(Chain);
3723  break;
3725  Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
3726  Ops.push_back(TexHandle);
3727  Ops.push_back(N->getOperand(2));
3728  Ops.push_back(N->getOperand(3));
3729  Ops.push_back(Chain);
3730  break;
3732  Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
3733  Ops.push_back(TexHandle);
3734  Ops.push_back(N->getOperand(2));
3735  Ops.push_back(N->getOperand(3));
3736  Ops.push_back(Chain);
3737  break;
3739  Opc = NVPTX::SULD_2D_I8_ZERO;
3740  Ops.push_back(TexHandle);
3741  Ops.push_back(N->getOperand(2));
3742  Ops.push_back(N->getOperand(3));
3743  Ops.push_back(Chain);
3744  break;
3746  Opc = NVPTX::SULD_2D_I16_ZERO;
3747  Ops.push_back(TexHandle);
3748  Ops.push_back(N->getOperand(2));
3749  Ops.push_back(N->getOperand(3));
3750  Ops.push_back(Chain);
3751  break;
3753  Opc = NVPTX::SULD_2D_I32_ZERO;
3754  Ops.push_back(TexHandle);
3755  Ops.push_back(N->getOperand(2));
3756  Ops.push_back(N->getOperand(3));
3757  Ops.push_back(Chain);
3758  break;
3760  Opc = NVPTX::SULD_2D_I64_ZERO;
3761  Ops.push_back(TexHandle);
3762  Ops.push_back(N->getOperand(2));
3763  Ops.push_back(N->getOperand(3));
3764  Ops.push_back(Chain);
3765  break;
3767  Opc = NVPTX::SULD_2D_V2I8_ZERO;
3768  Ops.push_back(TexHandle);
3769  Ops.push_back(N->getOperand(2));
3770  Ops.push_back(N->getOperand(3));
3771  Ops.push_back(Chain);
3772  break;
3774  Opc = NVPTX::SULD_2D_V2I16_ZERO;
3775  Ops.push_back(TexHandle);
3776  Ops.push_back(N->getOperand(2));
3777  Ops.push_back(N->getOperand(3));
3778  Ops.push_back(Chain);
3779  break;
3781  Opc = NVPTX::SULD_2D_V2I32_ZERO;
3782  Ops.push_back(TexHandle);
3783  Ops.push_back(N->getOperand(2));
3784  Ops.push_back(N->getOperand(3));
3785  Ops.push_back(Chain);
3786  break;
3788  Opc = NVPTX::SULD_2D_V2I64_ZERO;
3789  Ops.push_back(TexHandle);
3790  Ops.push_back(N->getOperand(2));
3791  Ops.push_back(N->getOperand(3));
3792  Ops.push_back(Chain);
3793  break;
3795  Opc = NVPTX::SULD_2D_V4I8_ZERO;
3796  Ops.push_back(TexHandle);
3797  Ops.push_back(N->getOperand(2));
3798  Ops.push_back(N->getOperand(3));
3799  Ops.push_back(Chain);
3800  break;
3802  Opc = NVPTX::SULD_2D_V4I16_ZERO;
3803  Ops.push_back(TexHandle);
3804  Ops.push_back(N->getOperand(2));
3805  Ops.push_back(N->getOperand(3));
3806  Ops.push_back(Chain);
3807  break;
3809  Opc = NVPTX::SULD_2D_V4I32_ZERO;
3810  Ops.push_back(TexHandle);
3811  Ops.push_back(N->getOperand(2));
3812  Ops.push_back(N->getOperand(3));
3813  Ops.push_back(Chain);
3814  break;
3816  Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
3817  Ops.push_back(TexHandle);
3818  Ops.push_back(N->getOperand(2));
3819  Ops.push_back(N->getOperand(3));
3820  Ops.push_back(N->getOperand(4));
3821  Ops.push_back(Chain);
3822  break;
3824  Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
3825  Ops.push_back(TexHandle);
3826  Ops.push_back(N->getOperand(2));
3827  Ops.push_back(N->getOperand(3));
3828  Ops.push_back(N->getOperand(4));
3829  Ops.push_back(Chain);
3830  break;
3832  Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
3833  Ops.push_back(TexHandle);
3834  Ops.push_back(N->getOperand(2));
3835  Ops.push_back(N->getOperand(3));
3836  Ops.push_back(N->getOperand(4));
3837  Ops.push_back(Chain);
3838  break;
3840  Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
3841  Ops.push_back(TexHandle);
3842  Ops.push_back(N->getOperand(2));
3843  Ops.push_back(N->getOperand(3));
3844  Ops.push_back(N->getOperand(4));
3845  Ops.push_back(Chain);
3846  break;
3848  Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
3849  Ops.push_back(TexHandle);
3850  Ops.push_back(N->getOperand(2));
3851  Ops.push_back(N->getOperand(3));
3852  Ops.push_back(N->getOperand(4));
3853  Ops.push_back(Chain);
3854  break;
3856  Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
3857  Ops.push_back(TexHandle);
3858  Ops.push_back(N->getOperand(2));
3859  Ops.push_back(N->getOperand(3));
3860  Ops.push_back(N->getOperand(4));
3861  Ops.push_back(Chain);
3862  break;
3864  Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
3865  Ops.push_back(TexHandle);
3866  Ops.push_back(N->getOperand(2));
3867  Ops.push_back(N->getOperand(3));
3868  Ops.push_back(N->getOperand(4));
3869  Ops.push_back(Chain);
3870  break;
3872  Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
3873  Ops.push_back(TexHandle);
3874  Ops.push_back(N->getOperand(2));
3875  Ops.push_back(N->getOperand(3));
3876  Ops.push_back(N->getOperand(4));
3877  Ops.push_back(Chain);
3878  break;
3880  Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
3881  Ops.push_back(TexHandle);
3882  Ops.push_back(N->getOperand(2));
3883  Ops.push_back(N->getOperand(3));
3884  Ops.push_back(N->getOperand(4));
3885  Ops.push_back(Chain);
3886  break;
3888  Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
3889  Ops.push_back(TexHandle);
3890  Ops.push_back(N->getOperand(2));
3891  Ops.push_back(N->getOperand(3));
3892  Ops.push_back(N->getOperand(4));
3893  Ops.push_back(Chain);
3894  break;
3896  Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
3897  Ops.push_back(TexHandle);
3898  Ops.push_back(N->getOperand(2));
3899  Ops.push_back(N->getOperand(3));
3900  Ops.push_back(N->getOperand(4));
3901  Ops.push_back(Chain);
3902  break;
3904  Opc = NVPTX::SULD_3D_I8_ZERO;
3905  Ops.push_back(TexHandle);
3906  Ops.push_back(N->getOperand(2));
3907  Ops.push_back(N->getOperand(3));
3908  Ops.push_back(N->getOperand(4));
3909  Ops.push_back(Chain);
3910  break;
3912  Opc = NVPTX::SULD_3D_I16_ZERO;
3913  Ops.push_back(TexHandle);
3914  Ops.push_back(N->getOperand(2));
3915  Ops.push_back(N->getOperand(3));
3916  Ops.push_back(N->getOperand(4));
3917  Ops.push_back(Chain);
3918  break;
3920  Opc = NVPTX::SULD_3D_I32_ZERO;
3921  Ops.push_back(TexHandle);
3922  Ops.push_back(N->getOperand(2));
3923  Ops.push_back(N->getOperand(3));
3924  Ops.push_back(N->getOperand(4));
3925  Ops.push_back(Chain);
3926  break;
3928  Opc = NVPTX::SULD_3D_I64_ZERO;
3929  Ops.push_back(TexHandle);
3930  Ops.push_back(N->getOperand(2));
3931  Ops.push_back(N->getOperand(3));
3932  Ops.push_back(N->getOperand(4));
3933  Ops.push_back(Chain);
3934  break;
3936  Opc = NVPTX::SULD_3D_V2I8_ZERO;
3937  Ops.push_back(TexHandle);
3938  Ops.push_back(N->getOperand(2));
3939  Ops.push_back(N->getOperand(3));
3940  Ops.push_back(N->getOperand(4));
3941  Ops.push_back(Chain);
3942  break;
3944  Opc = NVPTX::SULD_3D_V2I16_ZERO;
3945  Ops.push_back(TexHandle);
3946  Ops.push_back(N->getOperand(2));
3947  Ops.push_back(N->getOperand(3));
3948  Ops.push_back(N->getOperand(4));
3949  Ops.push_back(Chain);
3950  break;
3952  Opc = NVPTX::SULD_3D_V2I32_ZERO;
3953  Ops.push_back(TexHandle);
3954  Ops.push_back(N->getOperand(2));
3955  Ops.push_back(N->getOperand(3));
3956  Ops.push_back(N->getOperand(4));
3957  Ops.push_back(Chain);
3958  break;
3960  Opc = NVPTX::SULD_3D_V2I64_ZERO;
3961  Ops.push_back(TexHandle);
3962  Ops.push_back(N->getOperand(2));
3963  Ops.push_back(N->getOperand(3));
3964  Ops.push_back(N->getOperand(4));
3965  Ops.push_back(Chain);
3966  break;
3968  Opc = NVPTX::SULD_3D_V4I8_ZERO;
3969  Ops.push_back(TexHandle);
3970  Ops.push_back(N->getOperand(2));
3971  Ops.push_back(N->getOperand(3));
3972  Ops.push_back(N->getOperand(4));
3973  Ops.push_back(Chain);
3974  break;
3976  Opc = NVPTX::SULD_3D_V4I16_ZERO;
3977  Ops.push_back(TexHandle);
3978  Ops.push_back(N->getOperand(2));
3979  Ops.push_back(N->getOperand(3));
3980  Ops.push_back(N->getOperand(4));
3981  Ops.push_back(Chain);
3982  break;
3984  Opc = NVPTX::SULD_3D_V4I32_ZERO;
3985  Ops.push_back(TexHandle);
3986  Ops.push_back(N->getOperand(2));
3987  Ops.push_back(N->getOperand(3));
3988  Ops.push_back(N->getOperand(4));
3989  Ops.push_back(Chain);
3990  break;
3991  }
3992  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3993  return true;
3994 }
3995 
3996 
3997 /// SelectBFE - Look for instruction sequences that can be made more efficient
3998 /// by using the 'bfe' (bit-field extract) PTX instruction
3999 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
4000  SDLoc DL(N);
4001  SDValue LHS = N->getOperand(0);
4002  SDValue RHS = N->getOperand(1);
4003  SDValue Len;
4004  SDValue Start;
4005  SDValue Val;
4006  bool IsSigned = false;
4007 
4008  if (N->getOpcode() == ISD::AND) {
4009  // Canonicalize the operands
4010  // We want 'and %val, %mask'
4011  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4012  std::swap(LHS, RHS);
4013  }
4014 
4016  if (!Mask) {
4017  // We need a constant mask on the RHS of the AND
4018  return false;
4019  }
4020 
4021  // Extract the mask bits
4022  uint64_t MaskVal = Mask->getZExtValue();
4023  if (!isMask_64(MaskVal)) {
4024  // We *could* handle shifted masks here, but doing so would require an
4025  // 'and' operation to fix up the low-order bits so we would trade
4026  // shr+and for bfe+and, which has the same throughput
4027  return false;
4028  }
4029 
4030  // How many bits are in our mask?
4031  uint64_t NumBits = countTrailingOnes(MaskVal);
4032  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4033 
4034  if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4035  // We have a 'srl/and' pair, extract the effective start bit and length
4036  Val = LHS.getNode()->getOperand(0);
4037  Start = LHS.getNode()->getOperand(1);
4038  ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4039  if (StartConst) {
4040  uint64_t StartVal = StartConst->getZExtValue();
4041  // How many "good" bits do we have left? "good" is defined here as bits
4042  // that exist in the original value, not shifted in.
4043  uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
4044  if (NumBits > GoodBits) {
4045  // Do not handle the case where bits have been shifted in. In theory
4046  // we could handle this, but the cost is likely higher than just
4047  // emitting the srl/and pair.
4048  return false;
4049  }
4050  Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4051  } else {
4052  // Do not handle the case where the shift amount (can be zero if no srl
4053  // was found) is not constant. We could handle this case, but it would
4054  // require run-time logic that would be more expensive than just
4055  // emitting the srl/and pair.
4056  return false;
4057  }
4058  } else {
4059  // Do not handle the case where the LHS of the and is not a shift. While
4060  // it would be trivial to handle this case, it would just transform
4061  // 'and' -> 'bfe', but 'and' has higher-throughput.
4062  return false;
4063  }
4064  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4065  if (LHS->getOpcode() == ISD::AND) {
4066  ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4067  if (!ShiftCnst) {
4068  // Shift amount must be constant
4069  return false;
4070  }
4071 
4072  uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4073 
4074  SDValue AndLHS = LHS->getOperand(0);
4075  SDValue AndRHS = LHS->getOperand(1);
4076 
4077  // Canonicalize the AND to have the mask on the RHS
4078  if (isa<ConstantSDNode>(AndLHS)) {
4079  std::swap(AndLHS, AndRHS);
4080  }
4081 
4082  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4083  if (!MaskCnst) {
4084  // Mask must be constant
4085  return false;
4086  }
4087 
4088  uint64_t MaskVal = MaskCnst->getZExtValue();
4089  uint64_t NumZeros;
4090  uint64_t NumBits;
4091  if (isMask_64(MaskVal)) {
4092  NumZeros = 0;
4093  // The number of bits in the result bitfield will be the number of
4094  // trailing ones (the AND) minus the number of bits we shift off
4095  NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4096  } else if (isShiftedMask_64(MaskVal)) {
4097  NumZeros = countTrailingZeros(MaskVal);
4098  unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4099  // The number of bits in the result bitfield will be the number of
4100  // trailing zeros plus the number of set bits in the mask minus the
4101  // number of bits we shift off
4102  NumBits = NumZeros + NumOnes - ShiftAmt;
4103  } else {
4104  // This is not a mask we can handle
4105  return false;
4106  }
4107 
4108  if (ShiftAmt < NumZeros) {
4109  // Handling this case would require extra logic that would make this
4110  // transformation non-profitable
4111  return false;
4112  }
4113 
4114  Val = AndLHS;
4115  Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4116  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4117  } else if (LHS->getOpcode() == ISD::SHL) {
4118  // Here, we have a pattern like:
4119  //
4120  // (sra (shl val, NN), MM)
4121  // or
4122  // (srl (shl val, NN), MM)
4123  //
4124  // If MM >= NN, we can efficiently optimize this with bfe
4125  Val = LHS->getOperand(0);
4126 
4127  SDValue ShlRHS = LHS->getOperand(1);
4128  ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4129  if (!ShlCnst) {
4130  // Shift amount must be constant
4131  return false;
4132  }
4133  uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4134 
4135  SDValue ShrRHS = RHS;
4136  ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4137  if (!ShrCnst) {
4138  // Shift amount must be constant
4139  return false;
4140  }
4141  uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4142 
4143  // To avoid extra codegen and be profitable, we need Outer >= Inner
4144  if (OuterShiftAmt < InnerShiftAmt) {
4145  return false;
4146  }
4147 
4148  // If the outer shift is more than the type size, we have no bitfield to
4149  // extract (since we also check that the inner shift is <= the outer shift
4150  // then this also implies that the inner shift is < the type size)
4151  if (OuterShiftAmt >= Val.getValueSizeInBits()) {
4152  return false;
4153  }
4154 
4155  Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
4156  MVT::i32);
4157  Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
4158  DL, MVT::i32);
4159 
4160  if (N->getOpcode() == ISD::SRA) {
4161  // If we have a arithmetic right shift, we need to use the signed bfe
4162  // variant
4163  IsSigned = true;
4164  }
4165  } else {
4166  // No can do...
4167  return false;
4168  }
4169  } else {
4170  // No can do...
4171  return false;
4172  }
4173 
4174 
4175  unsigned Opc;
4176  // For the BFE operations we form here from "and" and "srl", always use the
4177  // unsigned variants.
4178  if (Val.getValueType() == MVT::i32) {
4179  if (IsSigned) {
4180  Opc = NVPTX::BFE_S32rii;
4181  } else {
4182  Opc = NVPTX::BFE_U32rii;
4183  }
4184  } else if (Val.getValueType() == MVT::i64) {
4185  if (IsSigned) {
4186  Opc = NVPTX::BFE_S64rii;
4187  } else {
4188  Opc = NVPTX::BFE_U64rii;
4189  }
4190  } else {
4191  // We cannot handle this type
4192  return false;
4193  }
4194 
4195  SDValue Ops[] = {
4196  Val, Start, Len
4197  };
4198 
4199  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
4200  return true;
4201 }
4202 
4203 // SelectDirectAddr - Match a direct address for DAG.
4204 // A direct address could be a globaladdress or externalsymbol.
4205 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4206  // Return true if TGA or ES.
4207  if (N.getOpcode() == ISD::TargetGlobalAddress ||
4209  Address = N;
4210  return true;
4211  }
4212  if (N.getOpcode() == NVPTXISD::Wrapper) {
4213  Address = N.getOperand(0);
4214  return true;
4215  }
4216  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
4217  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
4218  if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
4219  CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
4220  CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
4221  return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
4222  }
4223  return false;
4224 }
4225 
4226 // symbol+offset
4227 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4228  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4229  if (Addr.getOpcode() == ISD::ADD) {
4230  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4231  SDValue base = Addr.getOperand(0);
4232  if (SelectDirectAddr(base, Base)) {
4233  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
4234  mvt);
4235  return true;
4236  }
4237  }
4238  }
4239  return false;
4240 }
4241 
4242 // symbol+offset
4243 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
4244  SDValue &Base, SDValue &Offset) {
4245  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
4246 }
4247 
4248 // symbol+offset
4249 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
4250  SDValue &Base, SDValue &Offset) {
4251  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
4252 }
4253 
4254 // register+offset
4255 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
4256  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4257  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
4258  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
4259  Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
4260  return true;
4261  }
4262  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
4264  return false; // direct calls.
4265 
4266  if (Addr.getOpcode() == ISD::ADD) {
4267  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
4268  return false;
4269  }
4270  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4271  if (FrameIndexSDNode *FIN =
4272  dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
4273  // Constant offset from frame ref.
4274  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
4275  else
4276  Base = Addr.getOperand(0);
4277  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
4278  mvt);
4279  return true;
4280  }
4281  }
4282  return false;
4283 }
4284 
4285 // register+offset
4286 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
4287  SDValue &Base, SDValue &Offset) {
4288  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
4289 }
4290 
4291 // register+offset
4292 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
4293  SDValue &Base, SDValue &Offset) {
4294  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
4295 }
4296 
4297 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
4298  unsigned int spN) const {
4299  const Value *Src = nullptr;
4300  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
4301  if (spN == 0 && mN->getMemOperand()->getPseudoValue())
4302  return true;
4303  Src = mN->getMemOperand()->getValue();
4304  }
4305  if (!Src)
4306  return false;
4307  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
4308  return (PT->getAddressSpace() == spN);
4309  return false;
4310 }
4311 
4312 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
4313 /// inline asm expressions.
4315  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
4316  SDValue Op0, Op1;
4317  switch (ConstraintID) {
4318  default:
4319  return true;
4320  case InlineAsm::Constraint_m: // memory
4321  if (SelectDirectAddr(Op, Op0)) {
4322  OutOps.push_back(Op0);
4323  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
4324  return false;
4325  }
4326  if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
4327  OutOps.push_back(Op0);
4328  OutOps.push_back(Op1);
4329  return false;
4330  }
4331  break;
4332  }
4333  return true;
4334 }
4335 
4336 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
4337 /// conversion from \p SrcTy to \p DestTy.
4338 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
4339  bool IsSigned) {
4340  switch (SrcTy.SimpleTy) {
4341  default:
4342  llvm_unreachable("Unhandled source type");
4343  case MVT::i8:
4344  switch (DestTy.SimpleTy) {
4345  default:
4346  llvm_unreachable("Unhandled dest type");
4347  case MVT::i16:
4348  return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
4349  case MVT::i32:
4350  return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
4351  case MVT::i64:
4352  return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
4353  }
4354  case MVT::i16:
4355  switch (DestTy.SimpleTy) {
4356  default:
4357  llvm_unreachable("Unhandled dest type");
4358  case MVT::i8:
4359  return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
4360  case MVT::i32:
4361  return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
4362  case MVT::i64:
4363  return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
4364  }
4365  case MVT::i32:
4366  switch (DestTy.SimpleTy) {
4367  default:
4368  llvm_unreachable("Unhandled dest type");
4369  case MVT::i8:
4370  return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
4371  case MVT::i16:
4372  return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
4373  case MVT::i64:
4374  return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
4375  }
4376  case MVT::i64:
4377  switch (DestTy.SimpleTy) {
4378  default:
4379  llvm_unreachable("Unhandled dest type");
4380  case MVT::i8:
4381  return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
4382  case MVT::i16:
4383  return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
4384  case MVT::i32:
4385  return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
4386  }
4387  }
4388 }
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:544
bool isInvariant() const
EVT getValueType() const
Return the ValueType of the referenced return value.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:103
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
bool isVector() const
Return true if this is a vector value type.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
static Optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, Optional< unsigned > Opcode_i64, unsigned Opcode_f16, unsigned Opcode_f16x2, unsigned Opcode_f32, Optional< unsigned > Opcode_f64)
SDVTList getVTList() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
bool useF32FTZ(const MachineFunction &MF) const
static unsigned int getCodeAddrSpace(MemSDNode *N)
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const NVPTXSubtarget * Subtarget
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
MachineFunction * MF
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel)
A description of a memory reference used in the backend.
Shift and rotation operations.
Definition: ISDOpcodes.h:378
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:470
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SimpleValueType SimpleTy
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This represents a list of ValueType&#39;s that has been intern&#39;d by a SelectionDAG.
unsigned getSizeInBits() const
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool isKernelFunction(const Function &F)
#define F(x, y, z)
Definition: MD5.cpp:55
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:403
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:594
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:129
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:909
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions...
#define EQ(a, b)
Definition: regexec.c:112
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:546
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
Definition: SelectionDAG.h:574
unsigned getSrcAddressSpace() const
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
This class is used to represent ISD::STORE nodes.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:121
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const Value * getValue() const
Return the base address of the memory access.
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
CodeGenOpt::Level OptLevel
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:112
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273