LLVM  3.7.0
NVPTXISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
18 #include "llvm/Support/Debug.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "nvptx-isel"
26 
28  "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
29  cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
30  " IEEE Compliant F32 div.rnd if available."),
31  cl::init(2));
32 
// Hidden boolean command-line option "nvptx-prec-sqrtf32"; it defaults to
// true. usePrecSqrtF32() consults it only when it was given explicitly on the
// command line. Per the description string: 0 selects sqrt.approx, 1 selects
// sqrt.rn.
33 static cl::opt<bool>
34 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
35  cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
36  cl::init(true));
37 
// Hidden boolean command-line option "nvptx-f32ftz"; it defaults to false and
// may be given more than once (cl::ZeroOrMore). useF32FTZ() honors it only
// when it was given explicitly on the command line; otherwise the function
// attribute of the same name decides.
38 static cl::opt<bool>
39 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
40  cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
41  cl::init(false));
42 
43 
44 /// createNVPTXISelDag - This pass converts a legalized DAG into a
45 /// NVPTX-specific DAG, ready for instruction scheduling.
// NOTE(review): the first line of the signature (listing line 46) is absent
// from this listing; only the trailing OptLevel parameter is visible here.
47  llvm::CodeGenOpt::Level OptLevel) {
 // Hands back a newly allocated selector built from TM and OptLevel.
48  return new NVPTXDAGToDAGISel(TM, OptLevel);
49 }
50 
51 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
52  CodeGenOpt::Level OptLevel)
53  : SelectionDAGISel(tm, OptLevel), TM(tm) {
54  doMulWide = (OptLevel > 0);
55 }
56 
57 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
58  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
59  return SelectionDAGISel::runOnMachineFunction(MF);
60 }
61 
62 int NVPTXDAGToDAGISel::getDivF32Level() const {
63  if (UsePrecDivF32.getNumOccurrences() > 0) {
64  // If nvptx-prec-div32=N is used on the command-line, always honor it
65  return UsePrecDivF32;
66  } else {
67  // Otherwise, use div.approx if fast math is enabled
68  if (TM.Options.UnsafeFPMath)
69  return 0;
70  else
71  return 2;
72  }
73 }
74 
75 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
76  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
77  // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
78  return UsePrecSqrtF32;
79  } else {
80  // Otherwise, use sqrt.approx if fast math is enabled
81  return !TM.Options.UnsafeFPMath;
82  }
83 }
84 
85 bool NVPTXDAGToDAGISel::useF32FTZ() const {
86  if (FtzEnabled.getNumOccurrences() > 0) {
87  // If nvptx-f32ftz is used on the command-line, always honor it
88  return FtzEnabled;
89  } else {
90  const Function *F = MF->getFunction();
91  // Otherwise, check for an nvptx-f32ftz attribute on the function
92  if (F->hasFnAttribute("nvptx-f32ftz"))
93  return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
94  else
95  return false;
96  }
97 }
98 
99 bool NVPTXDAGToDAGISel::allowFMA() const {
100  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
101  return TL->allowFMA(*MF, OptLevel);
102 }
103 
104 /// Select - Select instructions not customized! Used for
105 /// expanded, promoted and normal instructions.
// Dispatches on the node's opcode to one of the hand-written Select* helpers.
// When the helper returns a node it is returned directly; when it returns
// null (or no helper matches) the node is passed to SelectCode(N).
//
// NOTE(review): the function signature (listing line 106) and several runs of
// `case` labels (listing lines 136-137, 140-142, 145-149, 152, 155, 158-325
// and 328-492) are absent from this listing, so the opcodes that reach
// SelectLoadParam, SelectStoreRetval, SelectStoreParam, the intrinsic
// selectors and the texture/surface selectors cannot be read off here.
107 
 // A node that is already a machine opcode is marked with node id -1 and
 // not selected again.
108  if (N->isMachineOpcode()) {
109  N->setNodeId(-1);
110  return nullptr; // Already selected.
111  }
112 
113  SDNode *ResNode = nullptr;
114  switch (N->getOpcode()) {
115  case ISD::LOAD:
116  ResNode = SelectLoad(N);
117  break;
118  case ISD::STORE:
119  ResNode = SelectStore(N);
120  break;
121  case NVPTXISD::LoadV2:
122  case NVPTXISD::LoadV4:
123  ResNode = SelectLoadVector(N);
124  break;
125  case NVPTXISD::LDGV2:
126  case NVPTXISD::LDGV4:
127  case NVPTXISD::LDUV2:
128  case NVPTXISD::LDUV4:
129  ResNode = SelectLDGLDU(N);
130  break;
131  case NVPTXISD::StoreV2:
132  case NVPTXISD::StoreV4:
133  ResNode = SelectStoreVector(N);
134  break;
135  case NVPTXISD::LoadParam:
138  ResNode = SelectLoadParam(N);
139  break;
143  ResNode = SelectStoreRetval(N);
144  break;
150  ResNode = SelectStoreParam(N);
151  break;
153  ResNode = SelectIntrinsicNoChain(N);
154  break;
156  ResNode = SelectIntrinsicChain(N);
157  break;
326  ResNode = SelectTextureIntrinsic(N);
327  break;
493  ResNode = SelectSurfaceIntrinsic(N);
494  break;
495  case ISD::AND:
496  case ISD::SRA:
497  case ISD::SRL:
498  // Try to select BFE
499  ResNode = SelectBFE(N);
500  break;
501  case ISD::ADDRSPACECAST:
502  ResNode = SelectAddrSpaceCast(N);
503  break;
504  default:
505  break;
506  }
 // A non-null helper result wins; otherwise fall back to SelectCode.
507  if (ResNode)
508  return ResNode;
509  return SelectCode(N);
510 }
511 
512 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
513  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
514  switch (IID) {
515  default:
516  return NULL;
517  case Intrinsic::nvvm_ldg_global_f:
518  case Intrinsic::nvvm_ldg_global_i:
519  case Intrinsic::nvvm_ldg_global_p:
520  case Intrinsic::nvvm_ldu_global_f:
521  case Intrinsic::nvvm_ldu_global_i:
522  case Intrinsic::nvvm_ldu_global_p:
523  return SelectLDGLDU(N);
524  }
525 }
526 
// Derives an address-space code for memory node N from the IR value attached
// to its memory operand: when that value has a pointer type, the pointer's
// address space is switched on.
//
// NOTE(review): every return statement and all non-default switch cases
// (listing lines 531, 535-540 and 544) are absent from this listing, so the
// codes returned for each address space, for a missing source value and for
// the fall-through path cannot be confirmed from here.
527 static unsigned int getCodeAddrSpace(MemSDNode *N) {
528  const Value *Src = N->getMemOperand()->getValue();
529 
 // No IR value is attached to the memory operand.
530  if (!Src)
532 
533  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
534  switch (PT->getAddressSpace()) {
541  default: break;
542  }
543  }
545 }
546 
547 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
548  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
549  switch (IID) {
550  default:
551  return nullptr;
552  case Intrinsic::nvvm_texsurf_handle_internal:
553  return SelectTexSurfHandle(N);
554  }
555 }
556 
557 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
558  // Op 0 is the intrinsic ID
559  SDValue Wrapper = N->getOperand(1);
560  SDValue GlobalVal = Wrapper.getOperand(0);
561  return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
562  GlobalVal);
563 }
564 
// Selects a machine node for an address-space cast N.
//
// When the destination is ADDRESS_SPACE_GENERIC, one of the cvta_*_yes
// opcodes is chosen from the source address space. Otherwise the source must
// be address space 0 and one of the cvta_to_*_yes opcodes (or
// nvvm_ptr_gen_to_param for ADDRESS_SPACE_PARAM) is chosen from the
// destination. The _64 opcode variants are used when TM.is64Bit() is true.
// Unsupported combinations end in report_fatal_error.
//
// NOTE(review): four `case` labels (listing lines 579, 582, 600 and 604) are
// absent from this listing; the opcode names suggest they are the global and
// shared address spaces, but that cannot be confirmed from here.
565 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
566  SDValue Src = N->getOperand(0);
567  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
568  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
569  unsigned DstAddrSpace = CastN->getDestAddressSpace();
570 
571  assert(SrcAddrSpace != DstAddrSpace &&
572  "addrspacecast must be between different address spaces");
573 
574  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
575  // Specific to generic
576  unsigned Opc;
577  switch (SrcAddrSpace) {
578  default: report_fatal_error("Bad address space in addrspacecast");
580  Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
581  break;
583  Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
584  break;
585  case ADDRESS_SPACE_CONST:
586  Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
587  break;
588  case ADDRESS_SPACE_LOCAL:
589  Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
590  break;
591  }
592  return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
593  } else {
594  // Generic to specific
 // NOTE(review): the literal 0 here is presumably ADDRESS_SPACE_GENERIC,
 // which is the name used in the comparison above - confirm and consider
 // using the named constant.
595  if (SrcAddrSpace != 0)
596  report_fatal_error("Cannot cast between two non-generic address spaces");
597  unsigned Opc;
598  switch (DstAddrSpace) {
599  default: report_fatal_error("Bad address space in addrspacecast");
601  Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
602  : NVPTX::cvta_to_global_yes;
603  break;
605  Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
606  : NVPTX::cvta_to_shared_yes;
607  break;
608  case ADDRESS_SPACE_CONST:
609  Opc =
610  TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
611  break;
612  case ADDRESS_SPACE_LOCAL:
613  Opc =
614  TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
615  break;
616  case ADDRESS_SPACE_PARAM:
617  Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
618  : NVPTX::nvvm_ptr_gen_to_param;
619  break;
620  }
621  return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
622  }
623 }
624 
// Selects an NVPTX LD_* machine node for the load node N.
//
// Returns nullptr (leaving the node unselected by this function) when the
// load is indexed, when its memory VT is not simple, when the memory VT is a
// vector with an element count other than 2 or 4, or when TargetVT is not one
// of i8/i16/i32/i64/f32/f64.
//
// The address operand is matched in this order: SelectDirectAddr (LD_*_avar),
// SelectADDRsi / SelectADDRsi64 (LD_*_asi), SelectADDRri / SelectADDRri64
// (LD_*_ari / LD_*_ari_64), and finally the operand itself (LD_*_areg /
// LD_*_areg_64). The node's memory operand is attached to the new node.
//
// NOTE(review): listing lines 673, 675, 677 (the fromType assignments), 685
// (presumably the TargetVT declaration) and 850 (presumably the MemRefs0
// declaration) are absent from this listing.
625 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
626  SDLoc dl(N);
627  LoadSDNode *LD = cast<LoadSDNode>(N);
628  EVT LoadedVT = LD->getMemoryVT();
629  SDNode *NVPTXLD = nullptr;
630 
631  // do not support pre/post inc/dec
632  if (LD->isIndexed())
633  return nullptr;
634 
635  if (!LoadedVT.isSimple())
636  return nullptr;
637 
638  // Address Space Setting
639  unsigned int codeAddrSpace = getCodeAddrSpace(LD);
640 
641  // Volatile Setting
642  // - .volatile is only available for .global and .shared
 //   (the check below also keeps the flag for the GENERIC code)
643  bool isVolatile = LD->isVolatile();
644  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
645  codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
646  codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
647  isVolatile = false;
648 
649  // Vector Setting
650  MVT SimpleVT = LoadedVT.getSimpleVT();
651  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
652  if (SimpleVT.isVector()) {
653  unsigned num = SimpleVT.getVectorNumElements();
654  if (num == 2)
655  vecType = NVPTX::PTXLdStInstCode::V2;
656  else if (num == 4)
657  vecType = NVPTX::PTXLdStInstCode::V4;
658  else
659  return nullptr;
660  }
661 
662  // Type Setting: fromType + fromTypeWidth
663  //
664  // Sign : ISD::SEXTLOAD
665  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
666  // type is integer
667  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
668  MVT ScalarVT = SimpleVT.getScalarType();
669  // Read at least 8 bits (predicates are stored as 8-bit values)
670  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
671  unsigned int fromType;
 // NOTE(review): the statement under each branch below is missing from this
 // listing; per the table above they presumably assign the signed, float and
 // unsigned codes in that order.
672  if ((LD->getExtensionType() == ISD::SEXTLOAD))
674  else if (ScalarVT.isFloatingPoint())
676  else
678 
679  // Create the machine instruction DAG
680  SDValue Chain = N->getOperand(0);
681  SDValue N1 = N->getOperand(1);
682  SDValue Addr;
683  SDValue Offset, Base;
684  unsigned Opcode;
686 
 // First choice: the address matches SelectDirectAddr.
687  if (SelectDirectAddr(N1, Addr)) {
688  switch (TargetVT) {
689  case MVT::i8:
690  Opcode = NVPTX::LD_i8_avar;
691  break;
692  case MVT::i16:
693  Opcode = NVPTX::LD_i16_avar;
694  break;
695  case MVT::i32:
696  Opcode = NVPTX::LD_i32_avar;
697  break;
698  case MVT::i64:
699  Opcode = NVPTX::LD_i64_avar;
700  break;
701  case MVT::f32:
702  Opcode = NVPTX::LD_f32_avar;
703  break;
704  case MVT::f64:
705  Opcode = NVPTX::LD_f64_avar;
706  break;
707  default:
708  return nullptr;
709  }
710  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
711  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
712  getI32Imm(fromTypeWidth, dl), Addr, Chain };
713  NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
 // Second choice: SelectADDRsi (or its 64-bit form) yields Base + Offset.
714  } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
715  : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
716  switch (TargetVT) {
717  case MVT::i8:
718  Opcode = NVPTX::LD_i8_asi;
719  break;
720  case MVT::i16:
721  Opcode = NVPTX::LD_i16_asi;
722  break;
723  case MVT::i32:
724  Opcode = NVPTX::LD_i32_asi;
725  break;
726  case MVT::i64:
727  Opcode = NVPTX::LD_i64_asi;
728  break;
729  case MVT::f32:
730  Opcode = NVPTX::LD_f32_asi;
731  break;
732  case MVT::f64:
733  Opcode = NVPTX::LD_f64_asi;
734  break;
735  default:
736  return nullptr;
737  }
738  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
739  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
740  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
741  NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
 // Third choice: SelectADDRri (or its 64-bit form); the opcode table also
 // differs between 64-bit and non-64-bit targets.
742  } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
743  : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
744  if (TM.is64Bit()) {
745  switch (TargetVT) {
746  case MVT::i8:
747  Opcode = NVPTX::LD_i8_ari_64;
748  break;
749  case MVT::i16:
750  Opcode = NVPTX::LD_i16_ari_64;
751  break;
752  case MVT::i32:
753  Opcode = NVPTX::LD_i32_ari_64;
754  break;
755  case MVT::i64:
756  Opcode = NVPTX::LD_i64_ari_64;
757  break;
758  case MVT::f32:
759  Opcode = NVPTX::LD_f32_ari_64;
760  break;
761  case MVT::f64:
762  Opcode = NVPTX::LD_f64_ari_64;
763  break;
764  default:
765  return nullptr;
766  }
767  } else {
768  switch (TargetVT) {
769  case MVT::i8:
770  Opcode = NVPTX::LD_i8_ari;
771  break;
772  case MVT::i16:
773  Opcode = NVPTX::LD_i16_ari;
774  break;
775  case MVT::i32:
776  Opcode = NVPTX::LD_i32_ari;
777  break;
778  case MVT::i64:
779  Opcode = NVPTX::LD_i64_ari;
780  break;
781  case MVT::f32:
782  Opcode = NVPTX::LD_f32_ari;
783  break;
784  case MVT::f64:
785  Opcode = NVPTX::LD_f64_ari;
786  break;
787  default:
788  return nullptr;
789  }
790  }
791  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
792  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
793  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
794  NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
 // Fallback: none of the matchers applied, so N1 itself is the address
 // operand.
795  } else {
796  if (TM.is64Bit()) {
797  switch (TargetVT) {
798  case MVT::i8:
799  Opcode = NVPTX::LD_i8_areg_64;
800  break;
801  case MVT::i16:
802  Opcode = NVPTX::LD_i16_areg_64;
803  break;
804  case MVT::i32:
805  Opcode = NVPTX::LD_i32_areg_64;
806  break;
807  case MVT::i64:
808  Opcode = NVPTX::LD_i64_areg_64;
809  break;
810  case MVT::f32:
811  Opcode = NVPTX::LD_f32_areg_64;
812  break;
813  case MVT::f64:
814  Opcode = NVPTX::LD_f64_areg_64;
815  break;
816  default:
817  return nullptr;
818  }
819  } else {
820  switch (TargetVT) {
821  case MVT::i8:
822  Opcode = NVPTX::LD_i8_areg;
823  break;
824  case MVT::i16:
825  Opcode = NVPTX::LD_i16_areg;
826  break;
827  case MVT::i32:
828  Opcode = NVPTX::LD_i32_areg;
829  break;
830  case MVT::i64:
831  Opcode = NVPTX::LD_i64_areg;
832  break;
833  case MVT::f32:
834  Opcode = NVPTX::LD_f32_areg;
835  break;
836  case MVT::f64:
837  Opcode = NVPTX::LD_f64_areg;
838  break;
839  default:
840  return nullptr;
841  }
842  }
843  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
844  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
845  getI32Imm(fromTypeWidth, dl), N1, Chain };
846  NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
847  }
848 
 // Carry the original node's memory operand over to the machine node.
849  if (NVPTXLD) {
851  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
852  cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
853  }
854 
855  return NVPTXLD;
856 }
857 
// Selects an NVPTX LDV_* machine node for a LoadV2 / LoadV4 node N.
//
// Returns nullptr when the memory VT is not simple, when the opcode is
// neither LoadV2 nor LoadV4, or when the element type has no entry in the
// opcode tables below. The LoadV2 tables cover i8/i16/i32/i64/f32/f64; the
// LoadV4 tables cover only i8/i16/i32/f32.
//
// The address operand is matched in this order: SelectDirectAddr (_avar),
// SelectADDRsi / SelectADDRsi64 (_asi), SelectADDRri / SelectADDRri64
// (_ari / _ari_64), and finally the operand itself (_areg / _areg_64). The
// node's memory operand is attached to the new node.
//
// NOTE(review): listing lines 900, 902, 904 (the FromType assignments) and
// 1232 (presumably the MemRefs0 declaration) are absent from this listing.
858 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
859 
860  SDValue Chain = N->getOperand(0);
861  SDValue Op1 = N->getOperand(1);
862  SDValue Addr, Offset, Base;
863  unsigned Opcode;
864  SDLoc DL(N);
865  SDNode *LD;
866  MemSDNode *MemSD = cast<MemSDNode>(N);
867  EVT LoadedVT = MemSD->getMemoryVT();
868 
869  if (!LoadedVT.isSimple())
870  return nullptr;
871 
872  // Address Space Setting
873  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
874 
875  // Volatile Setting
876  // - .volatile is only available for .global and .shared
 //   (the check below also keeps the flag for the GENERIC code)
877  bool IsVolatile = MemSD->isVolatile();
878  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
879  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
880  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
881  IsVolatile = false;
882 
883  // Vector Setting
884  MVT SimpleVT = LoadedVT.getSimpleVT();
885 
886  // Type Setting: fromType + fromTypeWidth
887  //
888  // Sign : ISD::SEXTLOAD
889  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
890  // type is integer
891  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
892  MVT ScalarVT = SimpleVT.getScalarType();
893  // Read at least 8 bits (predicates are stored as 8-bit values)
894  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
895  unsigned int FromType;
896  // The last operand holds the original LoadSDNode::getExtensionType() value
897  unsigned ExtensionType = cast<ConstantSDNode>(
898  N->getOperand(N->getNumOperands() - 1))->getZExtValue();
 // NOTE(review): the statement under each branch below is missing from this
 // listing; per the table above they presumably assign the signed, float and
 // unsigned codes in that order.
899  if (ExtensionType == ISD::SEXTLOAD)
901  else if (ScalarVT.isFloatingPoint())
903  else
905 
906  unsigned VecType;
907 
908  switch (N->getOpcode()) {
909  case NVPTXISD::LoadV2:
910  VecType = NVPTX::PTXLdStInstCode::V2;
911  break;
912  case NVPTXISD::LoadV4:
913  VecType = NVPTX::PTXLdStInstCode::V4;
914  break;
915  default:
916  return nullptr;
917  }
918 
919  EVT EltVT = N->getValueType(0);
920 
 // First choice: the address matches SelectDirectAddr.
921  if (SelectDirectAddr(Op1, Addr)) {
922  switch (N->getOpcode()) {
923  default:
924  return nullptr;
925  case NVPTXISD::LoadV2:
926  switch (EltVT.getSimpleVT().SimpleTy) {
927  default:
928  return nullptr;
929  case MVT::i8:
930  Opcode = NVPTX::LDV_i8_v2_avar;
931  break;
932  case MVT::i16:
933  Opcode = NVPTX::LDV_i16_v2_avar;
934  break;
935  case MVT::i32:
936  Opcode = NVPTX::LDV_i32_v2_avar;
937  break;
938  case MVT::i64:
939  Opcode = NVPTX::LDV_i64_v2_avar;
940  break;
941  case MVT::f32:
942  Opcode = NVPTX::LDV_f32_v2_avar;
943  break;
944  case MVT::f64:
945  Opcode = NVPTX::LDV_f64_v2_avar;
946  break;
947  }
948  break;
949  case NVPTXISD::LoadV4:
950  switch (EltVT.getSimpleVT().SimpleTy) {
951  default:
952  return nullptr;
953  case MVT::i8:
954  Opcode = NVPTX::LDV_i8_v4_avar;
955  break;
956  case MVT::i16:
957  Opcode = NVPTX::LDV_i16_v4_avar;
958  break;
959  case MVT::i32:
960  Opcode = NVPTX::LDV_i32_v4_avar;
961  break;
962  case MVT::f32:
963  Opcode = NVPTX::LDV_f32_v4_avar;
964  break;
965  }
966  break;
967  }
968 
969  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
970  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
971  getI32Imm(FromTypeWidth, DL), Addr, Chain };
972  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
 // Second choice: SelectADDRsi (or its 64-bit form) yields Base + Offset.
973  } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
974  : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
975  switch (N->getOpcode()) {
976  default:
977  return nullptr;
978  case NVPTXISD::LoadV2:
979  switch (EltVT.getSimpleVT().SimpleTy) {
980  default:
981  return nullptr;
982  case MVT::i8:
983  Opcode = NVPTX::LDV_i8_v2_asi;
984  break;
985  case MVT::i16:
986  Opcode = NVPTX::LDV_i16_v2_asi;
987  break;
988  case MVT::i32:
989  Opcode = NVPTX::LDV_i32_v2_asi;
990  break;
991  case MVT::i64:
992  Opcode = NVPTX::LDV_i64_v2_asi;
993  break;
994  case MVT::f32:
995  Opcode = NVPTX::LDV_f32_v2_asi;
996  break;
997  case MVT::f64:
998  Opcode = NVPTX::LDV_f64_v2_asi;
999  break;
1000  }
1001  break;
1002  case NVPTXISD::LoadV4:
1003  switch (EltVT.getSimpleVT().SimpleTy) {
1004  default:
1005  return nullptr;
1006  case MVT::i8:
1007  Opcode = NVPTX::LDV_i8_v4_asi;
1008  break;
1009  case MVT::i16:
1010  Opcode = NVPTX::LDV_i16_v4_asi;
1011  break;
1012  case MVT::i32:
1013  Opcode = NVPTX::LDV_i32_v4_asi;
1014  break;
1015  case MVT::f32:
1016  Opcode = NVPTX::LDV_f32_v4_asi;
1017  break;
1018  }
1019  break;
1020  }
1021 
1022  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1023  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1024  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1025  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
 // Third choice: SelectADDRri (or its 64-bit form); the opcode table also
 // differs between 64-bit and non-64-bit targets.
1026  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1027  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1028  if (TM.is64Bit()) {
1029  switch (N->getOpcode()) {
1030  default:
1031  return nullptr;
1032  case NVPTXISD::LoadV2:
1033  switch (EltVT.getSimpleVT().SimpleTy) {
1034  default:
1035  return nullptr;
1036  case MVT::i8:
1037  Opcode = NVPTX::LDV_i8_v2_ari_64;
1038  break;
1039  case MVT::i16:
1040  Opcode = NVPTX::LDV_i16_v2_ari_64;
1041  break;
1042  case MVT::i32:
1043  Opcode = NVPTX::LDV_i32_v2_ari_64;
1044  break;
1045  case MVT::i64:
1046  Opcode = NVPTX::LDV_i64_v2_ari_64;
1047  break;
1048  case MVT::f32:
1049  Opcode = NVPTX::LDV_f32_v2_ari_64;
1050  break;
1051  case MVT::f64:
1052  Opcode = NVPTX::LDV_f64_v2_ari_64;
1053  break;
1054  }
1055  break;
1056  case NVPTXISD::LoadV4:
1057  switch (EltVT.getSimpleVT().SimpleTy) {
1058  default:
1059  return nullptr;
1060  case MVT::i8:
1061  Opcode = NVPTX::LDV_i8_v4_ari_64;
1062  break;
1063  case MVT::i16:
1064  Opcode = NVPTX::LDV_i16_v4_ari_64;
1065  break;
1066  case MVT::i32:
1067  Opcode = NVPTX::LDV_i32_v4_ari_64;
1068  break;
1069  case MVT::f32:
1070  Opcode = NVPTX::LDV_f32_v4_ari_64;
1071  break;
1072  }
1073  break;
1074  }
1075  } else {
1076  switch (N->getOpcode()) {
1077  default:
1078  return nullptr;
1079  case NVPTXISD::LoadV2:
1080  switch (EltVT.getSimpleVT().SimpleTy) {
1081  default:
1082  return nullptr;
1083  case MVT::i8:
1084  Opcode = NVPTX::LDV_i8_v2_ari;
1085  break;
1086  case MVT::i16:
1087  Opcode = NVPTX::LDV_i16_v2_ari;
1088  break;
1089  case MVT::i32:
1090  Opcode = NVPTX::LDV_i32_v2_ari;
1091  break;
1092  case MVT::i64:
1093  Opcode = NVPTX::LDV_i64_v2_ari;
1094  break;
1095  case MVT::f32:
1096  Opcode = NVPTX::LDV_f32_v2_ari;
1097  break;
1098  case MVT::f64:
1099  Opcode = NVPTX::LDV_f64_v2_ari;
1100  break;
1101  }
1102  break;
1103  case NVPTXISD::LoadV4:
1104  switch (EltVT.getSimpleVT().SimpleTy) {
1105  default:
1106  return nullptr;
1107  case MVT::i8:
1108  Opcode = NVPTX::LDV_i8_v4_ari;
1109  break;
1110  case MVT::i16:
1111  Opcode = NVPTX::LDV_i16_v4_ari;
1112  break;
1113  case MVT::i32:
1114  Opcode = NVPTX::LDV_i32_v4_ari;
1115  break;
1116  case MVT::f32:
1117  Opcode = NVPTX::LDV_f32_v4_ari;
1118  break;
1119  }
1120  break;
1121  }
1122  }
1123 
1124  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1125  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1126  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1127 
1128  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
 // Fallback: none of the matchers applied, so Op1 itself is the address
 // operand.
1129  } else {
1130  if (TM.is64Bit()) {
1131  switch (N->getOpcode()) {
1132  default:
1133  return nullptr;
1134  case NVPTXISD::LoadV2:
1135  switch (EltVT.getSimpleVT().SimpleTy) {
1136  default:
1137  return nullptr;
1138  case MVT::i8:
1139  Opcode = NVPTX::LDV_i8_v2_areg_64;
1140  break;
1141  case MVT::i16:
1142  Opcode = NVPTX::LDV_i16_v2_areg_64;
1143  break;
1144  case MVT::i32:
1145  Opcode = NVPTX::LDV_i32_v2_areg_64;
1146  break;
1147  case MVT::i64:
1148  Opcode = NVPTX::LDV_i64_v2_areg_64;
1149  break;
1150  case MVT::f32:
1151  Opcode = NVPTX::LDV_f32_v2_areg_64;
1152  break;
1153  case MVT::f64:
1154  Opcode = NVPTX::LDV_f64_v2_areg_64;
1155  break;
1156  }
1157  break;
1158  case NVPTXISD::LoadV4:
1159  switch (EltVT.getSimpleVT().SimpleTy) {
1160  default:
1161  return nullptr;
1162  case MVT::i8:
1163  Opcode = NVPTX::LDV_i8_v4_areg_64;
1164  break;
1165  case MVT::i16:
1166  Opcode = NVPTX::LDV_i16_v4_areg_64;
1167  break;
1168  case MVT::i32:
1169  Opcode = NVPTX::LDV_i32_v4_areg_64;
1170  break;
1171  case MVT::f32:
1172  Opcode = NVPTX::LDV_f32_v4_areg_64;
1173  break;
1174  }
1175  break;
1176  }
1177  } else {
1178  switch (N->getOpcode()) {
1179  default:
1180  return nullptr;
1181  case NVPTXISD::LoadV2:
1182  switch (EltVT.getSimpleVT().SimpleTy) {
1183  default:
1184  return nullptr;
1185  case MVT::i8:
1186  Opcode = NVPTX::LDV_i8_v2_areg;
1187  break;
1188  case MVT::i16:
1189  Opcode = NVPTX::LDV_i16_v2_areg;
1190  break;
1191  case MVT::i32:
1192  Opcode = NVPTX::LDV_i32_v2_areg;
1193  break;
1194  case MVT::i64:
1195  Opcode = NVPTX::LDV_i64_v2_areg;
1196  break;
1197  case MVT::f32:
1198  Opcode = NVPTX::LDV_f32_v2_areg;
1199  break;
1200  case MVT::f64:
1201  Opcode = NVPTX::LDV_f64_v2_areg;
1202  break;
1203  }
1204  break;
1205  case NVPTXISD::LoadV4:
1206  switch (EltVT.getSimpleVT().SimpleTy) {
1207  default:
1208  return nullptr;
1209  case MVT::i8:
1210  Opcode = NVPTX::LDV_i8_v4_areg;
1211  break;
1212  case MVT::i16:
1213  Opcode = NVPTX::LDV_i16_v4_areg;
1214  break;
1215  case MVT::i32:
1216  Opcode = NVPTX::LDV_i32_v4_areg;
1217  break;
1218  case MVT::f32:
1219  Opcode = NVPTX::LDV_f32_v4_areg;
1220  break;
1221  }
1222  break;
1223  }
1224  }
1225 
1226  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1227  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1228  getI32Imm(FromTypeWidth, DL), Op1, Chain };
1229  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1230  }
1231 
 // Carry the original node's memory operand over to the machine node.
1233  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1234  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1235 
1236  return LD;
1237 }
1238 
1239 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1240 
1241  SDValue Chain = N->getOperand(0);
1242  SDValue Op1;
1243  MemSDNode *Mem;
1244  bool IsLDG = true;
1245 
1246  // If this is an LDG intrinsic, the address is the third operand. Its its an
1247  // LDG/LDU SD node (from custom vector handling), then its the second operand
1248  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1249  Op1 = N->getOperand(2);
1250  Mem = cast<MemIntrinsicSDNode>(N);
1251  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1252  switch (IID) {
1253  default:
1254  return NULL;
1255  case Intrinsic::nvvm_ldg_global_f:
1256  case Intrinsic::nvvm_ldg_global_i:
1257  case Intrinsic::nvvm_ldg_global_p:
1258  IsLDG = true;
1259  break;
1260  case Intrinsic::nvvm_ldu_global_f:
1261  case Intrinsic::nvvm_ldu_global_i:
1262  case Intrinsic::nvvm_ldu_global_p:
1263  IsLDG = false;
1264  break;
1265  }
1266  } else {
1267  Op1 = N->getOperand(1);
1268  Mem = cast<MemSDNode>(N);
1269  }
1270 
1271  unsigned Opcode;
1272  SDLoc DL(N);
1273  SDNode *LD;
1274  SDValue Base, Offset, Addr;
1275 
1276  EVT EltVT = Mem->getMemoryVT();
1277  if (EltVT.isVector()) {
1278  EltVT = EltVT.getVectorElementType();
1279  }
1280 
1281  if (SelectDirectAddr(Op1, Addr)) {
1282  switch (N->getOpcode()) {
1283  default:
1284  return nullptr;
1286  if (IsLDG) {
1287  switch (EltVT.getSimpleVT().SimpleTy) {
1288  default:
1289  return nullptr;
1290  case MVT::i8:
1291  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1292  break;
1293  case MVT::i16:
1294  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1295  break;
1296  case MVT::i32:
1297  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1298  break;
1299  case MVT::i64:
1300  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1301  break;
1302  case MVT::f32:
1303  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1304  break;
1305  case MVT::f64:
1306  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1307  break;
1308  }
1309  } else {
1310  switch (EltVT.getSimpleVT().SimpleTy) {
1311  default:
1312  return nullptr;
1313  case MVT::i8:
1314  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1315  break;
1316  case MVT::i16:
1317  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1318  break;
1319  case MVT::i32:
1320  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1321  break;
1322  case MVT::i64:
1323  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1324  break;
1325  case MVT::f32:
1326  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1327  break;
1328  case MVT::f64:
1329  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1330  break;
1331  }
1332  }
1333  break;
1334  case NVPTXISD::LDGV2:
1335  switch (EltVT.getSimpleVT().SimpleTy) {
1336  default:
1337  return nullptr;
1338  case MVT::i8:
1339  Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1340  break;
1341  case MVT::i16:
1342  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1343  break;
1344  case MVT::i32:
1345  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1346  break;
1347  case MVT::i64:
1348  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1349  break;
1350  case MVT::f32:
1351  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1352  break;
1353  case MVT::f64:
1354  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1355  break;
1356  }
1357  break;
1358  case NVPTXISD::LDUV2:
1359  switch (EltVT.getSimpleVT().SimpleTy) {
1360  default:
1361  return nullptr;
1362  case MVT::i8:
1363  Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1364  break;
1365  case MVT::i16:
1366  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1367  break;
1368  case MVT::i32:
1369  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1370  break;
1371  case MVT::i64:
1372  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1373  break;
1374  case MVT::f32:
1375  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1376  break;
1377  case MVT::f64:
1378  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1379  break;
1380  }
1381  break;
1382  case NVPTXISD::LDGV4:
1383  switch (EltVT.getSimpleVT().SimpleTy) {
1384  default:
1385  return nullptr;
1386  case MVT::i8:
1387  Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1388  break;
1389  case MVT::i16:
1390  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1391  break;
1392  case MVT::i32:
1393  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1394  break;
1395  case MVT::f32:
1396  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1397  break;
1398  }
1399  break;
1400  case NVPTXISD::LDUV4:
1401  switch (EltVT.getSimpleVT().SimpleTy) {
1402  default:
1403  return nullptr;
1404  case MVT::i8:
1405  Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1406  break;
1407  case MVT::i16:
1408  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1409  break;
1410  case MVT::i32:
1411  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1412  break;
1413  case MVT::f32:
1414  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1415  break;
1416  }
1417  break;
1418  }
1419 
1420  SDValue Ops[] = { Addr, Chain };
1421  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1422  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1423  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1424  if (TM.is64Bit()) {
1425  switch (N->getOpcode()) {
1426  default:
1427  return nullptr;
1429  if (IsLDG) {
1430  switch (EltVT.getSimpleVT().SimpleTy) {
1431  default:
1432  return nullptr;
1433  case MVT::i8:
1434  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1435  break;
1436  case MVT::i16:
1437  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1438  break;
1439  case MVT::i32:
1440  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1441  break;
1442  case MVT::i64:
1443  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1444  break;
1445  case MVT::f32:
1446  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1447  break;
1448  case MVT::f64:
1449  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1450  break;
1451  }
1452  } else {
1453  switch (EltVT.getSimpleVT().SimpleTy) {
1454  default:
1455  return nullptr;
1456  case MVT::i8:
1457  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1458  break;
1459  case MVT::i16:
1460  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1461  break;
1462  case MVT::i32:
1463  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1464  break;
1465  case MVT::i64:
1466  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1467  break;
1468  case MVT::f32:
1469  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1470  break;
1471  case MVT::f64:
1472  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1473  break;
1474  }
1475  }
1476  break;
1477  case NVPTXISD::LDGV2:
1478  switch (EltVT.getSimpleVT().SimpleTy) {
1479  default:
1480  return nullptr;
1481  case MVT::i8:
1482  Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1483  break;
1484  case MVT::i16:
1485  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1486  break;
1487  case MVT::i32:
1488  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1489  break;
1490  case MVT::i64:
1491  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1492  break;
1493  case MVT::f32:
1494  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1495  break;
1496  case MVT::f64:
1497  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1498  break;
1499  }
1500  break;
1501  case NVPTXISD::LDUV2:
1502  switch (EltVT.getSimpleVT().SimpleTy) {
1503  default:
1504  return nullptr;
1505  case MVT::i8:
1506  Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1507  break;
1508  case MVT::i16:
1509  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1510  break;
1511  case MVT::i32:
1512  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1513  break;
1514  case MVT::i64:
1515  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1516  break;
1517  case MVT::f32:
1518  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1519  break;
1520  case MVT::f64:
1521  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1522  break;
1523  }
1524  break;
1525  case NVPTXISD::LDGV4:
1526  switch (EltVT.getSimpleVT().SimpleTy) {
1527  default:
1528  return nullptr;
1529  case MVT::i8:
1530  Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1531  break;
1532  case MVT::i16:
1533  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1534  break;
1535  case MVT::i32:
1536  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1537  break;
1538  case MVT::f32:
1539  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1540  break;
1541  }
1542  break;
1543  case NVPTXISD::LDUV4:
1544  switch (EltVT.getSimpleVT().SimpleTy) {
1545  default:
1546  return nullptr;
1547  case MVT::i8:
1548  Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1549  break;
1550  case MVT::i16:
1551  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1552  break;
1553  case MVT::i32:
1554  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1555  break;
1556  case MVT::f32:
1557  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1558  break;
1559  }
1560  break;
1561  }
1562  } else {
1563  switch (N->getOpcode()) {
1564  default:
1565  return nullptr;
1567  if (IsLDG) {
1568  switch (EltVT.getSimpleVT().SimpleTy) {
1569  default:
1570  return nullptr;
1571  case MVT::i8:
1572  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1573  break;
1574  case MVT::i16:
1575  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1576  break;
1577  case MVT::i32:
1578  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1579  break;
1580  case MVT::i64:
1581  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1582  break;
1583  case MVT::f32:
1584  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1585  break;
1586  case MVT::f64:
1587  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1588  break;
1589  }
1590  } else {
1591  switch (EltVT.getSimpleVT().SimpleTy) {
1592  default:
1593  return nullptr;
1594  case MVT::i8:
1595  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1596  break;
1597  case MVT::i16:
1598  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1599  break;
1600  case MVT::i32:
1601  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1602  break;
1603  case MVT::i64:
1604  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1605  break;
1606  case MVT::f32:
1607  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1608  break;
1609  case MVT::f64:
1610  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1611  break;
1612  }
1613  }
1614  break;
1615  case NVPTXISD::LDGV2:
1616  switch (EltVT.getSimpleVT().SimpleTy) {
1617  default:
1618  return nullptr;
1619  case MVT::i8:
1620  Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1621  break;
1622  case MVT::i16:
1623  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1624  break;
1625  case MVT::i32:
1626  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1627  break;
1628  case MVT::i64:
1629  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1630  break;
1631  case MVT::f32:
1632  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1633  break;
1634  case MVT::f64:
1635  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1636  break;
1637  }
1638  break;
1639  case NVPTXISD::LDUV2:
1640  switch (EltVT.getSimpleVT().SimpleTy) {
1641  default:
1642  return nullptr;
1643  case MVT::i8:
1644  Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1645  break;
1646  case MVT::i16:
1647  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1648  break;
1649  case MVT::i32:
1650  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1651  break;
1652  case MVT::i64:
1653  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1654  break;
1655  case MVT::f32:
1656  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1657  break;
1658  case MVT::f64:
1659  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1660  break;
1661  }
1662  break;
1663  case NVPTXISD::LDGV4:
1664  switch (EltVT.getSimpleVT().SimpleTy) {
1665  default:
1666  return nullptr;
1667  case MVT::i8:
1668  Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1669  break;
1670  case MVT::i16:
1671  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1672  break;
1673  case MVT::i32:
1674  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1675  break;
1676  case MVT::f32:
1677  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1678  break;
1679  }
1680  break;
1681  case NVPTXISD::LDUV4:
1682  switch (EltVT.getSimpleVT().SimpleTy) {
1683  default:
1684  return nullptr;
1685  case MVT::i8:
1686  Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1687  break;
1688  case MVT::i16:
1689  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1690  break;
1691  case MVT::i32:
1692  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1693  break;
1694  case MVT::f32:
1695  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1696  break;
1697  }
1698  break;
1699  }
1700  }
1701 
1702  SDValue Ops[] = { Base, Offset, Chain };
1703 
1704  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1705  } else {
1706  if (TM.is64Bit()) {
1707  switch (N->getOpcode()) {
1708  default:
1709  return nullptr;
1711  if (IsLDG) {
1712  switch (EltVT.getSimpleVT().SimpleTy) {
1713  default:
1714  return nullptr;
1715  case MVT::i8:
1716  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1717  break;
1718  case MVT::i16:
1719  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1720  break;
1721  case MVT::i32:
1722  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1723  break;
1724  case MVT::i64:
1725  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1726  break;
1727  case MVT::f32:
1728  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1729  break;
1730  case MVT::f64:
1731  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1732  break;
1733  }
1734  } else {
1735  switch (EltVT.getSimpleVT().SimpleTy) {
1736  default:
1737  return nullptr;
1738  case MVT::i8:
1739  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1740  break;
1741  case MVT::i16:
1742  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1743  break;
1744  case MVT::i32:
1745  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1746  break;
1747  case MVT::i64:
1748  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1749  break;
1750  case MVT::f32:
1751  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1752  break;
1753  case MVT::f64:
1754  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1755  break;
1756  }
1757  }
1758  break;
1759  case NVPTXISD::LDGV2:
1760  switch (EltVT.getSimpleVT().SimpleTy) {
1761  default:
1762  return nullptr;
1763  case MVT::i8:
1764  Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1765  break;
1766  case MVT::i16:
1767  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1768  break;
1769  case MVT::i32:
1770  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1771  break;
1772  case MVT::i64:
1773  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1774  break;
1775  case MVT::f32:
1776  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1777  break;
1778  case MVT::f64:
1779  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1780  break;
1781  }
1782  break;
1783  case NVPTXISD::LDUV2:
1784  switch (EltVT.getSimpleVT().SimpleTy) {
1785  default:
1786  return nullptr;
1787  case MVT::i8:
1788  Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1789  break;
1790  case MVT::i16:
1791  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1792  break;
1793  case MVT::i32:
1794  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1795  break;
1796  case MVT::i64:
1797  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1798  break;
1799  case MVT::f32:
1800  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1801  break;
1802  case MVT::f64:
1803  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1804  break;
1805  }
1806  break;
1807  case NVPTXISD::LDGV4:
1808  switch (EltVT.getSimpleVT().SimpleTy) {
1809  default:
1810  return nullptr;
1811  case MVT::i8:
1812  Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1813  break;
1814  case MVT::i16:
1815  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1816  break;
1817  case MVT::i32:
1818  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1819  break;
1820  case MVT::f32:
1821  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1822  break;
1823  }
1824  break;
1825  case NVPTXISD::LDUV4:
1826  switch (EltVT.getSimpleVT().SimpleTy) {
1827  default:
1828  return nullptr;
1829  case MVT::i8:
1830  Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1831  break;
1832  case MVT::i16:
1833  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1834  break;
1835  case MVT::i32:
1836  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1837  break;
1838  case MVT::f32:
1839  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1840  break;
1841  }
1842  break;
1843  }
1844  } else {
1845  switch (N->getOpcode()) {
1846  default:
1847  return nullptr;
1849  if (IsLDG) {
1850  switch (EltVT.getSimpleVT().SimpleTy) {
1851  default:
1852  return nullptr;
1853  case MVT::i8:
1854  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1855  break;
1856  case MVT::i16:
1857  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1858  break;
1859  case MVT::i32:
1860  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1861  break;
1862  case MVT::i64:
1863  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1864  break;
1865  case MVT::f32:
1866  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1867  break;
1868  case MVT::f64:
1869  Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1870  break;
1871  }
1872  } else {
1873  switch (EltVT.getSimpleVT().SimpleTy) {
1874  default:
1875  return nullptr;
1876  case MVT::i8:
1877  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1878  break;
1879  case MVT::i16:
1880  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1881  break;
1882  case MVT::i32:
1883  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1884  break;
1885  case MVT::i64:
1886  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1887  break;
1888  case MVT::f32:
1889  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1890  break;
1891  case MVT::f64:
1892  Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1893  break;
1894  }
1895  }
1896  break;
1897  case NVPTXISD::LDGV2:
1898  switch (EltVT.getSimpleVT().SimpleTy) {
1899  default:
1900  return nullptr;
1901  case MVT::i8:
1902  Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1903  break;
1904  case MVT::i16:
1905  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1906  break;
1907  case MVT::i32:
1908  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1909  break;
1910  case MVT::i64:
1911  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1912  break;
1913  case MVT::f32:
1914  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1915  break;
1916  case MVT::f64:
1917  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1918  break;
1919  }
1920  break;
1921  case NVPTXISD::LDUV2:
1922  switch (EltVT.getSimpleVT().SimpleTy) {
1923  default:
1924  return nullptr;
1925  case MVT::i8:
1926  Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1927  break;
1928  case MVT::i16:
1929  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1930  break;
1931  case MVT::i32:
1932  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1933  break;
1934  case MVT::i64:
1935  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1936  break;
1937  case MVT::f32:
1938  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1939  break;
1940  case MVT::f64:
1941  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1942  break;
1943  }
1944  break;
1945  case NVPTXISD::LDGV4:
1946  switch (EltVT.getSimpleVT().SimpleTy) {
1947  default:
1948  return nullptr;
1949  case MVT::i8:
1950  Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1951  break;
1952  case MVT::i16:
1953  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1954  break;
1955  case MVT::i32:
1956  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1957  break;
1958  case MVT::f32:
1959  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1960  break;
1961  }
1962  break;
1963  case NVPTXISD::LDUV4:
1964  switch (EltVT.getSimpleVT().SimpleTy) {
1965  default:
1966  return nullptr;
1967  case MVT::i8:
1968  Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1969  break;
1970  case MVT::i16:
1971  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1972  break;
1973  case MVT::i32:
1974  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1975  break;
1976  case MVT::f32:
1977  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1978  break;
1979  }
1980  break;
1981  }
1982  }
1983 
1984  SDValue Ops[] = { Op1, Chain };
1985  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1986  }
1987 
1989  MemRefs0[0] = Mem->getMemOperand();
1990  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1991 
1992  return LD;
1993 }
1994 
1995 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1996  SDLoc dl(N);
1997  StoreSDNode *ST = cast<StoreSDNode>(N);
1998  EVT StoreVT = ST->getMemoryVT();
1999  SDNode *NVPTXST = nullptr;
2000 
2001  // do not support pre/post inc/dec
2002  if (ST->isIndexed())
2003  return nullptr;
2004 
2005  if (!StoreVT.isSimple())
2006  return nullptr;
2007 
2008  // Address Space Setting
2009  unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2010 
2011  // Volatile Setting
2012  // - .volatile is only availalble for .global and .shared
2013  bool isVolatile = ST->isVolatile();
2014  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2015  codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2016  codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2017  isVolatile = false;
2018 
2019  // Vector Setting
2020  MVT SimpleVT = StoreVT.getSimpleVT();
2021  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2022  if (SimpleVT.isVector()) {
2023  unsigned num = SimpleVT.getVectorNumElements();
2024  if (num == 2)
2025  vecType = NVPTX::PTXLdStInstCode::V2;
2026  else if (num == 4)
2027  vecType = NVPTX::PTXLdStInstCode::V4;
2028  else
2029  return nullptr;
2030  }
2031 
2032  // Type Setting: toType + toTypeWidth
2033  // - for integer type, always use 'u'
2034  //
2035  MVT ScalarVT = SimpleVT.getScalarType();
2036  unsigned toTypeWidth = ScalarVT.getSizeInBits();
2037  unsigned int toType;
2038  if (ScalarVT.isFloatingPoint())
2040  else
2042 
2043  // Create the machine instruction DAG
2044  SDValue Chain = N->getOperand(0);
2045  SDValue N1 = N->getOperand(1);
2046  SDValue N2 = N->getOperand(2);
2047  SDValue Addr;
2048  SDValue Offset, Base;
2049  unsigned Opcode;
2051 
2052  if (SelectDirectAddr(N2, Addr)) {
2053  switch (SourceVT) {
2054  case MVT::i8:
2055  Opcode = NVPTX::ST_i8_avar;
2056  break;
2057  case MVT::i16:
2058  Opcode = NVPTX::ST_i16_avar;
2059  break;
2060  case MVT::i32:
2061  Opcode = NVPTX::ST_i32_avar;
2062  break;
2063  case MVT::i64:
2064  Opcode = NVPTX::ST_i64_avar;
2065  break;
2066  case MVT::f32:
2067  Opcode = NVPTX::ST_f32_avar;
2068  break;
2069  case MVT::f64:
2070  Opcode = NVPTX::ST_f64_avar;
2071  break;
2072  default:
2073  return nullptr;
2074  }
2075  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2076  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2077  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2078  Chain };
2079  NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2080  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2081  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2082  switch (SourceVT) {
2083  case MVT::i8:
2084  Opcode = NVPTX::ST_i8_asi;
2085  break;
2086  case MVT::i16:
2087  Opcode = NVPTX::ST_i16_asi;
2088  break;
2089  case MVT::i32:
2090  Opcode = NVPTX::ST_i32_asi;
2091  break;
2092  case MVT::i64:
2093  Opcode = NVPTX::ST_i64_asi;
2094  break;
2095  case MVT::f32:
2096  Opcode = NVPTX::ST_f32_asi;
2097  break;
2098  case MVT::f64:
2099  Opcode = NVPTX::ST_f64_asi;
2100  break;
2101  default:
2102  return nullptr;
2103  }
2104  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2105  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2106  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2107  Offset, Chain };
2108  NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2109  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2110  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2111  if (TM.is64Bit()) {
2112  switch (SourceVT) {
2113  case MVT::i8:
2114  Opcode = NVPTX::ST_i8_ari_64;
2115  break;
2116  case MVT::i16:
2117  Opcode = NVPTX::ST_i16_ari_64;
2118  break;
2119  case MVT::i32:
2120  Opcode = NVPTX::ST_i32_ari_64;
2121  break;
2122  case MVT::i64:
2123  Opcode = NVPTX::ST_i64_ari_64;
2124  break;
2125  case MVT::f32:
2126  Opcode = NVPTX::ST_f32_ari_64;
2127  break;
2128  case MVT::f64:
2129  Opcode = NVPTX::ST_f64_ari_64;
2130  break;
2131  default:
2132  return nullptr;
2133  }
2134  } else {
2135  switch (SourceVT) {
2136  case MVT::i8:
2137  Opcode = NVPTX::ST_i8_ari;
2138  break;
2139  case MVT::i16:
2140  Opcode = NVPTX::ST_i16_ari;
2141  break;
2142  case MVT::i32:
2143  Opcode = NVPTX::ST_i32_ari;
2144  break;
2145  case MVT::i64:
2146  Opcode = NVPTX::ST_i64_ari;
2147  break;
2148  case MVT::f32:
2149  Opcode = NVPTX::ST_f32_ari;
2150  break;
2151  case MVT::f64:
2152  Opcode = NVPTX::ST_f64_ari;
2153  break;
2154  default:
2155  return nullptr;
2156  }
2157  }
2158  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2159  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2160  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2161  Offset, Chain };
2162  NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2163  } else {
2164  if (TM.is64Bit()) {
2165  switch (SourceVT) {
2166  case MVT::i8:
2167  Opcode = NVPTX::ST_i8_areg_64;
2168  break;
2169  case MVT::i16:
2170  Opcode = NVPTX::ST_i16_areg_64;
2171  break;
2172  case MVT::i32:
2173  Opcode = NVPTX::ST_i32_areg_64;
2174  break;
2175  case MVT::i64:
2176  Opcode = NVPTX::ST_i64_areg_64;
2177  break;
2178  case MVT::f32:
2179  Opcode = NVPTX::ST_f32_areg_64;
2180  break;
2181  case MVT::f64:
2182  Opcode = NVPTX::ST_f64_areg_64;
2183  break;
2184  default:
2185  return nullptr;
2186  }
2187  } else {
2188  switch (SourceVT) {
2189  case MVT::i8:
2190  Opcode = NVPTX::ST_i8_areg;
2191  break;
2192  case MVT::i16:
2193  Opcode = NVPTX::ST_i16_areg;
2194  break;
2195  case MVT::i32:
2196  Opcode = NVPTX::ST_i32_areg;
2197  break;
2198  case MVT::i64:
2199  Opcode = NVPTX::ST_i64_areg;
2200  break;
2201  case MVT::f32:
2202  Opcode = NVPTX::ST_f32_areg;
2203  break;
2204  case MVT::f64:
2205  Opcode = NVPTX::ST_f64_areg;
2206  break;
2207  default:
2208  return nullptr;
2209  }
2210  }
2211  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2212  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2213  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2214  Chain };
2215  NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2216  }
2217 
2218  if (NVPTXST) {
2220  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2221  cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2222  }
2223 
2224  return NVPTXST;
2225 }
2226 
2227 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2228  SDValue Chain = N->getOperand(0);
2229  SDValue Op1 = N->getOperand(1);
2230  SDValue Addr, Offset, Base;
2231  unsigned Opcode;
2232  SDLoc DL(N);
2233  SDNode *ST;
2234  EVT EltVT = Op1.getValueType();
2235  MemSDNode *MemSD = cast<MemSDNode>(N);
2236  EVT StoreVT = MemSD->getMemoryVT();
2237 
2238  // Address Space Setting
2239  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2240 
2241  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2242  report_fatal_error("Cannot store to pointer that points to constant "
2243  "memory space");
2244  }
2245 
2246  // Volatile Setting
2247  // - .volatile is only availalble for .global and .shared
2248  bool IsVolatile = MemSD->isVolatile();
2249  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2250  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2251  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2252  IsVolatile = false;
2253 
2254  // Type Setting: toType + toTypeWidth
2255  // - for integer type, always use 'u'
2256  assert(StoreVT.isSimple() && "Store value is not simple");
2257  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2258  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2259  unsigned ToType;
2260  if (ScalarVT.isFloatingPoint())
2262  else
2264 
2266  SDValue N2;
2267  unsigned VecType;
2268 
2269  switch (N->getOpcode()) {
2270  case NVPTXISD::StoreV2:
2271  VecType = NVPTX::PTXLdStInstCode::V2;
2272  StOps.push_back(N->getOperand(1));
2273  StOps.push_back(N->getOperand(2));
2274  N2 = N->getOperand(3);
2275  break;
2276  case NVPTXISD::StoreV4:
2277  VecType = NVPTX::PTXLdStInstCode::V4;
2278  StOps.push_back(N->getOperand(1));
2279  StOps.push_back(N->getOperand(2));
2280  StOps.push_back(N->getOperand(3));
2281  StOps.push_back(N->getOperand(4));
2282  N2 = N->getOperand(5);
2283  break;
2284  default:
2285  return nullptr;
2286  }
2287 
2288  StOps.push_back(getI32Imm(IsVolatile, DL));
2289  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2290  StOps.push_back(getI32Imm(VecType, DL));
2291  StOps.push_back(getI32Imm(ToType, DL));
2292  StOps.push_back(getI32Imm(ToTypeWidth, DL));
2293 
2294  if (SelectDirectAddr(N2, Addr)) {
2295  switch (N->getOpcode()) {
2296  default:
2297  return nullptr;
2298  case NVPTXISD::StoreV2:
2299  switch (EltVT.getSimpleVT().SimpleTy) {
2300  default:
2301  return nullptr;
2302  case MVT::i8:
2303  Opcode = NVPTX::STV_i8_v2_avar;
2304  break;
2305  case MVT::i16:
2306  Opcode = NVPTX::STV_i16_v2_avar;
2307  break;
2308  case MVT::i32:
2309  Opcode = NVPTX::STV_i32_v2_avar;
2310  break;
2311  case MVT::i64:
2312  Opcode = NVPTX::STV_i64_v2_avar;
2313  break;
2314  case MVT::f32:
2315  Opcode = NVPTX::STV_f32_v2_avar;
2316  break;
2317  case MVT::f64:
2318  Opcode = NVPTX::STV_f64_v2_avar;
2319  break;
2320  }
2321  break;
2322  case NVPTXISD::StoreV4:
2323  switch (EltVT.getSimpleVT().SimpleTy) {
2324  default:
2325  return nullptr;
2326  case MVT::i8:
2327  Opcode = NVPTX::STV_i8_v4_avar;
2328  break;
2329  case MVT::i16:
2330  Opcode = NVPTX::STV_i16_v4_avar;
2331  break;
2332  case MVT::i32:
2333  Opcode = NVPTX::STV_i32_v4_avar;
2334  break;
2335  case MVT::f32:
2336  Opcode = NVPTX::STV_f32_v4_avar;
2337  break;
2338  }
2339  break;
2340  }
2341  StOps.push_back(Addr);
2342  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2343  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2344  switch (N->getOpcode()) {
2345  default:
2346  return nullptr;
2347  case NVPTXISD::StoreV2:
2348  switch (EltVT.getSimpleVT().SimpleTy) {
2349  default:
2350  return nullptr;
2351  case MVT::i8:
2352  Opcode = NVPTX::STV_i8_v2_asi;
2353  break;
2354  case MVT::i16:
2355  Opcode = NVPTX::STV_i16_v2_asi;
2356  break;
2357  case MVT::i32:
2358  Opcode = NVPTX::STV_i32_v2_asi;
2359  break;
2360  case MVT::i64:
2361  Opcode = NVPTX::STV_i64_v2_asi;
2362  break;
2363  case MVT::f32:
2364  Opcode = NVPTX::STV_f32_v2_asi;
2365  break;
2366  case MVT::f64:
2367  Opcode = NVPTX::STV_f64_v2_asi;
2368  break;
2369  }
2370  break;
2371  case NVPTXISD::StoreV4:
2372  switch (EltVT.getSimpleVT().SimpleTy) {
2373  default:
2374  return nullptr;
2375  case MVT::i8:
2376  Opcode = NVPTX::STV_i8_v4_asi;
2377  break;
2378  case MVT::i16:
2379  Opcode = NVPTX::STV_i16_v4_asi;
2380  break;
2381  case MVT::i32:
2382  Opcode = NVPTX::STV_i32_v4_asi;
2383  break;
2384  case MVT::f32:
2385  Opcode = NVPTX::STV_f32_v4_asi;
2386  break;
2387  }
2388  break;
2389  }
2390  StOps.push_back(Base);
2391  StOps.push_back(Offset);
2392  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2393  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2394  if (TM.is64Bit()) {
2395  switch (N->getOpcode()) {
2396  default:
2397  return nullptr;
2398  case NVPTXISD::StoreV2:
2399  switch (EltVT.getSimpleVT().SimpleTy) {
2400  default:
2401  return nullptr;
2402  case MVT::i8:
2403  Opcode = NVPTX::STV_i8_v2_ari_64;
2404  break;
2405  case MVT::i16:
2406  Opcode = NVPTX::STV_i16_v2_ari_64;
2407  break;
2408  case MVT::i32:
2409  Opcode = NVPTX::STV_i32_v2_ari_64;
2410  break;
2411  case MVT::i64:
2412  Opcode = NVPTX::STV_i64_v2_ari_64;
2413  break;
2414  case MVT::f32:
2415  Opcode = NVPTX::STV_f32_v2_ari_64;
2416  break;
2417  case MVT::f64:
2418  Opcode = NVPTX::STV_f64_v2_ari_64;
2419  break;
2420  }
2421  break;
2422  case NVPTXISD::StoreV4:
2423  switch (EltVT.getSimpleVT().SimpleTy) {
2424  default:
2425  return nullptr;
2426  case MVT::i8:
2427  Opcode = NVPTX::STV_i8_v4_ari_64;
2428  break;
2429  case MVT::i16:
2430  Opcode = NVPTX::STV_i16_v4_ari_64;
2431  break;
2432  case MVT::i32:
2433  Opcode = NVPTX::STV_i32_v4_ari_64;
2434  break;
2435  case MVT::f32:
2436  Opcode = NVPTX::STV_f32_v4_ari_64;
2437  break;
2438  }
2439  break;
2440  }
2441  } else {
2442  switch (N->getOpcode()) {
2443  default:
2444  return nullptr;
2445  case NVPTXISD::StoreV2:
2446  switch (EltVT.getSimpleVT().SimpleTy) {
2447  default:
2448  return nullptr;
2449  case MVT::i8:
2450  Opcode = NVPTX::STV_i8_v2_ari;
2451  break;
2452  case MVT::i16:
2453  Opcode = NVPTX::STV_i16_v2_ari;
2454  break;
2455  case MVT::i32:
2456  Opcode = NVPTX::STV_i32_v2_ari;
2457  break;
2458  case MVT::i64:
2459  Opcode = NVPTX::STV_i64_v2_ari;
2460  break;
2461  case MVT::f32:
2462  Opcode = NVPTX::STV_f32_v2_ari;
2463  break;
2464  case MVT::f64:
2465  Opcode = NVPTX::STV_f64_v2_ari;
2466  break;
2467  }
2468  break;
2469  case NVPTXISD::StoreV4:
2470  switch (EltVT.getSimpleVT().SimpleTy) {
2471  default:
2472  return nullptr;
2473  case MVT::i8:
2474  Opcode = NVPTX::STV_i8_v4_ari;
2475  break;
2476  case MVT::i16:
2477  Opcode = NVPTX::STV_i16_v4_ari;
2478  break;
2479  case MVT::i32:
2480  Opcode = NVPTX::STV_i32_v4_ari;
2481  break;
2482  case MVT::f32:
2483  Opcode = NVPTX::STV_f32_v4_ari;
2484  break;
2485  }
2486  break;
2487  }
2488  }
2489  StOps.push_back(Base);
2490  StOps.push_back(Offset);
2491  } else {
2492  if (TM.is64Bit()) {
2493  switch (N->getOpcode()) {
2494  default:
2495  return nullptr;
2496  case NVPTXISD::StoreV2:
2497  switch (EltVT.getSimpleVT().SimpleTy) {
2498  default:
2499  return nullptr;
2500  case MVT::i8:
2501  Opcode = NVPTX::STV_i8_v2_areg_64;
2502  break;
2503  case MVT::i16:
2504  Opcode = NVPTX::STV_i16_v2_areg_64;
2505  break;
2506  case MVT::i32:
2507  Opcode = NVPTX::STV_i32_v2_areg_64;
2508  break;
2509  case MVT::i64:
2510  Opcode = NVPTX::STV_i64_v2_areg_64;
2511  break;
2512  case MVT::f32:
2513  Opcode = NVPTX::STV_f32_v2_areg_64;
2514  break;
2515  case MVT::f64:
2516  Opcode = NVPTX::STV_f64_v2_areg_64;
2517  break;
2518  }
2519  break;
2520  case NVPTXISD::StoreV4:
2521  switch (EltVT.getSimpleVT().SimpleTy) {
2522  default:
2523  return nullptr;
2524  case MVT::i8:
2525  Opcode = NVPTX::STV_i8_v4_areg_64;
2526  break;
2527  case MVT::i16:
2528  Opcode = NVPTX::STV_i16_v4_areg_64;
2529  break;
2530  case MVT::i32:
2531  Opcode = NVPTX::STV_i32_v4_areg_64;
2532  break;
2533  case MVT::f32:
2534  Opcode = NVPTX::STV_f32_v4_areg_64;
2535  break;
2536  }
2537  break;
2538  }
2539  } else {
2540  switch (N->getOpcode()) {
2541  default:
2542  return nullptr;
2543  case NVPTXISD::StoreV2:
2544  switch (EltVT.getSimpleVT().SimpleTy) {
2545  default:
2546  return nullptr;
2547  case MVT::i8:
2548  Opcode = NVPTX::STV_i8_v2_areg;
2549  break;
2550  case MVT::i16:
2551  Opcode = NVPTX::STV_i16_v2_areg;
2552  break;
2553  case MVT::i32:
2554  Opcode = NVPTX::STV_i32_v2_areg;
2555  break;
2556  case MVT::i64:
2557  Opcode = NVPTX::STV_i64_v2_areg;
2558  break;
2559  case MVT::f32:
2560  Opcode = NVPTX::STV_f32_v2_areg;
2561  break;
2562  case MVT::f64:
2563  Opcode = NVPTX::STV_f64_v2_areg;
2564  break;
2565  }
2566  break;
2567  case NVPTXISD::StoreV4:
2568  switch (EltVT.getSimpleVT().SimpleTy) {
2569  default:
2570  return nullptr;
2571  case MVT::i8:
2572  Opcode = NVPTX::STV_i8_v4_areg;
2573  break;
2574  case MVT::i16:
2575  Opcode = NVPTX::STV_i16_v4_areg;
2576  break;
2577  case MVT::i32:
2578  Opcode = NVPTX::STV_i32_v4_areg;
2579  break;
2580  case MVT::f32:
2581  Opcode = NVPTX::STV_f32_v4_areg;
2582  break;
2583  }
2584  break;
2585  }
2586  }
2587  StOps.push_back(N2);
2588  }
2589 
2590  StOps.push_back(Chain);
2591 
2592  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2593 
2595  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2596  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2597 
2598  return ST;
2599 }
2600 
2601 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2602  SDValue Chain = Node->getOperand(0);
2603  SDValue Offset = Node->getOperand(2);
2604  SDValue Flag = Node->getOperand(3);
2605  SDLoc DL(Node);
2606  MemSDNode *Mem = cast<MemSDNode>(Node);
2607 
2608  unsigned VecSize;
2609  switch (Node->getOpcode()) {
2610  default:
2611  return nullptr;
2612  case NVPTXISD::LoadParam:
2613  VecSize = 1;
2614  break;
2615  case NVPTXISD::LoadParamV2:
2616  VecSize = 2;
2617  break;
2618  case NVPTXISD::LoadParamV4:
2619  VecSize = 4;
2620  break;
2621  }
2622 
2623  EVT EltVT = Node->getValueType(0);
2624  EVT MemVT = Mem->getMemoryVT();
2625 
2626  unsigned Opc = 0;
2627 
2628  switch (VecSize) {
2629  default:
2630  return nullptr;
2631  case 1:
2632  switch (MemVT.getSimpleVT().SimpleTy) {
2633  default:
2634  return nullptr;
2635  case MVT::i1:
2636  Opc = NVPTX::LoadParamMemI8;
2637  break;
2638  case MVT::i8:
2639  Opc = NVPTX::LoadParamMemI8;
2640  break;
2641  case MVT::i16:
2642  Opc = NVPTX::LoadParamMemI16;
2643  break;
2644  case MVT::i32:
2645  Opc = NVPTX::LoadParamMemI32;
2646  break;
2647  case MVT::i64:
2648  Opc = NVPTX::LoadParamMemI64;
2649  break;
2650  case MVT::f32:
2651  Opc = NVPTX::LoadParamMemF32;
2652  break;
2653  case MVT::f64:
2654  Opc = NVPTX::LoadParamMemF64;
2655  break;
2656  }
2657  break;
2658  case 2:
2659  switch (MemVT.getSimpleVT().SimpleTy) {
2660  default:
2661  return nullptr;
2662  case MVT::i1:
2663  Opc = NVPTX::LoadParamMemV2I8;
2664  break;
2665  case MVT::i8:
2666  Opc = NVPTX::LoadParamMemV2I8;
2667  break;
2668  case MVT::i16:
2669  Opc = NVPTX::LoadParamMemV2I16;
2670  break;
2671  case MVT::i32:
2672  Opc = NVPTX::LoadParamMemV2I32;
2673  break;
2674  case MVT::i64:
2675  Opc = NVPTX::LoadParamMemV2I64;
2676  break;
2677  case MVT::f32:
2678  Opc = NVPTX::LoadParamMemV2F32;
2679  break;
2680  case MVT::f64:
2681  Opc = NVPTX::LoadParamMemV2F64;
2682  break;
2683  }
2684  break;
2685  case 4:
2686  switch (MemVT.getSimpleVT().SimpleTy) {
2687  default:
2688  return nullptr;
2689  case MVT::i1:
2690  Opc = NVPTX::LoadParamMemV4I8;
2691  break;
2692  case MVT::i8:
2693  Opc = NVPTX::LoadParamMemV4I8;
2694  break;
2695  case MVT::i16:
2696  Opc = NVPTX::LoadParamMemV4I16;
2697  break;
2698  case MVT::i32:
2699  Opc = NVPTX::LoadParamMemV4I32;
2700  break;
2701  case MVT::f32:
2702  Opc = NVPTX::LoadParamMemV4F32;
2703  break;
2704  }
2705  break;
2706  }
2707 
2708  SDVTList VTs;
2709  if (VecSize == 1) {
2710  VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2711  } else if (VecSize == 2) {
2712  VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2713  } else {
2714  EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2715  VTs = CurDAG->getVTList(EVTs);
2716  }
2717 
2718  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2719 
2721  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2722  Ops.push_back(Chain);
2723  Ops.push_back(Flag);
2724 
2725  return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2726 }
2727 
2728 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2729  SDLoc DL(N);
2730  SDValue Chain = N->getOperand(0);
2731  SDValue Offset = N->getOperand(1);
2732  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2733  MemSDNode *Mem = cast<MemSDNode>(N);
2734 
2735  // How many elements do we have?
2736  unsigned NumElts = 1;
2737  switch (N->getOpcode()) {
2738  default:
2739  return nullptr;
2740  case NVPTXISD::StoreRetval:
2741  NumElts = 1;
2742  break;
2744  NumElts = 2;
2745  break;
2747  NumElts = 4;
2748  break;
2749  }
2750 
2751  // Build vector of operands
2753  for (unsigned i = 0; i < NumElts; ++i)
2754  Ops.push_back(N->getOperand(i + 2));
2755  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2756  Ops.push_back(Chain);
2757 
2758  // Determine target opcode
2759  // If we have an i1, use an 8-bit store. The lowering code in
2760  // NVPTXISelLowering will have already emitted an upcast.
2761  unsigned Opcode = 0;
2762  switch (NumElts) {
2763  default:
2764  return nullptr;
2765  case 1:
2766  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2767  default:
2768  return nullptr;
2769  case MVT::i1:
2770  Opcode = NVPTX::StoreRetvalI8;
2771  break;
2772  case MVT::i8:
2773  Opcode = NVPTX::StoreRetvalI8;
2774  break;
2775  case MVT::i16:
2776  Opcode = NVPTX::StoreRetvalI16;
2777  break;
2778  case MVT::i32:
2779  Opcode = NVPTX::StoreRetvalI32;
2780  break;
2781  case MVT::i64:
2782  Opcode = NVPTX::StoreRetvalI64;
2783  break;
2784  case MVT::f32:
2785  Opcode = NVPTX::StoreRetvalF32;
2786  break;
2787  case MVT::f64:
2788  Opcode = NVPTX::StoreRetvalF64;
2789  break;
2790  }
2791  break;
2792  case 2:
2793  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2794  default:
2795  return nullptr;
2796  case MVT::i1:
2797  Opcode = NVPTX::StoreRetvalV2I8;
2798  break;
2799  case MVT::i8:
2800  Opcode = NVPTX::StoreRetvalV2I8;
2801  break;
2802  case MVT::i16:
2803  Opcode = NVPTX::StoreRetvalV2I16;
2804  break;
2805  case MVT::i32:
2806  Opcode = NVPTX::StoreRetvalV2I32;
2807  break;
2808  case MVT::i64:
2809  Opcode = NVPTX::StoreRetvalV2I64;
2810  break;
2811  case MVT::f32:
2812  Opcode = NVPTX::StoreRetvalV2F32;
2813  break;
2814  case MVT::f64:
2815  Opcode = NVPTX::StoreRetvalV2F64;
2816  break;
2817  }
2818  break;
2819  case 4:
2820  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2821  default:
2822  return nullptr;
2823  case MVT::i1:
2824  Opcode = NVPTX::StoreRetvalV4I8;
2825  break;
2826  case MVT::i8:
2827  Opcode = NVPTX::StoreRetvalV4I8;
2828  break;
2829  case MVT::i16:
2830  Opcode = NVPTX::StoreRetvalV4I16;
2831  break;
2832  case MVT::i32:
2833  Opcode = NVPTX::StoreRetvalV4I32;
2834  break;
2835  case MVT::f32:
2836  Opcode = NVPTX::StoreRetvalV4F32;
2837  break;
2838  }
2839  break;
2840  }
2841 
2842  SDNode *Ret =
2843  CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2845  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2846  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2847 
2848  return Ret;
2849 }
2850 
2851 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2852  SDLoc DL(N);
2853  SDValue Chain = N->getOperand(0);
2854  SDValue Param = N->getOperand(1);
2855  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2856  SDValue Offset = N->getOperand(2);
2857  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2858  MemSDNode *Mem = cast<MemSDNode>(N);
2859  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2860 
2861  // How many elements do we have?
2862  unsigned NumElts = 1;
2863  switch (N->getOpcode()) {
2864  default:
2865  return nullptr;
2868  case NVPTXISD::StoreParam:
2869  NumElts = 1;
2870  break;
2872  NumElts = 2;
2873  break;
2875  NumElts = 4;
2876  break;
2877  }
2878 
2879  // Build vector of operands
2881  for (unsigned i = 0; i < NumElts; ++i)
2882  Ops.push_back(N->getOperand(i + 3));
2883  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2884  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2885  Ops.push_back(Chain);
2886  Ops.push_back(Flag);
2887 
2888  // Determine target opcode
2889  // If we have an i1, use an 8-bit store. The lowering code in
2890  // NVPTXISelLowering will have already emitted an upcast.
2891  unsigned Opcode = 0;
2892  switch (N->getOpcode()) {
2893  default:
2894  switch (NumElts) {
2895  default:
2896  return nullptr;
2897  case 1:
2898  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2899  default:
2900  return nullptr;
2901  case MVT::i1:
2902  Opcode = NVPTX::StoreParamI8;
2903  break;
2904  case MVT::i8:
2905  Opcode = NVPTX::StoreParamI8;
2906  break;
2907  case MVT::i16:
2908  Opcode = NVPTX::StoreParamI16;
2909  break;
2910  case MVT::i32:
2911  Opcode = NVPTX::StoreParamI32;
2912  break;
2913  case MVT::i64:
2914  Opcode = NVPTX::StoreParamI64;
2915  break;
2916  case MVT::f32:
2917  Opcode = NVPTX::StoreParamF32;
2918  break;
2919  case MVT::f64:
2920  Opcode = NVPTX::StoreParamF64;
2921  break;
2922  }
2923  break;
2924  case 2:
2925  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2926  default:
2927  return nullptr;
2928  case MVT::i1:
2929  Opcode = NVPTX::StoreParamV2I8;
2930  break;
2931  case MVT::i8:
2932  Opcode = NVPTX::StoreParamV2I8;
2933  break;
2934  case MVT::i16:
2935  Opcode = NVPTX::StoreParamV2I16;
2936  break;
2937  case MVT::i32:
2938  Opcode = NVPTX::StoreParamV2I32;
2939  break;
2940  case MVT::i64:
2941  Opcode = NVPTX::StoreParamV2I64;
2942  break;
2943  case MVT::f32:
2944  Opcode = NVPTX::StoreParamV2F32;
2945  break;
2946  case MVT::f64:
2947  Opcode = NVPTX::StoreParamV2F64;
2948  break;
2949  }
2950  break;
2951  case 4:
2952  switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2953  default:
2954  return nullptr;
2955  case MVT::i1:
2956  Opcode = NVPTX::StoreParamV4I8;
2957  break;
2958  case MVT::i8:
2959  Opcode = NVPTX::StoreParamV4I8;
2960  break;
2961  case MVT::i16:
2962  Opcode = NVPTX::StoreParamV4I16;
2963  break;
2964  case MVT::i32:
2965  Opcode = NVPTX::StoreParamV4I32;
2966  break;
2967  case MVT::f32:
2968  Opcode = NVPTX::StoreParamV4F32;
2969  break;
2970  }
2971  break;
2972  }
2973  break;
2974  // Special case: if we have a sign-extend/zero-extend node, insert the
2975  // conversion instruction first, and use that as the value operand to
2976  // the selected StoreParam node.
2977  case NVPTXISD::StoreParamU32: {
2978  Opcode = NVPTX::StoreParamI32;
2979  SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2980  MVT::i32);
2981  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2982  MVT::i32, Ops[0], CvtNone);
2983  Ops[0] = SDValue(Cvt, 0);
2984  break;
2985  }
2986  case NVPTXISD::StoreParamS32: {
2987  Opcode = NVPTX::StoreParamI32;
2988  SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2989  MVT::i32);
2990  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2991  MVT::i32, Ops[0], CvtNone);
2992  Ops[0] = SDValue(Cvt, 0);
2993  break;
2994  }
2995  }
2996 
2997  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2998  SDNode *Ret =
2999  CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
3001  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3002  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3003 
3004  return Ret;
3005 }
3006 
3007 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3008  SDValue Chain = N->getOperand(0);
3009  SDNode *Ret = nullptr;
3010  unsigned Opc = 0;
3012 
3013  switch (N->getOpcode()) {
3014  default: return nullptr;
3016  Opc = NVPTX::TEX_1D_F32_S32;
3017  break;
3019  Opc = NVPTX::TEX_1D_F32_F32;
3020  break;
3022  Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3023  break;
3025  Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3026  break;
3027  case NVPTXISD::Tex1DS32S32:
3028  Opc = NVPTX::TEX_1D_S32_S32;
3029  break;
3031  Opc = NVPTX::TEX_1D_S32_F32;
3032  break;
3034  Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3035  break;
3037  Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3038  break;
3039  case NVPTXISD::Tex1DU32S32:
3040  Opc = NVPTX::TEX_1D_U32_S32;
3041  break;
3043  Opc = NVPTX::TEX_1D_U32_F32;
3044  break;
3046  Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3047  break;
3049  Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3050  break;
3052  Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3053  break;
3055  Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3056  break;
3058  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3059  break;
3061  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3062  break;
3064  Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3065  break;
3067  Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3068  break;
3070  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3071  break;
3073  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3074  break;
3076  Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3077  break;
3079  Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3080  break;
3082  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3083  break;
3085  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3086  break;
3088  Opc = NVPTX::TEX_2D_F32_S32;
3089  break;
3091  Opc = NVPTX::TEX_2D_F32_F32;
3092  break;
3094  Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3095  break;
3097  Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3098  break;
3099  case NVPTXISD::Tex2DS32S32:
3100  Opc = NVPTX::TEX_2D_S32_S32;
3101  break;
3103  Opc = NVPTX::TEX_2D_S32_F32;
3104  break;
3106  Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3107  break;
3109  Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3110  break;
3111  case NVPTXISD::Tex2DU32S32:
3112  Opc = NVPTX::TEX_2D_U32_S32;
3113  break;
3115  Opc = NVPTX::TEX_2D_U32_F32;
3116  break;
3118  Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3119  break;
3121  Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3122  break;
3124  Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3125  break;
3127  Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3128  break;
3130  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3131  break;
3133  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3134  break;
3136  Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3137  break;
3139  Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3140  break;
3142  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3143  break;
3145  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3146  break;
3148  Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3149  break;
3151  Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3152  break;
3154  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3155  break;
3157  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3158  break;
3160  Opc = NVPTX::TEX_3D_F32_S32;
3161  break;
3163  Opc = NVPTX::TEX_3D_F32_F32;
3164  break;
3166  Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3167  break;
3169  Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3170  break;
3171  case NVPTXISD::Tex3DS32S32:
3172  Opc = NVPTX::TEX_3D_S32_S32;
3173  break;
3175  Opc = NVPTX::TEX_3D_S32_F32;
3176  break;
3178  Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3179  break;
3181  Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3182  break;
3183  case NVPTXISD::Tex3DU32S32:
3184  Opc = NVPTX::TEX_3D_U32_S32;
3185  break;
3187  Opc = NVPTX::TEX_3D_U32_F32;
3188  break;
3190  Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3191  break;
3193  Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3194  break;
3196  Opc = NVPTX::TEX_CUBE_F32_F32;
3197  break;
3199  Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3200  break;
3202  Opc = NVPTX::TEX_CUBE_S32_F32;
3203  break;
3205  Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3206  break;
3208  Opc = NVPTX::TEX_CUBE_U32_F32;
3209  break;
3211  Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3212  break;
3214  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3215  break;
3217  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3218  break;
3220  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3221  break;
3223  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3224  break;
3226  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3227  break;
3229  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3230  break;
3232  Opc = NVPTX::TLD4_R_2D_F32_F32;
3233  break;
3235  Opc = NVPTX::TLD4_G_2D_F32_F32;
3236  break;
3238  Opc = NVPTX::TLD4_B_2D_F32_F32;
3239  break;
3241  Opc = NVPTX::TLD4_A_2D_F32_F32;
3242  break;
3244  Opc = NVPTX::TLD4_R_2D_S32_F32;
3245  break;
3247  Opc = NVPTX::TLD4_G_2D_S32_F32;
3248  break;
3250  Opc = NVPTX::TLD4_B_2D_S32_F32;
3251  break;
3253  Opc = NVPTX::TLD4_A_2D_S32_F32;
3254  break;
3256  Opc = NVPTX::TLD4_R_2D_U32_F32;
3257  break;
3259  Opc = NVPTX::TLD4_G_2D_U32_F32;
3260  break;
3262  Opc = NVPTX::TLD4_B_2D_U32_F32;
3263  break;
3265  Opc = NVPTX::TLD4_A_2D_U32_F32;
3266  break;
3268  Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3269  break;
3271  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3272  break;
3274  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3275  break;
3277  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3278  break;
3280  Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3281  break;
3283  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3284  break;
3286  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3287  break;
3289  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3290  break;
3292  Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3293  break;
3295  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3296  break;
3298  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3299  break;
3301  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3302  break;
3304  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3305  break;
3307  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3308  break;
3310  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3311  break;
3313  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3314  break;
3316  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3317  break;
3319  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3320  break;
3322  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3323  break;
3325  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3326  break;
3328  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3329  break;
3331  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3332  break;
3334  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3335  break;
3337  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3338  break;
3340  Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3341  break;
3343  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3344  break;
3346  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3347  break;
3349  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3350  break;
3352  Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3353  break;
3355  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3356  break;
3358  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3359  break;
3361  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3362  break;
3364  Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3365  break;
3367  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3368  break;
3370  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3371  break;
3373  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3374  break;
3376  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3377  break;
3379  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3380  break;
3382  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3383  break;
3385  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3386  break;
3388  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3389  break;
3391  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3392  break;
3394  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3395  break;
3397  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3398  break;
3400  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3401  break;
3403  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3404  break;
3406  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3407  break;
3409  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3410  break;
3412  Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3413  break;
3415  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3416  break;
3418  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3419  break;
3421  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3422  break;
3424  Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3425  break;
3427  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3428  break;
3430  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3431  break;
3433  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3434  break;
3436  Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3437  break;
3439  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3440  break;
3442  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3443  break;
3445  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3446  break;
3448  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3449  break;
3451  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3452  break;
3454  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3455  break;
3457  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3458  break;
3460  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3461  break;
3463  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3464  break;
3466  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3467  break;
3469  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3470  break;
3472  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3473  break;
3475  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3476  break;
3478  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3479  break;
3481  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3482  break;
3484  Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3485  break;
3487  Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3488  break;
3490  Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3491  break;
3493  Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3494  break;
3496  Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3497  break;
3499  Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3500  break;
3502  Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3503  break;
3505  Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3506  break;
3508  Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3509  break;
3511  Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3512  break;
3514  Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3515  break;
3517  Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3518  break;
3519  }
3520 
3521  // Copy over operands
3522  for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3523  Ops.push_back(N->getOperand(i));
3524  }
3525 
3526  Ops.push_back(Chain);
3527  Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3528  return Ret;
3529 }
3530 
3531 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3532  SDValue Chain = N->getOperand(0);
3533  SDValue TexHandle = N->getOperand(1);
3534  SDNode *Ret = nullptr;
3535  unsigned Opc = 0;
3537  switch (N->getOpcode()) {
3538  default: return nullptr;
3540  Opc = NVPTX::SULD_1D_I8_CLAMP;
3541  Ops.push_back(TexHandle);
3542  Ops.push_back(N->getOperand(2));
3543  Ops.push_back(Chain);
3544  break;
3546  Opc = NVPTX::SULD_1D_I16_CLAMP;
3547  Ops.push_back(TexHandle);
3548  Ops.push_back(N->getOperand(2));
3549  Ops.push_back(Chain);
3550  break;
3552  Opc = NVPTX::SULD_1D_I32_CLAMP;
3553  Ops.push_back(TexHandle);
3554  Ops.push_back(N->getOperand(2));
3555  Ops.push_back(Chain);
3556  break;
3558  Opc = NVPTX::SULD_1D_I64_CLAMP;
3559  Ops.push_back(TexHandle);
3560  Ops.push_back(N->getOperand(2));
3561  Ops.push_back(Chain);
3562  break;
3564  Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3565  Ops.push_back(TexHandle);
3566  Ops.push_back(N->getOperand(2));
3567  Ops.push_back(Chain);
3568  break;
3570  Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3571  Ops.push_back(TexHandle);
3572  Ops.push_back(N->getOperand(2));
3573  Ops.push_back(Chain);
3574  break;
3576  Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3577  Ops.push_back(TexHandle);
3578  Ops.push_back(N->getOperand(2));
3579  Ops.push_back(Chain);
3580  break;
3582  Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3583  Ops.push_back(TexHandle);
3584  Ops.push_back(N->getOperand(2));
3585  Ops.push_back(Chain);
3586  break;
3588  Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3589  Ops.push_back(TexHandle);
3590  Ops.push_back(N->getOperand(2));
3591  Ops.push_back(Chain);
3592  break;
3594  Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3595  Ops.push_back(TexHandle);
3596  Ops.push_back(N->getOperand(2));
3597  Ops.push_back(Chain);
3598  break;
3600  Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3601  Ops.push_back(TexHandle);
3602  Ops.push_back(N->getOperand(2));
3603  Ops.push_back(Chain);
3604  break;
3606  Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3607  Ops.push_back(TexHandle);
3608  Ops.push_back(N->getOperand(2));
3609  Ops.push_back(N->getOperand(3));
3610  Ops.push_back(Chain);
3611  break;
3613  Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3614  Ops.push_back(TexHandle);
3615  Ops.push_back(N->getOperand(2));
3616  Ops.push_back(N->getOperand(3));
3617  Ops.push_back(Chain);
3618  break;
3620  Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3621  Ops.push_back(TexHandle);
3622  Ops.push_back(N->getOperand(2));
3623  Ops.push_back(N->getOperand(3));
3624  Ops.push_back(Chain);
3625  break;
3627  Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3628  Ops.push_back(TexHandle);
3629  Ops.push_back(N->getOperand(2));
3630  Ops.push_back(N->getOperand(3));
3631  Ops.push_back(Chain);
3632  break;
3634  Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3635  Ops.push_back(TexHandle);
3636  Ops.push_back(N->getOperand(2));
3637  Ops.push_back(N->getOperand(3));
3638  Ops.push_back(Chain);
3639  break;
3641  Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3642  Ops.push_back(TexHandle);
3643  Ops.push_back(N->getOperand(2));
3644  Ops.push_back(N->getOperand(3));
3645  Ops.push_back(Chain);
3646  break;
3648  Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3649  Ops.push_back(TexHandle);
3650  Ops.push_back(N->getOperand(2));
3651  Ops.push_back(N->getOperand(3));
3652  Ops.push_back(Chain);
3653  break;
3655  Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3656  Ops.push_back(TexHandle);
3657  Ops.push_back(N->getOperand(2));
3658  Ops.push_back(N->getOperand(3));
3659  Ops.push_back(Chain);
3660  break;
3662  Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3663  Ops.push_back(TexHandle);
3664  Ops.push_back(N->getOperand(2));
3665  Ops.push_back(N->getOperand(3));
3666  Ops.push_back(Chain);
3667  break;
3669  Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3670  Ops.push_back(TexHandle);
3671  Ops.push_back(N->getOperand(2));
3672  Ops.push_back(N->getOperand(3));
3673  Ops.push_back(Chain);
3674  break;
3676  Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3677  Ops.push_back(TexHandle);
3678  Ops.push_back(N->getOperand(2));
3679  Ops.push_back(N->getOperand(3));
3680  Ops.push_back(Chain);
3681  break;
3683  Opc = NVPTX::SULD_2D_I8_CLAMP;
3684  Ops.push_back(TexHandle);
3685  Ops.push_back(N->getOperand(2));
3686  Ops.push_back(N->getOperand(3));
3687  Ops.push_back(Chain);
3688  break;
3690  Opc = NVPTX::SULD_2D_I16_CLAMP;
3691  Ops.push_back(TexHandle);
3692  Ops.push_back(N->getOperand(2));
3693  Ops.push_back(N->getOperand(3));
3694  Ops.push_back(Chain);
3695  break;
3697  Opc = NVPTX::SULD_2D_I32_CLAMP;
3698  Ops.push_back(TexHandle);
3699  Ops.push_back(N->getOperand(2));
3700  Ops.push_back(N->getOperand(3));
3701  Ops.push_back(Chain);
3702  break;
3704  Opc = NVPTX::SULD_2D_I64_CLAMP;
3705  Ops.push_back(TexHandle);
3706  Ops.push_back(N->getOperand(2));
3707  Ops.push_back(N->getOperand(3));
3708  Ops.push_back(Chain);
3709  break;
3711  Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3712  Ops.push_back(TexHandle);
3713  Ops.push_back(N->getOperand(2));
3714  Ops.push_back(N->getOperand(3));
3715  Ops.push_back(Chain);
3716  break;
3718  Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3719  Ops.push_back(TexHandle);
3720  Ops.push_back(N->getOperand(2));
3721  Ops.push_back(N->getOperand(3));
3722  Ops.push_back(Chain);
3723  break;
3725  Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3726  Ops.push_back(TexHandle);
3727  Ops.push_back(N->getOperand(2));
3728  Ops.push_back(N->getOperand(3));
3729  Ops.push_back(Chain);
3730  break;
3732  Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3733  Ops.push_back(TexHandle);
3734  Ops.push_back(N->getOperand(2));
3735  Ops.push_back(N->getOperand(3));
3736  Ops.push_back(Chain);
3737  break;
3739  Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3740  Ops.push_back(TexHandle);
3741  Ops.push_back(N->getOperand(2));
3742  Ops.push_back(N->getOperand(3));
3743  Ops.push_back(Chain);
3744  break;
3746  Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3747  Ops.push_back(TexHandle);
3748  Ops.push_back(N->getOperand(2));
3749  Ops.push_back(N->getOperand(3));
3750  Ops.push_back(Chain);
3751  break;
3753  Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3754  Ops.push_back(TexHandle);
3755  Ops.push_back(N->getOperand(2));
3756  Ops.push_back(N->getOperand(3));
3757  Ops.push_back(Chain);
3758  break;
3760  Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3761  Ops.push_back(TexHandle);
3762  Ops.push_back(N->getOperand(2));
3763  Ops.push_back(N->getOperand(3));
3764  Ops.push_back(N->getOperand(4));
3765  Ops.push_back(Chain);
3766  break;
3768  Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3769  Ops.push_back(TexHandle);
3770  Ops.push_back(N->getOperand(2));
3771  Ops.push_back(N->getOperand(3));
3772  Ops.push_back(N->getOperand(4));
3773  Ops.push_back(Chain);
3774  break;
3776  Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3777  Ops.push_back(TexHandle);
3778  Ops.push_back(N->getOperand(2));
3779  Ops.push_back(N->getOperand(3));
3780  Ops.push_back(N->getOperand(4));
3781  Ops.push_back(Chain);
3782  break;
3784  Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3785  Ops.push_back(TexHandle);
3786  Ops.push_back(N->getOperand(2));
3787  Ops.push_back(N->getOperand(3));
3788  Ops.push_back(N->getOperand(4));
3789  Ops.push_back(Chain);
3790  break;
3792  Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3793  Ops.push_back(TexHandle);
3794  Ops.push_back(N->getOperand(2));
3795  Ops.push_back(N->getOperand(3));
3796  Ops.push_back(N->getOperand(4));
3797  Ops.push_back(Chain);
3798  break;
3800  Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3801  Ops.push_back(TexHandle);
3802  Ops.push_back(N->getOperand(2));
3803  Ops.push_back(N->getOperand(3));
3804  Ops.push_back(N->getOperand(4));
3805  Ops.push_back(Chain);
3806  break;
3808  Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3809  Ops.push_back(TexHandle);
3810  Ops.push_back(N->getOperand(2));
3811  Ops.push_back(N->getOperand(3));
3812  Ops.push_back(N->getOperand(4));
3813  Ops.push_back(Chain);
3814  break;
3816  Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3817  Ops.push_back(TexHandle);
3818  Ops.push_back(N->getOperand(2));
3819  Ops.push_back(N->getOperand(3));
3820  Ops.push_back(N->getOperand(4));
3821  Ops.push_back(Chain);
3822  break;
3824  Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3825  Ops.push_back(TexHandle);
3826  Ops.push_back(N->getOperand(2));
3827  Ops.push_back(N->getOperand(3));
3828  Ops.push_back(N->getOperand(4));
3829  Ops.push_back(Chain);
3830  break;
3832  Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3833  Ops.push_back(TexHandle);
3834  Ops.push_back(N->getOperand(2));
3835  Ops.push_back(N->getOperand(3));
3836  Ops.push_back(N->getOperand(4));
3837  Ops.push_back(Chain);
3838  break;
3840  Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3841  Ops.push_back(TexHandle);
3842  Ops.push_back(N->getOperand(2));
3843  Ops.push_back(N->getOperand(3));
3844  Ops.push_back(N->getOperand(4));
3845  Ops.push_back(Chain);
3846  break;
3848  Opc = NVPTX::SULD_3D_I8_CLAMP;
3849  Ops.push_back(TexHandle);
3850  Ops.push_back(N->getOperand(2));
3851  Ops.push_back(N->getOperand(3));
3852  Ops.push_back(N->getOperand(4));
3853  Ops.push_back(Chain);
3854  break;
3856  Opc = NVPTX::SULD_3D_I16_CLAMP;
3857  Ops.push_back(TexHandle);
3858  Ops.push_back(N->getOperand(2));
3859  Ops.push_back(N->getOperand(3));
3860  Ops.push_back(N->getOperand(4));
3861  Ops.push_back(Chain);
3862  break;
3864  Opc = NVPTX::SULD_3D_I32_CLAMP;
3865  Ops.push_back(TexHandle);
3866  Ops.push_back(N->getOperand(2));
3867  Ops.push_back(N->getOperand(3));
3868  Ops.push_back(N->getOperand(4));
3869  Ops.push_back(Chain);
3870  break;
3872  Opc = NVPTX::SULD_3D_I64_CLAMP;
3873  Ops.push_back(TexHandle);
3874  Ops.push_back(N->getOperand(2));
3875  Ops.push_back(N->getOperand(3));
3876  Ops.push_back(N->getOperand(4));
3877  Ops.push_back(Chain);
3878  break;
3880  Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3881  Ops.push_back(TexHandle);
3882  Ops.push_back(N->getOperand(2));
3883  Ops.push_back(N->getOperand(3));
3884  Ops.push_back(N->getOperand(4));
3885  Ops.push_back(Chain);
3886  break;
3888  Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3889  Ops.push_back(TexHandle);
3890  Ops.push_back(N->getOperand(2));
3891  Ops.push_back(N->getOperand(3));
3892  Ops.push_back(N->getOperand(4));
3893  Ops.push_back(Chain);
3894  break;
3896  Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3897  Ops.push_back(TexHandle);
3898  Ops.push_back(N->getOperand(2));
3899  Ops.push_back(N->getOperand(3));
3900  Ops.push_back(N->getOperand(4));
3901  Ops.push_back(Chain);
3902  break;
3904  Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3905  Ops.push_back(TexHandle);
3906  Ops.push_back(N->getOperand(2));
3907  Ops.push_back(N->getOperand(3));
3908  Ops.push_back(N->getOperand(4));
3909  Ops.push_back(Chain);
3910  break;
3912  Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3913  Ops.push_back(TexHandle);
3914  Ops.push_back(N->getOperand(2));
3915  Ops.push_back(N->getOperand(3));
3916  Ops.push_back(N->getOperand(4));
3917  Ops.push_back(Chain);
3918  break;
3920  Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3921  Ops.push_back(TexHandle);
3922  Ops.push_back(N->getOperand(2));
3923  Ops.push_back(N->getOperand(3));
3924  Ops.push_back(N->getOperand(4));
3925  Ops.push_back(Chain);
3926  break;
3928  Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3929  Ops.push_back(TexHandle);
3930  Ops.push_back(N->getOperand(2));
3931  Ops.push_back(N->getOperand(3));
3932  Ops.push_back(N->getOperand(4));
3933  Ops.push_back(Chain);
3934  break;
3936  Opc = NVPTX::SULD_1D_I8_TRAP;
3937  Ops.push_back(TexHandle);
3938  Ops.push_back(N->getOperand(2));
3939  Ops.push_back(Chain);
3940  break;
3942  Opc = NVPTX::SULD_1D_I16_TRAP;
3943  Ops.push_back(TexHandle);
3944  Ops.push_back(N->getOperand(2));
3945  Ops.push_back(Chain);
3946  break;
3948  Opc = NVPTX::SULD_1D_I32_TRAP;
3949  Ops.push_back(TexHandle);
3950  Ops.push_back(N->getOperand(2));
3951  Ops.push_back(Chain);
3952  break;
3954  Opc = NVPTX::SULD_1D_I64_TRAP;
3955  Ops.push_back(TexHandle);
3956  Ops.push_back(N->getOperand(2));
3957  Ops.push_back(Chain);
3958  break;
3960  Opc = NVPTX::SULD_1D_V2I8_TRAP;
3961  Ops.push_back(TexHandle);
3962  Ops.push_back(N->getOperand(2));
3963  Ops.push_back(Chain);
3964  break;
3966  Opc = NVPTX::SULD_1D_V2I16_TRAP;
3967  Ops.push_back(TexHandle);
3968  Ops.push_back(N->getOperand(2));
3969  Ops.push_back(Chain);
3970  break;
3972  Opc = NVPTX::SULD_1D_V2I32_TRAP;
3973  Ops.push_back(TexHandle);
3974  Ops.push_back(N->getOperand(2));
3975  Ops.push_back(Chain);
3976  break;
3978  Opc = NVPTX::SULD_1D_V2I64_TRAP;
3979  Ops.push_back(TexHandle);
3980  Ops.push_back(N->getOperand(2));
3981  Ops.push_back(Chain);
3982  break;
3984  Opc = NVPTX::SULD_1D_V4I8_TRAP;
3985  Ops.push_back(TexHandle);
3986  Ops.push_back(N->getOperand(2));
3987  Ops.push_back(Chain);
3988  break;
3990  Opc = NVPTX::SULD_1D_V4I16_TRAP;
3991  Ops.push_back(TexHandle);
3992  Ops.push_back(N->getOperand(2));
3993  Ops.push_back(Chain);
3994  break;
3996  Opc = NVPTX::SULD_1D_V4I32_TRAP;
3997  Ops.push_back(TexHandle);
3998  Ops.push_back(N->getOperand(2));
3999  Ops.push_back(Chain);
4000  break;
4002  Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4003  Ops.push_back(TexHandle);
4004  Ops.push_back(N->getOperand(2));
4005  Ops.push_back(N->getOperand(3));
4006  Ops.push_back(Chain);
4007  break;
4009  Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4010  Ops.push_back(TexHandle);
4011  Ops.push_back(N->getOperand(2));
4012  Ops.push_back(N->getOperand(3));
4013  Ops.push_back(Chain);
4014  break;
4016  Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4017  Ops.push_back(TexHandle);
4018  Ops.push_back(N->getOperand(2));
4019  Ops.push_back(N->getOperand(3));
4020  Ops.push_back(Chain);
4021  break;
4023  Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4024  Ops.push_back(TexHandle);
4025  Ops.push_back(N->getOperand(2));
4026  Ops.push_back(N->getOperand(3));
4027  Ops.push_back(Chain);
4028  break;
4030  Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4031  Ops.push_back(TexHandle);
4032  Ops.push_back(N->getOperand(2));
4033  Ops.push_back(N->getOperand(3));
4034  Ops.push_back(Chain);
4035  break;
4037  Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4038  Ops.push_back(TexHandle);
4039  Ops.push_back(N->getOperand(2));
4040  Ops.push_back(N->getOperand(3));
4041  Ops.push_back(Chain);
4042  break;
4044  Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4045  Ops.push_back(TexHandle);
4046  Ops.push_back(N->getOperand(2));
4047  Ops.push_back(N->getOperand(3));
4048  Ops.push_back(Chain);
4049  break;
4051  Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4052  Ops.push_back(TexHandle);
4053  Ops.push_back(N->getOperand(2));
4054  Ops.push_back(N->getOperand(3));
4055  Ops.push_back(Chain);
4056  break;
4058  Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4059  Ops.push_back(TexHandle);
4060  Ops.push_back(N->getOperand(2));
4061  Ops.push_back(N->getOperand(3));
4062  Ops.push_back(Chain);
4063  break;
4065  Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4066  Ops.push_back(TexHandle);
4067  Ops.push_back(N->getOperand(2));
4068  Ops.push_back(N->getOperand(3));
4069  Ops.push_back(Chain);
4070  break;
4072  Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4073  Ops.push_back(TexHandle);
4074  Ops.push_back(N->getOperand(2));
4075  Ops.push_back(N->getOperand(3));
4076  Ops.push_back(Chain);
4077  break;
4079  Opc = NVPTX::SULD_2D_I8_TRAP;
4080  Ops.push_back(TexHandle);
4081  Ops.push_back(N->getOperand(2));
4082  Ops.push_back(N->getOperand(3));
4083  Ops.push_back(Chain);
4084  break;
4086  Opc = NVPTX::SULD_2D_I16_TRAP;
4087  Ops.push_back(TexHandle);
4088  Ops.push_back(N->getOperand(2));
4089  Ops.push_back(N->getOperand(3));
4090  Ops.push_back(Chain);
4091  break;
4093  Opc = NVPTX::SULD_2D_I32_TRAP;
4094  Ops.push_back(TexHandle);
4095  Ops.push_back(N->getOperand(2));
4096  Ops.push_back(N->getOperand(3));
4097  Ops.push_back(Chain);
4098  break;
4100  Opc = NVPTX::SULD_2D_I64_TRAP;
4101  Ops.push_back(TexHandle);
4102  Ops.push_back(N->getOperand(2));
4103  Ops.push_back(N->getOperand(3));
4104  Ops.push_back(Chain);
4105  break;
4107  Opc = NVPTX::SULD_2D_V2I8_TRAP;
4108  Ops.push_back(TexHandle);
4109  Ops.push_back(N->getOperand(2));
4110  Ops.push_back(N->getOperand(3));
4111  Ops.push_back(Chain);
4112  break;
4114  Opc = NVPTX::SULD_2D_V2I16_TRAP;
4115  Ops.push_back(TexHandle);
4116  Ops.push_back(N->getOperand(2));
4117  Ops.push_back(N->getOperand(3));
4118  Ops.push_back(Chain);
4119  break;
4121  Opc = NVPTX::SULD_2D_V2I32_TRAP;
4122  Ops.push_back(TexHandle);
4123  Ops.push_back(N->getOperand(2));
4124  Ops.push_back(N->getOperand(3));
4125  Ops.push_back(Chain);
4126  break;
4128  Opc = NVPTX::SULD_2D_V2I64_TRAP;
4129  Ops.push_back(TexHandle);
4130  Ops.push_back(N->getOperand(2));
4131  Ops.push_back(N->getOperand(3));
4132  Ops.push_back(Chain);
4133  break;
4135  Opc = NVPTX::SULD_2D_V4I8_TRAP;
4136  Ops.push_back(TexHandle);
4137  Ops.push_back(N->getOperand(2));
4138  Ops.push_back(N->getOperand(3));
4139  Ops.push_back(Chain);
4140  break;
4142  Opc = NVPTX::SULD_2D_V4I16_TRAP;
4143  Ops.push_back(TexHandle);
4144  Ops.push_back(N->getOperand(2));
4145  Ops.push_back(N->getOperand(3));
4146  Ops.push_back(Chain);
4147  break;
4149  Opc = NVPTX::SULD_2D_V4I32_TRAP;
4150  Ops.push_back(TexHandle);
4151  Ops.push_back(N->getOperand(2));
4152  Ops.push_back(N->getOperand(3));
4153  Ops.push_back(Chain);
4154  break;
4156  Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4157  Ops.push_back(TexHandle);
4158  Ops.push_back(N->getOperand(2));
4159  Ops.push_back(N->getOperand(3));
4160  Ops.push_back(N->getOperand(4));
4161  Ops.push_back(Chain);
4162  break;
4164  Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4165  Ops.push_back(TexHandle);
4166  Ops.push_back(N->getOperand(2));
4167  Ops.push_back(N->getOperand(3));
4168  Ops.push_back(N->getOperand(4));
4169  Ops.push_back(Chain);
4170  break;
4172  Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4173  Ops.push_back(TexHandle);
4174  Ops.push_back(N->getOperand(2));
4175  Ops.push_back(N->getOperand(3));
4176  Ops.push_back(N->getOperand(4));
4177  Ops.push_back(Chain);
4178  break;
4180  Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4181  Ops.push_back(TexHandle);
4182  Ops.push_back(N->getOperand(2));
4183  Ops.push_back(N->getOperand(3));
4184  Ops.push_back(N->getOperand(4));
4185  Ops.push_back(Chain);
4186  break;
4188  Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4189  Ops.push_back(TexHandle);
4190  Ops.push_back(N->getOperand(2));
4191  Ops.push_back(N->getOperand(3));
4192  Ops.push_back(N->getOperand(4));
4193  Ops.push_back(Chain);
4194  break;
4196  Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4197  Ops.push_back(TexHandle);
4198  Ops.push_back(N->getOperand(2));
4199  Ops.push_back(N->getOperand(3));
4200  Ops.push_back(N->getOperand(4));
4201  Ops.push_back(Chain);
4202  break;
4204  Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4205  Ops.push_back(TexHandle);
4206  Ops.push_back(N->getOperand(2));
4207  Ops.push_back(N->getOperand(3));
4208  Ops.push_back(N->getOperand(4));
4209  Ops.push_back(Chain);
4210  break;
4212  Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4213  Ops.push_back(TexHandle);
4214  Ops.push_back(N->getOperand(2));
4215  Ops.push_back(N->getOperand(3));
4216  Ops.push_back(N->getOperand(4));
4217  Ops.push_back(Chain);
4218  break;
4220  Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4221  Ops.push_back(TexHandle);
4222  Ops.push_back(N->getOperand(2));
4223  Ops.push_back(N->getOperand(3));
4224  Ops.push_back(N->getOperand(4));
4225  Ops.push_back(Chain);
4226  break;
4228  Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4229  Ops.push_back(TexHandle);
4230  Ops.push_back(N->getOperand(2));
4231  Ops.push_back(N->getOperand(3));
4232  Ops.push_back(N->getOperand(4));
4233  Ops.push_back(Chain);
4234  break;
4236  Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4237  Ops.push_back(TexHandle);
4238  Ops.push_back(N->getOperand(2));
4239  Ops.push_back(N->getOperand(3));
4240  Ops.push_back(N->getOperand(4));
4241  Ops.push_back(Chain);
4242  break;
4244  Opc = NVPTX::SULD_3D_I8_TRAP;
4245  Ops.push_back(TexHandle);
4246  Ops.push_back(N->getOperand(2));
4247  Ops.push_back(N->getOperand(3));
4248  Ops.push_back(N->getOperand(4));
4249  Ops.push_back(Chain);
4250  break;
4252  Opc = NVPTX::SULD_3D_I16_TRAP;
4253  Ops.push_back(TexHandle);
4254  Ops.push_back(N->getOperand(2));
4255  Ops.push_back(N->getOperand(3));
4256  Ops.push_back(N->getOperand(4));
4257  Ops.push_back(Chain);
4258  break;
4260  Opc = NVPTX::SULD_3D_I32_TRAP;
4261  Ops.push_back(TexHandle);
4262  Ops.push_back(N->getOperand(2));
4263  Ops.push_back(N->getOperand(3));
4264  Ops.push_back(N->getOperand(4));
4265  Ops.push_back(Chain);
4266  break;
4268  Opc = NVPTX::SULD_3D_I64_TRAP;
4269  Ops.push_back(TexHandle);
4270  Ops.push_back(N->getOperand(2));
4271  Ops.push_back(N->getOperand(3));
4272  Ops.push_back(N->getOperand(4));
4273  Ops.push_back(Chain);
4274  break;
4276  Opc = NVPTX::SULD_3D_V2I8_TRAP;
4277  Ops.push_back(TexHandle);
4278  Ops.push_back(N->getOperand(2));
4279  Ops.push_back(N->getOperand(3));
4280  Ops.push_back(N->getOperand(4));
4281  Ops.push_back(Chain);
4282  break;
4284  Opc = NVPTX::SULD_3D_V2I16_TRAP;
4285  Ops.push_back(TexHandle);
4286  Ops.push_back(N->getOperand(2));
4287  Ops.push_back(N->getOperand(3));
4288  Ops.push_back(N->getOperand(4));
4289  Ops.push_back(Chain);
4290  break;
4292  Opc = NVPTX::SULD_3D_V2I32_TRAP;
4293  Ops.push_back(TexHandle);
4294  Ops.push_back(N->getOperand(2));
4295  Ops.push_back(N->getOperand(3));
4296  Ops.push_back(N->getOperand(4));
4297  Ops.push_back(Chain);
4298  break;
4300  Opc = NVPTX::SULD_3D_V2I64_TRAP;
4301  Ops.push_back(TexHandle);
4302  Ops.push_back(N->getOperand(2));
4303  Ops.push_back(N->getOperand(3));
4304  Ops.push_back(N->getOperand(4));
4305  Ops.push_back(Chain);
4306  break;
4308  Opc = NVPTX::SULD_3D_V4I8_TRAP;
4309  Ops.push_back(TexHandle);
4310  Ops.push_back(N->getOperand(2));
4311  Ops.push_back(N->getOperand(3));
4312  Ops.push_back(N->getOperand(4));
4313  Ops.push_back(Chain);
4314  break;
4316  Opc = NVPTX::SULD_3D_V4I16_TRAP;
4317  Ops.push_back(TexHandle);
4318  Ops.push_back(N->getOperand(2));
4319  Ops.push_back(N->getOperand(3));
4320  Ops.push_back(N->getOperand(4));
4321  Ops.push_back(Chain);
4322  break;
4324  Opc = NVPTX::SULD_3D_V4I32_TRAP;
4325  Ops.push_back(TexHandle);
4326  Ops.push_back(N->getOperand(2));
4327  Ops.push_back(N->getOperand(3));
4328  Ops.push_back(N->getOperand(4));
4329  Ops.push_back(Chain);
4330  break;
4332  Opc = NVPTX::SULD_1D_I8_ZERO;
4333  Ops.push_back(TexHandle);
4334  Ops.push_back(N->getOperand(2));
4335  Ops.push_back(Chain);
4336  break;
4338  Opc = NVPTX::SULD_1D_I16_ZERO;
4339  Ops.push_back(TexHandle);
4340  Ops.push_back(N->getOperand(2));
4341  Ops.push_back(Chain);
4342  break;
4344  Opc = NVPTX::SULD_1D_I32_ZERO;
4345  Ops.push_back(TexHandle);
4346  Ops.push_back(N->getOperand(2));
4347  Ops.push_back(Chain);
4348  break;
4350  Opc = NVPTX::SULD_1D_I64_ZERO;
4351  Ops.push_back(TexHandle);
4352  Ops.push_back(N->getOperand(2));
4353  Ops.push_back(Chain);
4354  break;
4356  Opc = NVPTX::SULD_1D_V2I8_ZERO;
4357  Ops.push_back(TexHandle);
4358  Ops.push_back(N->getOperand(2));
4359  Ops.push_back(Chain);
4360  break;
4362  Opc = NVPTX::SULD_1D_V2I16_ZERO;
4363  Ops.push_back(TexHandle);
4364  Ops.push_back(N->getOperand(2));
4365  Ops.push_back(Chain);
4366  break;
4368  Opc = NVPTX::SULD_1D_V2I32_ZERO;
4369  Ops.push_back(TexHandle);
4370  Ops.push_back(N->getOperand(2));
4371  Ops.push_back(Chain);
4372  break;
4374  Opc = NVPTX::SULD_1D_V2I64_ZERO;
4375  Ops.push_back(TexHandle);
4376  Ops.push_back(N->getOperand(2));
4377  Ops.push_back(Chain);
4378  break;
4380  Opc = NVPTX::SULD_1D_V4I8_ZERO;
4381  Ops.push_back(TexHandle);
4382  Ops.push_back(N->getOperand(2));
4383  Ops.push_back(Chain);
4384  break;
4386  Opc = NVPTX::SULD_1D_V4I16_ZERO;
4387  Ops.push_back(TexHandle);
4388  Ops.push_back(N->getOperand(2));
4389  Ops.push_back(Chain);
4390  break;
4392  Opc = NVPTX::SULD_1D_V4I32_ZERO;
4393  Ops.push_back(TexHandle);
4394  Ops.push_back(N->getOperand(2));
4395  Ops.push_back(Chain);
4396  break;
4398  Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4399  Ops.push_back(TexHandle);
4400  Ops.push_back(N->getOperand(2));
4401  Ops.push_back(N->getOperand(3));
4402  Ops.push_back(Chain);
4403  break;
4405  Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4406  Ops.push_back(TexHandle);
4407  Ops.push_back(N->getOperand(2));
4408  Ops.push_back(N->getOperand(3));
4409  Ops.push_back(Chain);
4410  break;
4412  Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4413  Ops.push_back(TexHandle);
4414  Ops.push_back(N->getOperand(2));
4415  Ops.push_back(N->getOperand(3));
4416  Ops.push_back(Chain);
4417  break;
4419  Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4420  Ops.push_back(TexHandle);
4421  Ops.push_back(N->getOperand(2));
4422  Ops.push_back(N->getOperand(3));
4423  Ops.push_back(Chain);
4424  break;
4426  Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4427  Ops.push_back(TexHandle);
4428  Ops.push_back(N->getOperand(2));
4429  Ops.push_back(N->getOperand(3));
4430  Ops.push_back(Chain);
4431  break;
4433  Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4434  Ops.push_back(TexHandle);
4435  Ops.push_back(N->getOperand(2));
4436  Ops.push_back(N->getOperand(3));
4437  Ops.push_back(Chain);
4438  break;
4440  Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4441  Ops.push_back(TexHandle);
4442  Ops.push_back(N->getOperand(2));
4443  Ops.push_back(N->getOperand(3));
4444  Ops.push_back(Chain);
4445  break;
4447  Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4448  Ops.push_back(TexHandle);
4449  Ops.push_back(N->getOperand(2));
4450  Ops.push_back(N->getOperand(3));
4451  Ops.push_back(Chain);
4452  break;
4454  Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4455  Ops.push_back(TexHandle);
4456  Ops.push_back(N->getOperand(2));
4457  Ops.push_back(N->getOperand(3));
4458  Ops.push_back(Chain);
4459  break;
4461  Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4462  Ops.push_back(TexHandle);
4463  Ops.push_back(N->getOperand(2));
4464  Ops.push_back(N->getOperand(3));
4465  Ops.push_back(Chain);
4466  break;
4468  Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4469  Ops.push_back(TexHandle);
4470  Ops.push_back(N->getOperand(2));
4471  Ops.push_back(N->getOperand(3));
4472  Ops.push_back(Chain);
4473  break;
4475  Opc = NVPTX::SULD_2D_I8_ZERO;
4476  Ops.push_back(TexHandle);
4477  Ops.push_back(N->getOperand(2));
4478  Ops.push_back(N->getOperand(3));
4479  Ops.push_back(Chain);
4480  break;
4482  Opc = NVPTX::SULD_2D_I16_ZERO;
4483  Ops.push_back(TexHandle);
4484  Ops.push_back(N->getOperand(2));
4485  Ops.push_back(N->getOperand(3));
4486  Ops.push_back(Chain);
4487  break;
4489  Opc = NVPTX::SULD_2D_I32_ZERO;
4490  Ops.push_back(TexHandle);
4491  Ops.push_back(N->getOperand(2));
4492  Ops.push_back(N->getOperand(3));
4493  Ops.push_back(Chain);
4494  break;
4496  Opc = NVPTX::SULD_2D_I64_ZERO;
4497  Ops.push_back(TexHandle);
4498  Ops.push_back(N->getOperand(2));
4499  Ops.push_back(N->getOperand(3));
4500  Ops.push_back(Chain);
4501  break;
4503  Opc = NVPTX::SULD_2D_V2I8_ZERO;
4504  Ops.push_back(TexHandle);
4505  Ops.push_back(N->getOperand(2));
4506  Ops.push_back(N->getOperand(3));
4507  Ops.push_back(Chain);
4508  break;
4510  Opc = NVPTX::SULD_2D_V2I16_ZERO;
4511  Ops.push_back(TexHandle);
4512  Ops.push_back(N->getOperand(2));
4513  Ops.push_back(N->getOperand(3));
4514  Ops.push_back(Chain);
4515  break;
4517  Opc = NVPTX::SULD_2D_V2I32_ZERO;
4518  Ops.push_back(TexHandle);
4519  Ops.push_back(N->getOperand(2));
4520  Ops.push_back(N->getOperand(3));
4521  Ops.push_back(Chain);
4522  break;
4524  Opc = NVPTX::SULD_2D_V2I64_ZERO;
4525  Ops.push_back(TexHandle);
4526  Ops.push_back(N->getOperand(2));
4527  Ops.push_back(N->getOperand(3));
4528  Ops.push_back(Chain);
4529  break;
4531  Opc = NVPTX::SULD_2D_V4I8_ZERO;
4532  Ops.push_back(TexHandle);
4533  Ops.push_back(N->getOperand(2));
4534  Ops.push_back(N->getOperand(3));
4535  Ops.push_back(Chain);
4536  break;
4538  Opc = NVPTX::SULD_2D_V4I16_ZERO;
4539  Ops.push_back(TexHandle);
4540  Ops.push_back(N->getOperand(2));
4541  Ops.push_back(N->getOperand(3));
4542  Ops.push_back(Chain);
4543  break;
4545  Opc = NVPTX::SULD_2D_V4I32_ZERO;
4546  Ops.push_back(TexHandle);
4547  Ops.push_back(N->getOperand(2));
4548  Ops.push_back(N->getOperand(3));
4549  Ops.push_back(Chain);
4550  break;
4552  Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4553  Ops.push_back(TexHandle);
4554  Ops.push_back(N->getOperand(2));
4555  Ops.push_back(N->getOperand(3));
4556  Ops.push_back(N->getOperand(4));
4557  Ops.push_back(Chain);
4558  break;
4560  Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4561  Ops.push_back(TexHandle);
4562  Ops.push_back(N->getOperand(2));
4563  Ops.push_back(N->getOperand(3));
4564  Ops.push_back(N->getOperand(4));
4565  Ops.push_back(Chain);
4566  break;
4568  Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4569  Ops.push_back(TexHandle);
4570  Ops.push_back(N->getOperand(2));
4571  Ops.push_back(N->getOperand(3));
4572  Ops.push_back(N->getOperand(4));
4573  Ops.push_back(Chain);
4574  break;
4576  Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4577  Ops.push_back(TexHandle);
4578  Ops.push_back(N->getOperand(2));
4579  Ops.push_back(N->getOperand(3));
4580  Ops.push_back(N->getOperand(4));
4581  Ops.push_back(Chain);
4582  break;
4584  Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4585  Ops.push_back(TexHandle);
4586  Ops.push_back(N->getOperand(2));
4587  Ops.push_back(N->getOperand(3));
4588  Ops.push_back(N->getOperand(4));
4589  Ops.push_back(Chain);
4590  break;
4592  Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4593  Ops.push_back(TexHandle);
4594  Ops.push_back(N->getOperand(2));
4595  Ops.push_back(N->getOperand(3));
4596  Ops.push_back(N->getOperand(4));
4597  Ops.push_back(Chain);
4598  break;
4600  Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4601  Ops.push_back(TexHandle);
4602  Ops.push_back(N->getOperand(2));
4603  Ops.push_back(N->getOperand(3));
4604  Ops.push_back(N->getOperand(4));
4605  Ops.push_back(Chain);
4606  break;
4608  Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4609  Ops.push_back(TexHandle);
4610  Ops.push_back(N->getOperand(2));
4611  Ops.push_back(N->getOperand(3));
4612  Ops.push_back(N->getOperand(4));
4613  Ops.push_back(Chain);
4614  break;
4616  Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4617  Ops.push_back(TexHandle);
4618  Ops.push_back(N->getOperand(2));
4619  Ops.push_back(N->getOperand(3));
4620  Ops.push_back(N->getOperand(4));
4621  Ops.push_back(Chain);
4622  break;
4624  Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4625  Ops.push_back(TexHandle);
4626  Ops.push_back(N->getOperand(2));
4627  Ops.push_back(N->getOperand(3));
4628  Ops.push_back(N->getOperand(4));
4629  Ops.push_back(Chain);
4630  break;
4632  Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4633  Ops.push_back(TexHandle);
4634  Ops.push_back(N->getOperand(2));
4635  Ops.push_back(N->getOperand(3));
4636  Ops.push_back(N->getOperand(4));
4637  Ops.push_back(Chain);
4638  break;
4640  Opc = NVPTX::SULD_3D_I8_ZERO;
4641  Ops.push_back(TexHandle);
4642  Ops.push_back(N->getOperand(2));
4643  Ops.push_back(N->getOperand(3));
4644  Ops.push_back(N->getOperand(4));
4645  Ops.push_back(Chain);
4646  break;
4648  Opc = NVPTX::SULD_3D_I16_ZERO;
4649  Ops.push_back(TexHandle);
4650  Ops.push_back(N->getOperand(2));
4651  Ops.push_back(N->getOperand(3));
4652  Ops.push_back(N->getOperand(4));
4653  Ops.push_back(Chain);
4654  break;
4656  Opc = NVPTX::SULD_3D_I32_ZERO;
4657  Ops.push_back(TexHandle);
4658  Ops.push_back(N->getOperand(2));
4659  Ops.push_back(N->getOperand(3));
4660  Ops.push_back(N->getOperand(4));
4661  Ops.push_back(Chain);
4662  break;
4664  Opc = NVPTX::SULD_3D_I64_ZERO;
4665  Ops.push_back(TexHandle);
4666  Ops.push_back(N->getOperand(2));
4667  Ops.push_back(N->getOperand(3));
4668  Ops.push_back(N->getOperand(4));
4669  Ops.push_back(Chain);
4670  break;
4672  Opc = NVPTX::SULD_3D_V2I8_ZERO;
4673  Ops.push_back(TexHandle);
4674  Ops.push_back(N->getOperand(2));
4675  Ops.push_back(N->getOperand(3));
4676  Ops.push_back(N->getOperand(4));
4677  Ops.push_back(Chain);
4678  break;
4680  Opc = NVPTX::SULD_3D_V2I16_ZERO;
4681  Ops.push_back(TexHandle);
4682  Ops.push_back(N->getOperand(2));
4683  Ops.push_back(N->getOperand(3));
4684  Ops.push_back(N->getOperand(4));
4685  Ops.push_back(Chain);
4686  break;
4688  Opc = NVPTX::SULD_3D_V2I32_ZERO;
4689  Ops.push_back(TexHandle);
4690  Ops.push_back(N->getOperand(2));
4691  Ops.push_back(N->getOperand(3));
4692  Ops.push_back(N->getOperand(4));
4693  Ops.push_back(Chain);
4694  break;
4696  Opc = NVPTX::SULD_3D_V2I64_ZERO;
4697  Ops.push_back(TexHandle);
4698  Ops.push_back(N->getOperand(2));
4699  Ops.push_back(N->getOperand(3));
4700  Ops.push_back(N->getOperand(4));
4701  Ops.push_back(Chain);
4702  break;
4704  Opc = NVPTX::SULD_3D_V4I8_ZERO;
4705  Ops.push_back(TexHandle);
4706  Ops.push_back(N->getOperand(2));
4707  Ops.push_back(N->getOperand(3));
4708  Ops.push_back(N->getOperand(4));
4709  Ops.push_back(Chain);
4710  break;
4712  Opc = NVPTX::SULD_3D_V4I16_ZERO;
4713  Ops.push_back(TexHandle);
4714  Ops.push_back(N->getOperand(2));
4715  Ops.push_back(N->getOperand(3));
4716  Ops.push_back(N->getOperand(4));
4717  Ops.push_back(Chain);
4718  break;
4720  Opc = NVPTX::SULD_3D_V4I32_ZERO;
4721  Ops.push_back(TexHandle);
4722  Ops.push_back(N->getOperand(2));
4723  Ops.push_back(N->getOperand(3));
4724  Ops.push_back(N->getOperand(4));
4725  Ops.push_back(Chain);
4726  break;
4727  }
4728  Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4729  return Ret;
4730 }
4731 
4732 
4733 /// SelectBFE - Look for instruction sequences that can be made more efficient
4734 /// by using the 'bfe' (bit-field extract) PTX instruction
4735 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
4736  SDLoc DL(N);
4737  SDValue LHS = N->getOperand(0);
4738  SDValue RHS = N->getOperand(1);
4739  SDValue Len;
4740  SDValue Start;
4741  SDValue Val;
4742  bool IsSigned = false;
4743 
4744  if (N->getOpcode() == ISD::AND) {
4745  // Canonicalize the operands
4746  // We want 'and %val, %mask'
4747  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4748  std::swap(LHS, RHS);
4749  }
4750 
4751  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4752  if (!Mask) {
4753  // We need a constant mask on the RHS of the AND
4754  return NULL;
4755  }
4756 
4757  // Extract the mask bits
4758  uint64_t MaskVal = Mask->getZExtValue();
4759  if (!isMask_64(MaskVal)) {
4760  // We *could* handle shifted masks here, but doing so would require an
4761  // 'and' operation to fix up the low-order bits so we would trade
4762  // shr+and for bfe+and, which has the same throughput
4763  return NULL;
4764  }
4765 
4766  // How many bits are in our mask?
4767  uint64_t NumBits = countTrailingOnes(MaskVal);
4768  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4769 
4770  if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4771  // We have a 'srl/and' pair, extract the effective start bit and length
4772  Val = LHS.getNode()->getOperand(0);
4773  Start = LHS.getNode()->getOperand(1);
4774  ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4775  if (StartConst) {
4776  uint64_t StartVal = StartConst->getZExtValue();
4777  // How many "good" bits do we have left? "good" is defined here as bits
4778  // that exist in the original value, not shifted in.
4779  uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4780  if (NumBits > GoodBits) {
4781  // Do not handle the case where bits have been shifted in. In theory
4782  // we could handle this, but the cost is likely higher than just
4783  // emitting the srl/and pair.
4784  return NULL;
4785  }
4786  Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4787  } else {
4788  // Do not handle the case where the shift amount (can be zero if no srl
4789  // was found) is not constant. We could handle this case, but it would
4790  // require run-time logic that would be more expensive than just
4791  // emitting the srl/and pair.
4792  return NULL;
4793  }
4794  } else {
4795  // Do not handle the case where the LHS of the and is not a shift. While
4796  // it would be trivial to handle this case, it would just transform
4797  // 'and' -> 'bfe', but 'and' has higher-throughput.
4798  return NULL;
4799  }
4800  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4801  if (LHS->getOpcode() == ISD::AND) {
4802  ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4803  if (!ShiftCnst) {
4804  // Shift amount must be constant
4805  return NULL;
4806  }
4807 
4808  uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4809 
4810  SDValue AndLHS = LHS->getOperand(0);
4811  SDValue AndRHS = LHS->getOperand(1);
4812 
4813  // Canonicalize the AND to have the mask on the RHS
4814  if (isa<ConstantSDNode>(AndLHS)) {
4815  std::swap(AndLHS, AndRHS);
4816  }
4817 
4818  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4819  if (!MaskCnst) {
4820  // Mask must be constant
4821  return NULL;
4822  }
4823 
4824  uint64_t MaskVal = MaskCnst->getZExtValue();
4825  uint64_t NumZeros;
4826  uint64_t NumBits;
4827  if (isMask_64(MaskVal)) {
4828  NumZeros = 0;
4829  // The number of bits in the result bitfield will be the number of
4830  // trailing ones (the AND) minus the number of bits we shift off
4831  NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4832  } else if (isShiftedMask_64(MaskVal)) {
4833  NumZeros = countTrailingZeros(MaskVal);
4834  unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4835  // The number of bits in the result bitfield will be the number of
4836  // trailing zeros plus the number of set bits in the mask minus the
4837  // number of bits we shift off
4838  NumBits = NumZeros + NumOnes - ShiftAmt;
4839  } else {
4840  // This is not a mask we can handle
4841  return NULL;
4842  }
4843 
4844  if (ShiftAmt < NumZeros) {
4845  // Handling this case would require extra logic that would make this
4846  // transformation non-profitable
4847  return NULL;
4848  }
4849 
4850  Val = AndLHS;
4851  Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4852  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4853  } else if (LHS->getOpcode() == ISD::SHL) {
4854  // Here, we have a pattern like:
4855  //
4856  // (sra (shl val, NN), MM)
4857  // or
4858  // (srl (shl val, NN), MM)
4859  //
4860  // If MM >= NN, we can efficiently optimize this with bfe
4861  Val = LHS->getOperand(0);
4862 
4863  SDValue ShlRHS = LHS->getOperand(1);
4864  ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4865  if (!ShlCnst) {
4866  // Shift amount must be constant
4867  return NULL;
4868  }
4869  uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4870 
4871  SDValue ShrRHS = RHS;
4872  ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4873  if (!ShrCnst) {
4874  // Shift amount must be constant
4875  return NULL;
4876  }
4877  uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4878 
4879  // To avoid extra codegen and be profitable, we need Outer >= Inner
4880  if (OuterShiftAmt < InnerShiftAmt) {
4881  return NULL;
4882  }
4883 
4884  // If the outer shift is more than the type size, we have no bitfield to
4885  // extract (since we also check that the inner shift is <= the outer shift
4886  // then this also implies that the inner shift is < the type size)
4887  if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
4888  return NULL;
4889  }
4890 
4891  Start =
4892  CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
4893  Len =
4894  CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4895  OuterShiftAmt, DL, MVT::i32);
4896 
4897  if (N->getOpcode() == ISD::SRA) {
4898  // If we have a arithmetic right shift, we need to use the signed bfe
4899  // variant
4900  IsSigned = true;
4901  }
4902  } else {
4903  // No can do...
4904  return NULL;
4905  }
4906  } else {
4907  // No can do...
4908  return NULL;
4909  }
4910 
4911 
4912  unsigned Opc;
4913  // For the BFE operations we form here from "and" and "srl", always use the
4914  // unsigned variants.
4915  if (Val.getValueType() == MVT::i32) {
4916  if (IsSigned) {
4917  Opc = NVPTX::BFE_S32rii;
4918  } else {
4919  Opc = NVPTX::BFE_U32rii;
4920  }
4921  } else if (Val.getValueType() == MVT::i64) {
4922  if (IsSigned) {
4923  Opc = NVPTX::BFE_S64rii;
4924  } else {
4925  Opc = NVPTX::BFE_U64rii;
4926  }
4927  } else {
4928  // We cannot handle this type
4929  return NULL;
4930  }
4931 
4932  SDValue Ops[] = {
4933  Val, Start, Len
4934  };
4935 
4936  return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
4937 }
4938 
4939 // SelectDirectAddr - Match a direct address for DAG.
4940 // A direct address could be a globaladdress or externalsymbol.
4941 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4942  // Return true if TGA or ES.
4943  if (N.getOpcode() == ISD::TargetGlobalAddress ||
4945  Address = N;
4946  return true;
4947  }
4948  if (N.getOpcode() == NVPTXISD::Wrapper) {
4949  Address = N.getOperand(0);
4950  return true;
4951  }
4952  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
4953  unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
4954  if (IID == Intrinsic::nvvm_ptr_gen_to_param)
4956  return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
4957  }
4958  return false;
4959 }
4960 
4961 // symbol+offset
4962 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4963  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4964  if (Addr.getOpcode() == ISD::ADD) {
4965  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4966  SDValue base = Addr.getOperand(0);
4967  if (SelectDirectAddr(base, Base)) {
4968  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
4969  mvt);
4970  return true;
4971  }
4972  }
4973  }
4974  return false;
4975 }
4976 
4977 // symbol+offset
4978 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
4979  SDValue &Base, SDValue &Offset) {
4980  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
4981 }
4982 
4983 // symbol+offset
4984 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
4985  SDValue &Base, SDValue &Offset) {
4986  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
4987 }
4988 
4989 // register+offset
4990 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
4991  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4992  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
4993  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
4994  Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
4995  return true;
4996  }
4997  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
4999  return false; // direct calls.
5000 
5001  if (Addr.getOpcode() == ISD::ADD) {
5002  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5003  return false;
5004  }
5005  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5006  if (FrameIndexSDNode *FIN =
5007  dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5008  // Constant offset from frame ref.
5009  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5010  else
5011  Base = Addr.getOperand(0);
5012  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5013  mvt);
5014  return true;
5015  }
5016  }
5017  return false;
5018 }
5019 
5020 // register+offset
5021 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5022  SDValue &Base, SDValue &Offset) {
5023  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5024 }
5025 
5026 // register+offset
5027 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5028  SDValue &Base, SDValue &Offset) {
5029  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5030 }
5031 
5032 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5033  unsigned int spN) const {
5034  const Value *Src = nullptr;
5035  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5036  if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5037  return true;
5038  Src = mN->getMemOperand()->getValue();
5039  }
5040  if (!Src)
5041  return false;
5042  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
5043  return (PT->getAddressSpace() == spN);
5044  return false;
5045 }
5046 
5047 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5048 /// inline asm expressions.
5049 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5050  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5051  SDValue Op0, Op1;
5052  switch (ConstraintID) {
5053  default:
5054  return true;
5055  case InlineAsm::Constraint_m: // memory
5056  if (SelectDirectAddr(Op, Op0)) {
5057  OutOps.push_back(Op0);
5058  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
5059  return false;
5060  }
5061  if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5062  OutOps.push_back(Op0);
5063  OutOps.push_back(Op1);
5064  return false;
5065  }
5066  break;
5067  }
5068  return true;
5069 }
unsigned getDestAddressSpace() const
SDVTList getVTList() const
unsigned getSrcAddressSpace() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getSizeInBits() const
static unsigned int getCodeAddrSpace(MemSDNode *N)
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:122
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:225
F(f)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
void setNodeId(int Id)
Set unique node id.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
Reports a serious error, calling any installed error handler.
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:150
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:115
MachineMemOperand - A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Shift and rotation operations.
Definition: ISDOpcodes.h:332
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:407
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
SimpleValueType SimpleTy
MVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:591
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:216
bool isMask_64(uint64_t Value)
isMask_64 - This function returns true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (64 bit version).
Definition: MathExtras.h:335
EVT getMemoryVT() const
Return the type of the in-memory value.
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.
Definition: ISDOpcodes.h:142
PointerType - Class to represent pointers.
Definition: DerivedTypes.h:449
This class is used to represent ISD::STORE nodes.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:97
SDNode * getNode() const
get the SDNode which holds the desired result
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:109
unsigned getVectorNumElements() const
MVT - Machine Value Type.
const SDValue & getOperand(unsigned i) const
bool isVector() const
isVector - Return true if this is a vector value type.
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
static cl::opt< bool > UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true))
static ManagedStatic< std::set< EVT, EVT::compareRawBits > > EVTs
unsigned getOpcode() const
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
bool isVolatile() const
EVT - Extended Value Type.
Definition: ValueTypes.h:31
bool isShiftedMask_64(uint64_t Value)
isShiftedMask_64 - This function returns true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
Definition: MathExtras.h:348
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:481
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruction selectors.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const Value * getValue() const
getValue - Return the base address of the memory access.
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
static cl::opt< bool > FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
static cl::opt< int > UsePrecDivF32("nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"" IEEE Compliant F32 div.rnd if available."), cl::init(2))
NVPTXTargetMachine.
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG, ready for instruction scheduling.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:217
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:518
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:233
#define N
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
EVT getValueType() const
Return the ValueType of the referenced return value.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:140
LLVM Value Representation.
Definition: Value.h:69
static bool isVolatile(Instruction *Inst)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:203
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
uint64_t getZExtValue() const
This class is used to represent ISD::LOAD nodes.