NVPTXISelDAGToDAG.cpp
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelDAGToDAG.h"
#include "NVPTXUtilities.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"

using namespace llvm;

#define DEBUG_TYPE "nvptx-isel"

/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOpt::Level OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
}

NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel), TM(tm) {
  doMulWide = (OptLevel > 0);
}

bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

int NVPTXDAGToDAGISel::getDivF32Level() const {
  return Subtarget->getTargetLowering()->getDivF32Level();
}

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  return Subtarget->getTargetLowering()->usePrecSqrtF32();
}

bool NVPTXDAGToDAGISel::useF32FTZ() const {
  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}

bool NVPTXDAGToDAGISel::allowFMA() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowFMA(*MF, OptLevel);
}

bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowUnsafeFPMath(*MF);
}

/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  case ISD::LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
    if (tryStore(N))
      return;
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    break;
  case NVPTXISD::SETP_F16X2:
    SelectSETP_F16X2(N);
    return;

  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    if (tryLoadVector(N))
      return;
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    if (tryLDGLDU(N))
      return;
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    if (tryStoreVector(N))
      return;
    break;
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    if (tryLoadParam(N))
      return;
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    if (tryStoreRetval(N))
      return;
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    if (tryStoreParam(N))
      return;
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    if (tryIntrinsicNoChain(N))
      return;
    break;
  case ISD::INTRINSIC_W_CHAIN:
    if (tryIntrinsicChain(N))
      return;
    break;
  case NVPTXISD::Tex1DFloatS32:
  // ... the remaining NVPTXISD texture opcodes (Tex*, TexUnified*, Tld4*),
  // elided from this listing, fall through to the same handler ...
    if (tryTextureIntrinsic(N))
      return;
    break;
  case NVPTXISD::Suld1DI8Clamp:
  // ... the remaining NVPTXISD surface opcodes (Suld*), elided from this
  // listing, fall through to the same handler ...
    if (trySurfaceIntrinsic(N))
      return;
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    if (tryBFE(N))
      return;
    break;
  case ISD::ADDRSPACECAST:
    SelectAddrSpaceCast(N);
    return;
  case ISD::ConstantFP:
    if (tryConstantFP16(N))
      return;
    break;
  default:
    break;
  }
  SelectCode(N);
}

bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:
    return tryLDGLDU(N);
  }
}

// There's no way to specify FP16 immediates in .f16 ops, so we have to
// load them into an .f16 register first.
bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
  if (N->getValueType(0) != MVT::f16)
    return false;
  SDValue Val = CurDAG->getTargetConstantFP(
      cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
  SDNode *LoadConstF16 =
      CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
  ReplaceNode(N, LoadConstF16);
  return true;
}
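// Illustrative note (assumed shape, not part of the original file): the
// LOAD_CONST_F16 node selected above materializes the immediate through a
// 16-bit register, e.g. for the constant 1.0 the emitted PTX looks roughly
// like
//   mov.b16 %h1, 0x3C00;  // 0x3C00 = IEEE-754 binary16 encoding of 1.0
// since .f16 arithmetic ops cannot encode the immediate directly.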

// Map ISD::CondCode value to the appropriate CmpMode expected by
// NVPTXInstPrinter::printCmpMode().
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
  using NVPTX::PTXCmpMode::CmpMode;
  unsigned PTXCmpMode = [](ISD::CondCode CC) {
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code.");
    case ISD::SETOEQ:
      return CmpMode::EQ;
    case ISD::SETOGT:
      return CmpMode::GT;
    case ISD::SETOGE:
      return CmpMode::GE;
    case ISD::SETOLT:
      return CmpMode::LT;
    case ISD::SETOLE:
      return CmpMode::LE;
    case ISD::SETONE:
      return CmpMode::NE;
    case ISD::SETO:
      return CmpMode::NUM;
    case ISD::SETUO:
      return CmpMode::NotANumber;
    case ISD::SETUEQ:
      return CmpMode::EQU;
    case ISD::SETUGT:
      return CmpMode::GTU;
    case ISD::SETUGE:
      return CmpMode::GEU;
    case ISD::SETULT:
      return CmpMode::LTU;
    case ISD::SETULE:
      return CmpMode::LEU;
    case ISD::SETUNE:
      return CmpMode::NEU;
    case ISD::SETEQ:
      return CmpMode::EQ;
    case ISD::SETGT:
      return CmpMode::GT;
    case ISD::SETGE:
      return CmpMode::GE;
    case ISD::SETLT:
      return CmpMode::LT;
    case ISD::SETLE:
      return CmpMode::LE;
    case ISD::SETNE:
      return CmpMode::NE;
    }
  }(CondCode.get());

  if (FTZ)
    PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;

  return PTXCmpMode;
}

bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  SDLoc DL(N);
  SDNode *SetP = CurDAG->getMachineNode(
      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
  ReplaceNode(N, SetP);
  return true;
}
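// Illustrative PTX for the node built above (assumed shape): a single
// f16x2 compare writes one predicate per 16-bit lane, e.g.
//   setp.eq.f16x2 %p1|%p2, %hh1, %hh2;
// which is why the machine node is created with two MVT::i1 results.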

// Find all instances of extract_vector_elt that use this v2f16 vector
// and coalesce them into a scattering move instruction.
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
  SDValue Vector = N->getOperand(0);

  // We only care about f16x2 as it's the only real vector type we
  // need to deal with.
  if (Vector.getSimpleValueType() != MVT::v2f16)
    return false;

  // Find and record all uses of this vector that extract element 0 or 1.
  SmallVector<SDNode *, 4> E0, E1;
  for (const auto &U : Vector.getNode()->uses()) {
    if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      continue;
    if (U->getOperand(0) != Vector)
      continue;
    if (const ConstantSDNode *IdxConst =
            dyn_cast<ConstantSDNode>(U->getOperand(1))) {
      if (IdxConst->getZExtValue() == 0)
        E0.push_back(U);
      else if (IdxConst->getZExtValue() == 1)
        E1.push_back(U);
      else
        llvm_unreachable("Invalid vector index.");
    }
  }

  // There's no point scattering f16x2 if we only ever access one
  // element of it.
  if (E0.empty() || E1.empty())
    return false;

  unsigned Op = NVPTX::SplitF16x2;
  // If the vector has been BITCAST'ed from i32, we can use the original
  // value directly and avoid a register-to-register move.
  SDValue Source = Vector;
  if (Vector->getOpcode() == ISD::BITCAST) {
    Op = NVPTX::SplitI32toF16x2;
    Source = Vector->getOperand(0);
  }
  // Merge (f16 extractelt(V, 0), f16 extractelt(V, 1))
  // into f16,f16 SplitF16x2(V)
  SDNode *ScatterOp =
      CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
  for (auto *Node : E0)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
  for (auto *Node : E1)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));

  return true;
}
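// Worked example of the rewrite above (illustrative): given
//   %v = bitcast i32 %r to <2 x half>
//   %a = extractelement <2 x half> %v, i32 0
//   %b = extractelement <2 x half> %v, i32 1
// both extracts are replaced by the two results of one SplitI32toF16x2
// machine node fed directly by %r, so no f16x2 register copy is needed
// before the split.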

static unsigned int getCodeAddrSpace(MemSDNode *N) {
  const Value *Src = N->getMemOperand()->getValue();

  if (!Src)
    return NVPTX::PTXLdStInstCode::GENERIC;

  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    default: break;
    }
  }
  return NVPTX::PTXLdStInstCode::GENERIC;
}

static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
                          unsigned CodeAddrSpace, MachineFunction *F) {
  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
  // space.
  //
  // We have two ways of identifying invariant loads: Loads may be explicitly
  // marked as invariant, or we may infer them to be invariant.
  //
  // We currently infer invariance for loads from
  //  - constant global variables, and
  //  - kernel function pointer params that are noalias (i.e. __restrict) and
  //    never written to.
  //
  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
  // not during the SelectionDAG phase).
  //
  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
  // explicitly invariant loads because these are how clang tells us to use ldg
  // when the user uses a builtin.
  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
    return false;

  if (N->isInvariant())
    return true;

  bool IsKernelFn = isKernelFunction(F->getFunction());

  // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
  // because the former looks through phi nodes while the latter does not. We
  // need to look through phi nodes to handle pointer induction variables.
  SmallVector<Value *, 8> Objs;
  GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
                       Objs, F->getDataLayout());

  return all_of(Objs, [&](Value *V) {
    if (auto *A = dyn_cast<const Argument>(V))
      return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
    if (auto *GV = dyn_cast<const GlobalVariable>(V))
      return GV->isConstant();
    return false;
  });
}
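// Concrete example (assumed CUDA source, not from this file): for a kernel
//   __global__ void k(const float * __restrict__ p) { ... = *p; }
// a frontend that marks p 'noalias' and 'readonly' satisfies the Argument
// check above, so loads through p may be lowered to ld.global.nc.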

bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_texsurf_handle_internal:
    SelectTexSurfHandle(N);
    return true;
  }
}

void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
  // Op 0 is the intrinsic ID
  SDValue Wrapper = N->getOperand(1);
  SDValue GlobalVal = Wrapper.getOperand(0);
  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
                                        MVT::i64, GlobalVal));
}

void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();

  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
    unsigned Opc;
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  } else {
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    unsigned Opc;
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
                         : NVPTX::cvta_to_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
                         : NVPTX::cvta_to_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc =
          TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc =
          TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
      break;
    case ADDRESS_SPACE_PARAM:
      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
                         : NVPTX::nvvm_ptr_gen_to_param;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  }
}

// Helper function template to reduce amount of boilerplate code for
// opcode selection.
static Optional<unsigned> pickOpcodeForVT(
    MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
    unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
    unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
  switch (VT) {
  case MVT::i1:
  case MVT::i8:
    return Opcode_i8;
  case MVT::i16:
    return Opcode_i16;
  case MVT::i32:
    return Opcode_i32;
  case MVT::i64:
    return Opcode_i64;
  case MVT::f16:
    return Opcode_f16;
  case MVT::v2f16:
    return Opcode_f16x2;
  case MVT::f32:
    return Opcode_f32;
  case MVT::f64:
    return Opcode_f64;
  default:
    return None;
  }
}
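// Usage sketch (illustrative, mirroring the call sites below): the i64 and
// f64 slots take Optional<unsigned> so that callers without a 64-bit variant
// can pass None, which then propagates out as a failed selection:
//
//   Optional<unsigned> Opc = pickOpcodeForVT(
//       MVT::i32, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
//       NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
//       NVPTX::LD_f32_avar, NVPTX::LD_f64_avar); // -> NVPTX::LD_i32_avar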

bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  SDLoc dl(N);
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = nullptr;

  // do not support pre/post inc/dec
  if (LD->isIndexed())
    return false;

  if (!LoadedVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(LD);

  if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = LD->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT SimpleVT = LoadedVT.getSimpleVT();
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int fromType;

  // Vector Setting
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
    // v2f16 is loaded using ld.b32
    fromTypeWidth = 32;
  }

  if ((LD->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    // f16 uses .b16 as its storage type.
    fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
                                             : NVPTX::PTXLdStInstCode::Float;
  else
    fromType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  Optional<unsigned> Opcode;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(
        TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
        NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
        NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                          : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                          : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (TM.is64Bit())
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
          NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
          NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
          NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else {
    if (TM.is64Bit())
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
          NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
          NVPTX::LD_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
          NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
          NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  }

  if (!NVPTXLD)
    return false;

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  ReplaceNode(N, NVPTXLD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  Optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);

  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
                                             : NVPTX::PTXLdStInstCode::Float;
  else
    FromType = NVPTX::PTXLdStInstCode::Unsigned;

  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    break;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    break;
  default:
    return false;
  }

  EVT EltVT = N->getValueType(0);

  // v8f16 is a special case. PTX doesn't have ld.v8.f16
  // instruction. Instead, we split the vector into v2f16 chunks and
  // load them with ld.v4.b32.
  if (EltVT == MVT::v2f16) {
    assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
    EltVT = MVT::i32;
    FromType = NVPTX::PTXLdStInstCode::Untyped;
    FromTypeWidth = 32;
  }
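  // Illustrative PTX for the rewrite above (assumed shape): a v8f16 load is
  // emitted as
  //   ld.v4.b32 {%r0, %r1, %r2, %r3}, [addr];
  // with each 32-bit lane %rN holding one packed v2f16 pair.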

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
                               NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
                               NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
                               NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
                          NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
                          NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
                          NVPTX::LDV_f32_v4_avar, None);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
                               NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
                               NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
                          NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
                          NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
                          NVPTX::LDV_f32_v4_asi, None);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
            NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
            NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
            NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
            NVPTX::LDV_f64_v2_ari_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
            NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
            NVPTX::LDV_f32_v4_ari_64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
                                 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
        break;
      case NVPTXISD::LoadV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
                            NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
                            NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
                            NVPTX::LDV_f32_v4_ari, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };

    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
            NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
            NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
            NVPTX::LDV_f64_v2_areg_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
            NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
            NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
            NVPTX::LDV_f32_v4_areg_64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
                            NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
                            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
                            NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
                            NVPTX::LDV_f64_v2_areg);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
            NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
            NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
            NVPTX::LDV_f32_v4_areg, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  }

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1;
  MemSDNode *Mem;
  bool IsLDG = true;

  // If this is an LDG intrinsic, the address is the third operand. If it's an
  // LDG/LDU SD node (from custom vector handling), then it's the second
  // operand.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    Op1 = N->getOperand(2);
    Mem = cast<MemIntrinsicSDNode>(N);
    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IID) {
    default:
      return false;
    case Intrinsic::nvvm_ldg_global_f:
    case Intrinsic::nvvm_ldg_global_i:
    case Intrinsic::nvvm_ldg_global_p:
      IsLDG = true;
      break;
    case Intrinsic::nvvm_ldu_global_f:
    case Intrinsic::nvvm_ldu_global_i:
    case Intrinsic::nvvm_ldu_global_p:
      IsLDG = false;
      break;
    }
  } else {
    Op1 = N->getOperand(1);
    Mem = cast<MemSDNode>(N);
  }

  Optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  SDValue Base, Offset, Addr;

  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;
  if (EltVT.isVector()) {
    NumElts = EltVT.getVectorNumElements();
    EltVT = EltVT.getVectorElementType();
    // vectors of f16 are loaded/stored as multiples of v2f16 elements.
    if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) {
      assert(NumElts % 2 == 0 && "Vector must have even number of elements");
      EltVT = MVT::v2f16;
      NumElts /= 2;
    }
  }
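  // Worked example of the halving above (illustrative): a v4f16 memory type
  // starts with NumElts = 4 and EltVT = f16 and is rewritten to NumElts = 2
  // with EltVT = v2f16, i.e. two packed 32-bit half-pairs per memory access.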

  // Build the "promoted" result VTList for the load. If we are really loading
  // i8s, then the return type will be promoted to i16 since we do not expose
  // 8-bit registers in NVPTX.
  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
  SmallVector<EVT, 5> InstVTs;
  for (unsigned i = 0; i != NumElts; ++i) {
    InstVTs.push_back(NodeVT);
  }
  InstVTs.push_back(MVT::Other);
  SDVTList InstVTList = CurDAG->getVTList(InstVTs);

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case ISD::LOAD:
    case ISD::INTRINSIC_W_CHAIN:
      if (IsLDG)
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
      else
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
      break;
    case NVPTXISD::LoadV2:
    case NVPTXISD::LDGV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LDUV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LoadV4:
    case NVPTXISD::LDGV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
                               NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
      break;
    case NVPTXISD::LDUV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
                               NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = {Base, Offset, Chain};
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
  } else {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
  }

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = Mem->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  // For automatic generation of LDG (through SelectLoad[Vector], not the
  // intrinsics), we may have an extending load like:
  //
  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
  //
  // In this case, the matching logic above will select a load for the original
  // memory type (in this case, i8) and our types will not match (the node needs
  // to return an i32 in this case). Our LDG/LDU nodes do not support the
  // concept of sign-/zero-extension, so emulate it here by adding an explicit
  // CVT instruction. Ptxas should clean up any redundancies here.

  EVT OrigType = N->getValueType(0);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);

  if (OrigType != EltVT && LdNode) {
    // We have an extending-load. The instruction we selected operates on the
    // smaller type, but the SDNode we are replacing has the larger type. We
    // need to emit a CVT to make the types match.
    bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
    unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
                                       EltVT.getSimpleVT(), IsSigned);

    // For each output value, apply the manual sign/zero-extension and make sure
    // all users of the load go through that CVT.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Res(LD, i);
      SDValue OrigVal(N, i);

      SDNode *CvtNode =
          CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
                                 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
                                                           DL, MVT::i32));
      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
    }
  }

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  SDLoc dl(N);
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;

  // do not support pre/post inc/dec
  if (ST->isIndexed())
    return false;

  if (!StoreVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  if (SimpleVT.isVector()) {
    assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
    // v2f16 is stored using st.b32
    toTypeWidth = 32;
  }

  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    // f16 uses .b16 as its storage type.
    toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
                                           : NVPTX::PTXLdStInstCode::Float;
  else
    toType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue Addr;
  SDValue Offset, Base;
  Optional<unsigned> Opcode;
  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N2, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
                      Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                          : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
                      Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                          : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (TM.is64Bit())
      Opcode = pickOpcodeForVT(
          SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
          NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (!Opcode)
      return false;

    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
                      Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else {
    if (TM.is64Bit())
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
                          NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
                      Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  }

  if (!NVPTXST)
    return false;

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
  ReplaceNode(N, NVPTXST);
  return true;
}
1805 
1806 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1807  SDValue Chain = N->getOperand(0);
1808  SDValue Op1 = N->getOperand(1);
1809  SDValue Addr, Offset, Base;
1810  Optional<unsigned> Opcode;
1811  SDLoc DL(N);
1812  SDNode *ST;
1813  EVT EltVT = Op1.getValueType();
1814  MemSDNode *MemSD = cast<MemSDNode>(N);
1815  EVT StoreVT = MemSD->getMemoryVT();
1816 
1817  // Address Space Setting
1818  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1819 
1820  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1821  report_fatal_error("Cannot store to pointer that points to constant "
1822  "memory space");
1823  }
1824 
1825  // Volatile Setting
1826  // - .volatile is only availalble for .global and .shared
1827  bool IsVolatile = MemSD->isVolatile();
1828  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1829  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1830  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1831  IsVolatile = false;
1832 
1833  // Type Setting: toType + toTypeWidth
1834  // - for integer type, always use 'u'
1835  assert(StoreVT.isSimple() && "Store value is not simple");
1836  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1837  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1838  unsigned ToType;
1839  if (ScalarVT.isFloatingPoint())
1840  ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1842  else
1844 
1846  SDValue N2;
1847  unsigned VecType;
1848 
1849  switch (N->getOpcode()) {
1850  case NVPTXISD::StoreV2:
1852  StOps.push_back(N->getOperand(1));
1853  StOps.push_back(N->getOperand(2));
1854  N2 = N->getOperand(3);
1855  break;
1856  case NVPTXISD::StoreV4:
1858  StOps.push_back(N->getOperand(1));
1859  StOps.push_back(N->getOperand(2));
1860  StOps.push_back(N->getOperand(3));
1861  StOps.push_back(N->getOperand(4));
1862  N2 = N->getOperand(5);
1863  break;
1864  default:
1865  return false;
1866  }
1867 
1868  // v8f16 is a special case. PTX doesn't have st.v8.f16
1869  // instruction. Instead, we split the vector into v2f16 chunks and
1870  // store them with st.v4.b32.
1871  if (EltVT == MVT::v2f16) {
1872  assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1873  EltVT = MVT::i32;
1875  ToTypeWidth = 32;
1876  }
1877 
1878  StOps.push_back(getI32Imm(IsVolatile, DL));
1879  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1880  StOps.push_back(getI32Imm(VecType, DL));
1881  StOps.push_back(getI32Imm(ToType, DL));
1882  StOps.push_back(getI32Imm(ToTypeWidth, DL));
1883 
1884  if (SelectDirectAddr(N2, Addr)) {
1885  switch (N->getOpcode()) {
1886  default:
1887  return false;
1888  case NVPTXISD::StoreV2:
1889  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1890  NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1891  NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1892  NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1893  NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1894  break;
1895  case NVPTXISD::StoreV4:
1896  Opcode =
1897  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1898  NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1899  NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1900  NVPTX::STV_f32_v4_avar, None);
1901  break;
1902  }
1903  StOps.push_back(Addr);
1904  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1905  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1906  switch (N->getOpcode()) {
1907  default:
1908  return false;
1909  case NVPTXISD::StoreV2:
1910  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1911  NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1912  NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1913  NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1914  NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1915  break;
1916  case NVPTXISD::StoreV4:
1917  Opcode =
1918  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1919  NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1920  NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1921  NVPTX::STV_f32_v4_asi, None);
1922  break;
1923  }
1924  StOps.push_back(Base);
1925  StOps.push_back(Offset);
1926  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1927  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1928  if (TM.is64Bit()) {
1929  switch (N->getOpcode()) {
1930  default:
1931  return false;
1932  case NVPTXISD::StoreV2:
1933  Opcode = pickOpcodeForVT(
1934  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
1935  NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
1936  NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
1937  NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
1938  NVPTX::STV_f64_v2_ari_64);
1939  break;
1940  case NVPTXISD::StoreV4:
1941  Opcode = pickOpcodeForVT(
1942  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
1943  NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
1944  NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
1945  NVPTX::STV_f32_v4_ari_64, None);
1946  break;
1947  }
1948  } else {
1949  switch (N->getOpcode()) {
1950  default:
1951  return false;
1952  case NVPTXISD::StoreV2:
1953  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1954  NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1955  NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1956  NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
1957  NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1958  break;
1959  case NVPTXISD::StoreV4:
1960  Opcode =
1961  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
1962  NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
1963  NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
1964  NVPTX::STV_f32_v4_ari, None);
1965  break;
1966  }
1967  }
1968  StOps.push_back(Base);
1969  StOps.push_back(Offset);
1970  } else {
1971  if (TM.is64Bit()) {
1972  switch (N->getOpcode()) {
1973  default:
1974  return false;
1975  case NVPTXISD::StoreV2:
1976  Opcode = pickOpcodeForVT(
1977  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
1978  NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1979  NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
1980  NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1981  NVPTX::STV_f64_v2_areg_64);
1982  break;
1983  case NVPTXISD::StoreV4:
1984  Opcode = pickOpcodeForVT(
1985  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
1986  NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
1987  NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
1988  NVPTX::STV_f32_v4_areg_64, None);
1989  break;
1990  }
1991  } else {
1992  switch (N->getOpcode()) {
1993  default:
1994  return false;
1995  case NVPTXISD::StoreV2:
1996  Opcode =
1997  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
1998  NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
1999  NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2000  NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2001  NVPTX::STV_f64_v2_areg);
2002  break;
2003  case NVPTXISD::StoreV4:
2004  Opcode =
2005  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2006  NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
2007  NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2008  NVPTX::STV_f32_v4_areg, None);
2009  break;
2010  }
2011  }
2012  StOps.push_back(N2);
2013  }
2014 
2015  if (!Opcode)
2016  return false;
2017 
2018  StOps.push_back(Chain);
2019 
2020  ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
2021 
2022  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2023  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2024  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2025 
2026  ReplaceNode(N, ST);
2027  return true;
2028 }
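// Illustrative sketch (symbol and register names hypothetical): a StoreV2 of
// two i32 values whose address matched the symbol+immediate (_asi) form
// selects STV_i32_v2_asi, which prints roughly as:
//
//   st.global.v2.u32 [arr+8], {%r1, %r2};
//
// The _ari and _areg variants cover register+immediate and plain-register
// addressing, with separate 64-bit forms when pointers are 64 bits wide.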
2029 
2030 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2031  SDValue Chain = Node->getOperand(0);
2032  SDValue Offset = Node->getOperand(2);
2033  SDValue Flag = Node->getOperand(3);
2034  SDLoc DL(Node);
2035  MemSDNode *Mem = cast<MemSDNode>(Node);
2036 
2037  unsigned VecSize;
2038  switch (Node->getOpcode()) {
2039  default:
2040  return false;
2041  case NVPTXISD::LoadParam:
2042  VecSize = 1;
2043  break;
2044  case NVPTXISD::LoadParamV2:
2045  VecSize = 2;
2046  break;
2047  case NVPTXISD::LoadParamV4:
2048  VecSize = 4;
2049  break;
2050  }
2051 
2052  EVT EltVT = Node->getValueType(0);
2053  EVT MemVT = Mem->getMemoryVT();
2054 
2055  Optional<unsigned> Opcode;
2056 
2057  switch (VecSize) {
2058  default:
2059  return false;
2060  case 1:
2061  Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2062  NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2063  NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2064  NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2065  NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2066  break;
2067  case 2:
2068  Opcode =
2069  pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2070  NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2071  NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2072  NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2073  NVPTX::LoadParamMemV2F64);
2074  break;
2075  case 4:
2076  Opcode = pickOpcodeForVT(
2077  MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2078  NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2079  NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2080  NVPTX::LoadParamMemV4F32, None);
2081  break;
2082  }
2083  if (!Opcode)
2084  return false;
2085 
2086  SDVTList VTs;
2087  if (VecSize == 1) {
2088  VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2089  } else if (VecSize == 2) {
2090  VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2091  } else {
2092  EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2093  VTs = CurDAG->getVTList(EVTs);
2094  }
2095 
2096  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2097 
2098  SmallVector<SDValue, 2> Ops;
2099  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2100  Ops.push_back(Chain);
2101  Ops.push_back(Flag);
2102 
2103  ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
2104  return true;
2105 }
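// For example (register names hypothetical): a LoadParamV2 of two i32 results
// at offset 0 selects LoadParamMemV2I32, which ends up in PTX along the lines
// of:
//
//   ld.param.v2.b32 {%r1, %r2}, [retval0+0];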
2106 
2107 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2108  SDLoc DL(N);
2109  SDValue Chain = N->getOperand(0);
2110  SDValue Offset = N->getOperand(1);
2111  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2112  MemSDNode *Mem = cast<MemSDNode>(N);
2113 
2114  // How many elements do we have?
2115  unsigned NumElts = 1;
2116  switch (N->getOpcode()) {
2117  default:
2118  return false;
2119  case NVPTXISD::StoreRetval:
2120  NumElts = 1;
2121  break;
2122  case NVPTXISD::StoreRetvalV2:
2123  NumElts = 2;
2124  break;
2125  case NVPTXISD::StoreRetvalV4:
2126  NumElts = 4;
2127  break;
2128  }
2129 
2130  // Build vector of operands
2131  SmallVector<SDValue, 6> Ops;
2132  for (unsigned i = 0; i < NumElts; ++i)
2133  Ops.push_back(N->getOperand(i + 2));
2134  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2135  Ops.push_back(Chain);
2136 
2137  // Determine target opcode
2138  // If we have an i1, use an 8-bit store. The lowering code in
2139  // NVPTXISelLowering will have already emitted an upcast.
2140  Optional<unsigned> Opcode = 0;
2141  switch (NumElts) {
2142  default:
2143  return false;
2144  case 1:
2145  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2146  NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2147  NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2148  NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2149  NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2150  break;
2151  case 2:
2152  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2153  NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2154  NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2155  NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2156  NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2157  break;
2158  case 4:
2159  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2160  NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2161  NVPTX::StoreRetvalV4I32, None,
2162  NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2163  NVPTX::StoreRetvalV4F32, None);
2164  break;
2165  }
2166  if (!Opcode)
2167  return false;
2168 
2169  SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
2170  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2171  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2172  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2173 
2174  ReplaceNode(N, Ret);
2175  return true;
2176 }
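// For example (register name hypothetical): a scalar StoreRetval of an i32 at
// offset 0 selects StoreRetvalI32 and prints roughly as:
//
//   st.param.b32 [func_retval0+0], %r1;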
2177 
2178 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2179  SDLoc DL(N);
2180  SDValue Chain = N->getOperand(0);
2181  SDValue Param = N->getOperand(1);
2182  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2183  SDValue Offset = N->getOperand(2);
2184  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2185  MemSDNode *Mem = cast<MemSDNode>(N);
2186  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2187 
2188  // How many elements do we have?
2189  unsigned NumElts = 1;
2190  switch (N->getOpcode()) {
2191  default:
2192  return false;
2193  case NVPTXISD::StoreParamU32:
2194  case NVPTXISD::StoreParamS32:
2195  case NVPTXISD::StoreParam:
2196  NumElts = 1;
2197  break;
2198  case NVPTXISD::StoreParamV2:
2199  NumElts = 2;
2200  break;
2201  case NVPTXISD::StoreParamV4:
2202  NumElts = 4;
2203  break;
2204  }
2205 
2206  // Build vector of operands
2207  SmallVector<SDValue, 8> Ops;
2208  for (unsigned i = 0; i < NumElts; ++i)
2209  Ops.push_back(N->getOperand(i + 3));
2210  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2211  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2212  Ops.push_back(Chain);
2213  Ops.push_back(Flag);
2214 
2215  // Determine target opcode
2216  // If we have an i1, use an 8-bit store. The lowering code in
2217  // NVPTXISelLowering will have already emitted an upcast.
2218  Optional<unsigned> Opcode = 0;
2219  switch (N->getOpcode()) {
2220  default:
2221  switch (NumElts) {
2222  default:
2223  return false;
2224  case 1:
2225  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2226  NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2227  NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2228  NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2229  NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2230  break;
2231  case 2:
2232  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2233  NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2234  NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2235  NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2236  NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2237  break;
2238  case 4:
2239  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2240  NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2241  NVPTX::StoreParamV4I32, None,
2242  NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2243  NVPTX::StoreParamV4F32, None);
2244  break;
2245  }
2246  if (!Opcode)
2247  return false;
2248  break;
2249  // Special case: if we have a sign-extend/zero-extend node, insert the
2250  // conversion instruction first, and use that as the value operand to
2251  // the selected StoreParam node.
2252  case NVPTXISD::StoreParamU32: {
2253  Opcode = NVPTX::StoreParamI32;
2254  SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2255  MVT::i32);
2256  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2257  MVT::i32, Ops[0], CvtNone);
2258  Ops[0] = SDValue(Cvt, 0);
2259  break;
2260  }
2261  case NVPTXISD::StoreParamS32: {
2262  Opcode = NVPTX::StoreParamI32;
2263  SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2264  MVT::i32);
2265  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2266  MVT::i32, Ops[0], CvtNone);
2267  Ops[0] = SDValue(Cvt, 0);
2268  break;
2269  }
2270  }
2271 
2272  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2273  SDNode *Ret =
2274  CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
2275  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2276  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2277  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2278 
2279  ReplaceNode(N, Ret);
2280  return true;
2281 }
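// Sketch of the sign-/zero-extend special case (register names hypothetical):
// a StoreParamU32 of an i16 value first widens the operand, then stores the
// full 32 bits, roughly:
//
//   cvt.u32.u16 %r1, %rs1;
//   st.param.b32 [param0+0], %r1;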
2282 
2283 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2284  unsigned Opc = 0;
2285 
2286  switch (N->getOpcode()) {
2287  default: return false;
2288  case NVPTXISD::Tex1DFloatS32:
2289  Opc = NVPTX::TEX_1D_F32_S32;
2290  break;
2291  case NVPTXISD::Tex1DFloatFloat:
2292  Opc = NVPTX::TEX_1D_F32_F32;
2293  break;
2294  case NVPTXISD::Tex1DFloatFloatLevel:
2295  Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2296  break;
2297  case NVPTXISD::Tex1DFloatFloatGrad:
2298  Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2299  break;
2300  case NVPTXISD::Tex1DS32S32:
2301  Opc = NVPTX::TEX_1D_S32_S32;
2302  break;
2303  case NVPTXISD::Tex1DS32Float:
2304  Opc = NVPTX::TEX_1D_S32_F32;
2305  break;
2306  case NVPTXISD::Tex1DS32FloatLevel:
2307  Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
2308  break;
2309  case NVPTXISD::Tex1DS32FloatGrad:
2310  Opc = NVPTX::TEX_1D_S32_F32_GRAD;
2311  break;
2312  case NVPTXISD::Tex1DU32S32:
2313  Opc = NVPTX::TEX_1D_U32_S32;
2314  break;
2315  case NVPTXISD::Tex1DU32Float:
2316  Opc = NVPTX::TEX_1D_U32_F32;
2317  break;
2318  case NVPTXISD::Tex1DU32FloatLevel:
2319  Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2320  break;
2321  case NVPTXISD::Tex1DU32FloatGrad:
2322  Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2323  break;
2324  case NVPTXISD::Tex1DArrayFloatS32:
2325  Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
2326  break;
2327  case NVPTXISD::Tex1DArrayFloatFloat:
2328  Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2329  break;
2330  case NVPTXISD::Tex1DArrayFloatFloatLevel:
2331  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2332  break;
2333  case NVPTXISD::Tex1DArrayFloatFloatGrad:
2334  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2335  break;
2336  case NVPTXISD::Tex1DArrayS32S32:
2337  Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
2338  break;
2339  case NVPTXISD::Tex1DArrayS32Float:
2340  Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
2341  break;
2342  case NVPTXISD::Tex1DArrayS32FloatLevel:
2343  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
2344  break;
2345  case NVPTXISD::Tex1DArrayS32FloatGrad:
2346  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
2347  break;
2348  case NVPTXISD::Tex1DArrayU32S32:
2349  Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2350  break;
2351  case NVPTXISD::Tex1DArrayU32Float:
2352  Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2353  break;
2354  case NVPTXISD::Tex1DArrayU32FloatLevel:
2355  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2356  break;
2357  case NVPTXISD::Tex1DArrayU32FloatGrad:
2358  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2359  break;
2360  case NVPTXISD::Tex2DFloatS32:
2361  Opc = NVPTX::TEX_2D_F32_S32;
2362  break;
2363  case NVPTXISD::Tex2DFloatFloat:
2364  Opc = NVPTX::TEX_2D_F32_F32;
2365  break;
2366  case NVPTXISD::Tex2DFloatFloatLevel:
2367  Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2368  break;
2369  case NVPTXISD::Tex2DFloatFloatGrad:
2370  Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2371  break;
2372  case NVPTXISD::Tex2DS32S32:
2373  Opc = NVPTX::TEX_2D_S32_S32;
2374  break;
2375  case NVPTXISD::Tex2DS32Float:
2376  Opc = NVPTX::TEX_2D_S32_F32;
2377  break;
2378  case NVPTXISD::Tex2DS32FloatLevel:
2379  Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
2380  break;
2381  case NVPTXISD::Tex2DS32FloatGrad:
2382  Opc = NVPTX::TEX_2D_S32_F32_GRAD;
2383  break;
2384  case NVPTXISD::Tex2DU32S32:
2385  Opc = NVPTX::TEX_2D_U32_S32;
2386  break;
2387  case NVPTXISD::Tex2DU32Float:
2388  Opc = NVPTX::TEX_2D_U32_F32;
2389  break;
2390  case NVPTXISD::Tex2DU32FloatLevel:
2391  Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2392  break;
2393  case NVPTXISD::Tex2DU32FloatGrad:
2394  Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2395  break;
2396  case NVPTXISD::Tex2DArrayFloatS32:
2397  Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
2398  break;
2399  case NVPTXISD::Tex2DArrayFloatFloat:
2400  Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2401  break;
2402  case NVPTXISD::Tex2DArrayFloatFloatLevel:
2403  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2404  break;
2405  case NVPTXISD::Tex2DArrayFloatFloatGrad:
2406  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2407  break;
2408  case NVPTXISD::Tex2DArrayS32S32:
2409  Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
2410  break;
2411  case NVPTXISD::Tex2DArrayS32Float:
2412  Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
2413  break;
2414  case NVPTXISD::Tex2DArrayS32FloatLevel:
2415  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
2416  break;
2417  case NVPTXISD::Tex2DArrayS32FloatGrad:
2418  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
2419  break;
2420  case NVPTXISD::Tex2DArrayU32S32:
2421  Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2422  break;
2423  case NVPTXISD::Tex2DArrayU32Float:
2424  Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2425  break;
2426  case NVPTXISD::Tex2DArrayU32FloatLevel:
2427  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2428  break;
2429  case NVPTXISD::Tex2DArrayU32FloatGrad:
2430  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2431  break;
2432  case NVPTXISD::Tex3DFloatS32:
2433  Opc = NVPTX::TEX_3D_F32_S32;
2434  break;
2435  case NVPTXISD::Tex3DFloatFloat:
2436  Opc = NVPTX::TEX_3D_F32_F32;
2437  break;
2438  case NVPTXISD::Tex3DFloatFloatLevel:
2439  Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2440  break;
2441  case NVPTXISD::Tex3DFloatFloatGrad:
2442  Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2443  break;
2444  case NVPTXISD::Tex3DS32S32:
2445  Opc = NVPTX::TEX_3D_S32_S32;
2446  break;
2447  case NVPTXISD::Tex3DS32Float:
2448  Opc = NVPTX::TEX_3D_S32_F32;
2449  break;
2450  case NVPTXISD::Tex3DS32FloatLevel:
2451  Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
2452  break;
2453  case NVPTXISD::Tex3DS32FloatGrad:
2454  Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2455  break;
2456  case NVPTXISD::Tex3DU32S32:
2457  Opc = NVPTX::TEX_3D_U32_S32;
2458  break;
2459  case NVPTXISD::Tex3DU32Float:
2460  Opc = NVPTX::TEX_3D_U32_F32;
2461  break;
2462  case NVPTXISD::Tex3DU32FloatLevel:
2463  Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2464  break;
2465  case NVPTXISD::Tex3DU32FloatGrad:
2466  Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2467  break;
2468  case NVPTXISD::TexCubeFloatFloat:
2469  Opc = NVPTX::TEX_CUBE_F32_F32;
2470  break;
2471  case NVPTXISD::TexCubeFloatFloatLevel:
2472  Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2473  break;
2474  case NVPTXISD::TexCubeS32Float:
2475  Opc = NVPTX::TEX_CUBE_S32_F32;
2476  break;
2477  case NVPTXISD::TexCubeS32FloatLevel:
2478  Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2479  break;
2480  case NVPTXISD::TexCubeU32Float:
2481  Opc = NVPTX::TEX_CUBE_U32_F32;
2482  break;
2483  case NVPTXISD::TexCubeU32FloatLevel:
2484  Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2485  break;
2486  case NVPTXISD::TexCubeArrayFloatFloat:
2487  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2488  break;
2489  case NVPTXISD::TexCubeArrayFloatFloatLevel:
2490  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2491  break;
2492  case NVPTXISD::TexCubeArrayS32Float:
2493  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2494  break;
2495  case NVPTXISD::TexCubeArrayS32FloatLevel:
2496  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2497  break;
2498  case NVPTXISD::TexCubeArrayU32Float:
2499  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2500  break;
2501  case NVPTXISD::TexCubeArrayU32FloatLevel:
2502  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2503  break;
2504  case NVPTXISD::Tld4R2DFloatFloat:
2505  Opc = NVPTX::TLD4_R_2D_F32_F32;
2506  break;
2507  case NVPTXISD::Tld4G2DFloatFloat:
2508  Opc = NVPTX::TLD4_G_2D_F32_F32;
2509  break;
2510  case NVPTXISD::Tld4B2DFloatFloat:
2511  Opc = NVPTX::TLD4_B_2D_F32_F32;
2512  break;
2513  case NVPTXISD::Tld4A2DFloatFloat:
2514  Opc = NVPTX::TLD4_A_2D_F32_F32;
2515  break;
2516  case NVPTXISD::Tld4R2DS64Float:
2517  Opc = NVPTX::TLD4_R_2D_S32_F32;
2518  break;
2519  case NVPTXISD::Tld4G2DS64Float:
2520  Opc = NVPTX::TLD4_G_2D_S32_F32;
2521  break;
2522  case NVPTXISD::Tld4B2DS64Float:
2523  Opc = NVPTX::TLD4_B_2D_S32_F32;
2524  break;
2525  case NVPTXISD::Tld4A2DS64Float:
2526  Opc = NVPTX::TLD4_A_2D_S32_F32;
2527  break;
2528  case NVPTXISD::Tld4R2DU64Float:
2529  Opc = NVPTX::TLD4_R_2D_U32_F32;
2530  break;
2531  case NVPTXISD::Tld4G2DU64Float:
2532  Opc = NVPTX::TLD4_G_2D_U32_F32;
2533  break;
2534  case NVPTXISD::Tld4B2DU64Float:
2535  Opc = NVPTX::TLD4_B_2D_U32_F32;
2536  break;
2537  case NVPTXISD::Tld4A2DU64Float:
2538  Opc = NVPTX::TLD4_A_2D_U32_F32;
2539  break;
2540  case NVPTXISD::TexUnified1DFloatS32:
2541  Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2542  break;
2543  case NVPTXISD::TexUnified1DFloatFloat:
2544  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2545  break;
2546  case NVPTXISD::TexUnified1DFloatFloatLevel:
2547  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2548  break;
2549  case NVPTXISD::TexUnified1DFloatFloatGrad:
2550  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2551  break;
2552  case NVPTXISD::TexUnified1DS32S32:
2553  Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2554  break;
2555  case NVPTXISD::TexUnified1DS32Float:
2556  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2557  break;
2558  case NVPTXISD::TexUnified1DS32FloatLevel:
2559  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2560  break;
2561  case NVPTXISD::TexUnified1DS32FloatGrad:
2562  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2563  break;
2564  case NVPTXISD::TexUnified1DU32S32:
2565  Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2566  break;
2567  case NVPTXISD::TexUnified1DU32Float:
2568  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2569  break;
2570  case NVPTXISD::TexUnified1DU32FloatLevel:
2571  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2572  break;
2573  case NVPTXISD::TexUnified1DU32FloatGrad:
2574  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2575  break;
2576  case NVPTXISD::TexUnified1DArrayFloatS32:
2577  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2578  break;
2579  case NVPTXISD::TexUnified1DArrayFloatFloat:
2580  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2581  break;
2582  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
2583  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2584  break;
2585  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
2586  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2587  break;
2588  case NVPTXISD::TexUnified1DArrayS32S32:
2589  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2590  break;
2591  case NVPTXISD::TexUnified1DArrayS32Float:
2592  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2593  break;
2594  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
2595  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2596  break;
2597  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
2598  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2599  break;
2600  case NVPTXISD::TexUnified1DArrayU32S32:
2601  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2602  break;
2603  case NVPTXISD::TexUnified1DArrayU32Float:
2604  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2605  break;
2606  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
2607  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2608  break;
2609  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
2610  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2611  break;
2612  case NVPTXISD::TexUnified2DFloatS32:
2613  Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2614  break;
2615  case NVPTXISD::TexUnified2DFloatFloat:
2616  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2617  break;
2618  case NVPTXISD::TexUnified2DFloatFloatLevel:
2619  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2620  break;
2621  case NVPTXISD::TexUnified2DFloatFloatGrad:
2622  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2623  break;
2624  case NVPTXISD::TexUnified2DS32S32:
2625  Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2626  break;
2627  case NVPTXISD::TexUnified2DS32Float:
2628  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2629  break;
2630  case NVPTXISD::TexUnified2DS32FloatLevel:
2631  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2632  break;
2633  case NVPTXISD::TexUnified2DS32FloatGrad:
2634  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2635  break;
2636  case NVPTXISD::TexUnified2DU32S32:
2637  Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2638  break;
2639  case NVPTXISD::TexUnified2DU32Float:
2640  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2641  break;
2642  case NVPTXISD::TexUnified2DU32FloatLevel:
2643  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2644  break;
2645  case NVPTXISD::TexUnified2DU32FloatGrad:
2646  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2647  break;
2648  case NVPTXISD::TexUnified2DArrayFloatS32:
2649  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2650  break;
2651  case NVPTXISD::TexUnified2DArrayFloatFloat:
2652  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2653  break;
2654  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
2655  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2656  break;
2657  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
2658  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2659  break;
2660  case NVPTXISD::TexUnified2DArrayS32S32:
2661  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2662  break;
2663  case NVPTXISD::TexUnified2DArrayS32Float:
2664  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2665  break;
2666  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
2667  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2668  break;
2669  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
2670  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2671  break;
2672  case NVPTXISD::TexUnified2DArrayU32S32:
2673  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2674  break;
2675  case NVPTXISD::TexUnified2DArrayU32Float:
2676  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2677  break;
2678  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
2679  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2680  break;
2681  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
2682  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2683  break;
2684  case NVPTXISD::TexUnified3DFloatS32:
2685  Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2686  break;
2687  case NVPTXISD::TexUnified3DFloatFloat:
2688  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2689  break;
2690  case NVPTXISD::TexUnified3DFloatFloatLevel:
2691  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2692  break;
2693  case NVPTXISD::TexUnified3DFloatFloatGrad:
2694  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2695  break;
2696  case NVPTXISD::TexUnified3DS32S32:
2697  Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2698  break;
2699  case NVPTXISD::TexUnified3DS32Float:
2700  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2701  break;
2702  case NVPTXISD::TexUnified3DS32FloatLevel:
2703  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2704  break;
2705  case NVPTXISD::TexUnified3DS32FloatGrad:
2706  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2707  break;
2708  case NVPTXISD::TexUnified3DU32S32:
2709  Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2710  break;
2711  case NVPTXISD::TexUnified3DU32Float:
2712  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2713  break;
2714  case NVPTXISD::TexUnified3DU32FloatLevel:
2715  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2716  break;
2717  case NVPTXISD::TexUnified3DU32FloatGrad:
2718  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2719  break;
2720  case NVPTXISD::TexUnifiedCubeFloatFloat:
2721  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2722  break;
2723  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
2724  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2725  break;
2726  case NVPTXISD::TexUnifiedCubeS32Float:
2727  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2728  break;
2729  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
2730  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2731  break;
2732  case NVPTXISD::TexUnifiedCubeU32Float:
2733  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2734  break;
2735  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
2736  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2737  break;
2738  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
2739  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2740  break;
2741  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
2742  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2743  break;
2744  case NVPTXISD::TexUnifiedCubeArrayS32Float:
2745  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2746  break;
2747  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
2748  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2749  break;
2750  case NVPTXISD::TexUnifiedCubeArrayU32Float:
2751  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2752  break;
2753  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
2754  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2755  break;
2756  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
2757  Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2758  break;
2759  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
2760  Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2761  break;
2762  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
2763  Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2764  break;
2765  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
2766  Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2767  break;
2768  case NVPTXISD::Tld4UnifiedR2DS64Float:
2769  Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2770  break;
2771  case NVPTXISD::Tld4UnifiedG2DS64Float:
2772  Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2773  break;
2774  case NVPTXISD::Tld4UnifiedB2DS64Float:
2775  Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2776  break;
2777  case NVPTXISD::Tld4UnifiedA2DS64Float:
2778  Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2779  break;
2780  case NVPTXISD::Tld4UnifiedR2DU64Float:
2781  Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2782  break;
2783  case NVPTXISD::Tld4UnifiedG2DU64Float:
2784  Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2785  break;
2786  case NVPTXISD::Tld4UnifiedB2DU64Float:
2787  Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2788  break;
2789  case NVPTXISD::Tld4UnifiedA2DU64Float:
2790  Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
2791  break;
2792  }
2793 
2794  // Copy over operands
2795  SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
2796  Ops.push_back(N->getOperand(0)); // Move chain to the back.
2797 
2798  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2799  return true;
2800 }
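// For example (handle and register names hypothetical): NVPTXISD::Tex1DFloatS32
// selects TEX_1D_F32_S32, which corresponds to PTX along the lines of:
//
//   tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex_ref, {%r1}];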
2801 
2802 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2803  unsigned Opc = 0;
2804  switch (N->getOpcode()) {
2805  default: return false;
2806  case NVPTXISD::Suld1DI8Clamp:
2807  Opc = NVPTX::SULD_1D_I8_CLAMP;
2808  break;
2809  case NVPTXISD::Suld1DI16Clamp:
2810  Opc = NVPTX::SULD_1D_I16_CLAMP;
2811  break;
2812  case NVPTXISD::Suld1DI32Clamp:
2813  Opc = NVPTX::SULD_1D_I32_CLAMP;
2814  break;
2815  case NVPTXISD::Suld1DI64Clamp:
2816  Opc = NVPTX::SULD_1D_I64_CLAMP;
2817  break;
2818  case NVPTXISD::Suld1DV2I8Clamp:
2819  Opc = NVPTX::SULD_1D_V2I8_CLAMP;
2820  break;
2821  case NVPTXISD::Suld1DV2I16Clamp:
2822  Opc = NVPTX::SULD_1D_V2I16_CLAMP;
2823  break;
2824  case NVPTXISD::Suld1DV2I32Clamp:
2825  Opc = NVPTX::SULD_1D_V2I32_CLAMP;
2826  break;
2827  case NVPTXISD::Suld1DV2I64Clamp:
2828  Opc = NVPTX::SULD_1D_V2I64_CLAMP;
2829  break;
2830  case NVPTXISD::Suld1DV4I8Clamp:
2831  Opc = NVPTX::SULD_1D_V4I8_CLAMP;
2832  break;
2833  case NVPTXISD::Suld1DV4I16Clamp:
2834  Opc = NVPTX::SULD_1D_V4I16_CLAMP;
2835  break;
2836  case NVPTXISD::Suld1DV4I32Clamp:
2837  Opc = NVPTX::SULD_1D_V4I32_CLAMP;
2838  break;
2839  case NVPTXISD::Suld1DArrayI8Clamp:
2840  Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
2841  break;
2842  case NVPTXISD::Suld1DArrayI16Clamp:
2843  Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
2844  break;
2845  case NVPTXISD::Suld1DArrayI32Clamp:
2846  Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
2847  break;
2848  case NVPTXISD::Suld1DArrayI64Clamp:
2849  Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
2850  break;
2851  case NVPTXISD::Suld1DArrayV2I8Clamp:
2852  Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
2853  break;
2854  case NVPTXISD::Suld1DArrayV2I16Clamp:
2855  Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
2856  break;
2857  case NVPTXISD::Suld1DArrayV2I32Clamp:
2858  Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
2859  break;
2860  case NVPTXISD::Suld1DArrayV2I64Clamp:
2861  Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
2862  break;
2863  case NVPTXISD::Suld1DArrayV4I8Clamp:
2864  Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
2865  break;
2866  case NVPTXISD::Suld1DArrayV4I16Clamp:
2867  Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
2868  break;
2869  case NVPTXISD::Suld1DArrayV4I32Clamp:
2870  Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
2871  break;
2872  case NVPTXISD::Suld2DI8Clamp:
2873  Opc = NVPTX::SULD_2D_I8_CLAMP;
2874  break;
2875  case NVPTXISD::Suld2DI16Clamp:
2876  Opc = NVPTX::SULD_2D_I16_CLAMP;
2877  break;
2878  case NVPTXISD::Suld2DI32Clamp:
2879  Opc = NVPTX::SULD_2D_I32_CLAMP;
2880  break;
2881  case NVPTXISD::Suld2DI64Clamp:
2882  Opc = NVPTX::SULD_2D_I64_CLAMP;
2883  break;
2884  case NVPTXISD::Suld2DV2I8Clamp:
2885  Opc = NVPTX::SULD_2D_V2I8_CLAMP;
2886  break;
2887  case NVPTXISD::Suld2DV2I16Clamp:
2888  Opc = NVPTX::SULD_2D_V2I16_CLAMP;
2889  break;
2890  case NVPTXISD::Suld2DV2I32Clamp:
2891  Opc = NVPTX::SULD_2D_V2I32_CLAMP;
2892  break;
2893  case NVPTXISD::Suld2DV2I64Clamp:
2894  Opc = NVPTX::SULD_2D_V2I64_CLAMP;
2895  break;
2896  case NVPTXISD::Suld2DV4I8Clamp:
2897  Opc = NVPTX::SULD_2D_V4I8_CLAMP;
2898  break;
2899  case NVPTXISD::Suld2DV4I16Clamp:
2900  Opc = NVPTX::SULD_2D_V4I16_CLAMP;
2901  break;
2902  case NVPTXISD::Suld2DV4I32Clamp:
2903  Opc = NVPTX::SULD_2D_V4I32_CLAMP;
2904  break;
2905  case NVPTXISD::Suld2DArrayI8Clamp:
2906  Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
2907  break;
2908  case NVPTXISD::Suld2DArrayI16Clamp:
2909  Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
2910  break;
2911  case NVPTXISD::Suld2DArrayI32Clamp:
2912  Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
2913  break;
2914  case NVPTXISD::Suld2DArrayI64Clamp:
2915  Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
2916  break;
2917  case NVPTXISD::Suld2DArrayV2I8Clamp:
2918  Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
2919  break;
2920  case NVPTXISD::Suld2DArrayV2I16Clamp:
2921  Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
2922  break;
2923  case NVPTXISD::Suld2DArrayV2I32Clamp:
2924  Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
2925  break;
2926  case NVPTXISD::Suld2DArrayV2I64Clamp:
2927  Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
2928  break;
2929  case NVPTXISD::Suld2DArrayV4I8Clamp:
2930  Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
2931  break;
2932  case NVPTXISD::Suld2DArrayV4I16Clamp:
2933  Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
2934  break;
2935  case NVPTXISD::Suld2DArrayV4I32Clamp:
2936  Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
2937  break;
2938  case NVPTXISD::Suld3DI8Clamp:
2939  Opc = NVPTX::SULD_3D_I8_CLAMP;
2940  break;
2941  case NVPTXISD::Suld3DI16Clamp:
2942  Opc = NVPTX::SULD_3D_I16_CLAMP;
2943  break;
2944  case NVPTXISD::Suld3DI32Clamp:
2945  Opc = NVPTX::SULD_3D_I32_CLAMP;
2946  break;
2947  case NVPTXISD::Suld3DI64Clamp:
2948  Opc = NVPTX::SULD_3D_I64_CLAMP;
2949  break;
2950  case NVPTXISD::Suld3DV2I8Clamp:
2951  Opc = NVPTX::SULD_3D_V2I8_CLAMP;
2952  break;
2953  case NVPTXISD::Suld3DV2I16Clamp:
2954  Opc = NVPTX::SULD_3D_V2I16_CLAMP;
2955  break;
2956  case NVPTXISD::Suld3DV2I32Clamp:
2957  Opc = NVPTX::SULD_3D_V2I32_CLAMP;
2958  break;
2959  case NVPTXISD::Suld3DV2I64Clamp:
2960  Opc = NVPTX::SULD_3D_V2I64_CLAMP;
2961  break;
2962  case NVPTXISD::Suld3DV4I8Clamp:
2963  Opc = NVPTX::SULD_3D_V4I8_CLAMP;
2964  break;
2965  case NVPTXISD::Suld3DV4I16Clamp:
2966  Opc = NVPTX::SULD_3D_V4I16_CLAMP;
2967  break;
2968  case NVPTXISD::Suld3DV4I32Clamp:
2969  Opc = NVPTX::SULD_3D_V4I32_CLAMP;
2970  break;
2971  case NVPTXISD::Suld1DI8Trap:
2972  Opc = NVPTX::SULD_1D_I8_TRAP;
2973  break;
2974  case NVPTXISD::Suld1DI16Trap:
2975  Opc = NVPTX::SULD_1D_I16_TRAP;
2976  break;
2977  case NVPTXISD::Suld1DI32Trap:
2978  Opc = NVPTX::SULD_1D_I32_TRAP;
2979  break;
2980  case NVPTXISD::Suld1DI64Trap:
2981  Opc = NVPTX::SULD_1D_I64_TRAP;
2982  break;
2983  case NVPTXISD::Suld1DV2I8Trap:
2984  Opc = NVPTX::SULD_1D_V2I8_TRAP;
2985  break;
2986  case NVPTXISD::Suld1DV2I16Trap:
2987  Opc = NVPTX::SULD_1D_V2I16_TRAP;
2988  break;
2989  case NVPTXISD::Suld1DV2I32Trap:
2990  Opc = NVPTX::SULD_1D_V2I32_TRAP;
2991  break;
2992  case NVPTXISD::Suld1DV2I64Trap:
2993  Opc = NVPTX::SULD_1D_V2I64_TRAP;
2994  break;
2995  case NVPTXISD::Suld1DV4I8Trap:
2996  Opc = NVPTX::SULD_1D_V4I8_TRAP;
2997  break;
2998  case NVPTXISD::Suld1DV4I16Trap:
2999  Opc = NVPTX::SULD_1D_V4I16_TRAP;
3000  break;
3001  case NVPTXISD::Suld1DV4I32Trap:
3002  Opc = NVPTX::SULD_1D_V4I32_TRAP;
3003  break;
3004  case NVPTXISD::Suld1DArrayI8Trap:
3005  Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3006  break;
3007  case NVPTXISD::Suld1DArrayI16Trap:
3008  Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
3009  break;
3010  case NVPTXISD::Suld1DArrayI32Trap:
3011  Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3012  break;
3013  case NVPTXISD::Suld1DArrayI64Trap:
3014  Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
3015  break;
3016  case NVPTXISD::Suld1DArrayV2I8Trap:
3017  Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3018  break;
3019  case NVPTXISD::Suld1DArrayV2I16Trap:
3020  Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3021  break;
3022  case NVPTXISD::Suld1DArrayV2I32Trap:
3023  Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3024  break;
3025  case NVPTXISD::Suld1DArrayV2I64Trap:
3026  Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
3027  break;
3028  case NVPTXISD::Suld1DArrayV4I8Trap:
3029  Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3030  break;
3031  case NVPTXISD::Suld1DArrayV4I16Trap:
3032  Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3033  break;
3034  case NVPTXISD::Suld1DArrayV4I32Trap:
3035  Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3036  break;
3037  case NVPTXISD::Suld2DI8Trap:
3038  Opc = NVPTX::SULD_2D_I8_TRAP;
3039  break;
3040  case NVPTXISD::Suld2DI16Trap:
3041  Opc = NVPTX::SULD_2D_I16_TRAP;
3042  break;
3043  case NVPTXISD::Suld2DI32Trap:
3044  Opc = NVPTX::SULD_2D_I32_TRAP;
3045  break;
3046  case NVPTXISD::Suld2DI64Trap:
3047  Opc = NVPTX::SULD_2D_I64_TRAP;
3048  break;
3049  case NVPTXISD::Suld2DV2I8Trap:
3050  Opc = NVPTX::SULD_2D_V2I8_TRAP;
3051  break;
3052  case NVPTXISD::Suld2DV2I16Trap:
3053  Opc = NVPTX::SULD_2D_V2I16_TRAP;
3054  break;
3055  case NVPTXISD::Suld2DV2I32Trap:
3056  Opc = NVPTX::SULD_2D_V2I32_TRAP;
3057  break;
3058  case NVPTXISD::Suld2DV2I64Trap:
3059  Opc = NVPTX::SULD_2D_V2I64_TRAP;
3060  break;
3061  case NVPTXISD::Suld2DV4I8Trap:
3062  Opc = NVPTX::SULD_2D_V4I8_TRAP;
3063  break;
3064  case NVPTXISD::Suld2DV4I16Trap:
3065  Opc = NVPTX::SULD_2D_V4I16_TRAP;
3066  break;
3067  case NVPTXISD::Suld2DV4I32Trap:
3068  Opc = NVPTX::SULD_2D_V4I32_TRAP;
3069  break;
3070  case NVPTXISD::Suld2DArrayI8Trap:
3071  Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3072  break;
3073  case NVPTXISD::Suld2DArrayI16Trap:
3074  Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3075  break;
3076  case NVPTXISD::Suld2DArrayI32Trap:
3077  Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3078  break;
3079  case NVPTXISD::Suld2DArrayI64Trap:
3080  Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
3081  break;
3082  case NVPTXISD::Suld2DArrayV2I8Trap:
3083  Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3084  break;
3085  case NVPTXISD::Suld2DArrayV2I16Trap:
3086  Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3087  break;
3088  case NVPTXISD::Suld2DArrayV2I32Trap:
3089  Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3090  break;
3091  case NVPTXISD::Suld2DArrayV2I64Trap:
3092  Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
3093  break;
3094  case NVPTXISD::Suld2DArrayV4I8Trap:
3095  Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3096  break;
3097  case NVPTXISD::Suld2DArrayV4I16Trap:
3098  Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3099  break;
3100  case NVPTXISD::Suld2DArrayV4I32Trap:
3101  Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3102  break;
3103  case NVPTXISD::Suld3DI8Trap:
3104  Opc = NVPTX::SULD_3D_I8_TRAP;
3105  break;
3106  case NVPTXISD::Suld3DI16Trap:
3107  Opc = NVPTX::SULD_3D_I16_TRAP;
3108  break;
3109  case NVPTXISD::Suld3DI32Trap:
3110  Opc = NVPTX::SULD_3D_I32_TRAP;
3111  break;
3112  case NVPTXISD::Suld3DI64Trap:
3113  Opc = NVPTX::SULD_3D_I64_TRAP;
3114  break;
3115  case NVPTXISD::Suld3DV2I8Trap:
3116  Opc = NVPTX::SULD_3D_V2I8_TRAP;
3117  break;
3118  case NVPTXISD::Suld3DV2I16Trap:
3119  Opc = NVPTX::SULD_3D_V2I16_TRAP;
3120  break;
3121  case NVPTXISD::Suld3DV2I32Trap:
3122  Opc = NVPTX::SULD_3D_V2I32_TRAP;
3123  break;
3124  case NVPTXISD::Suld3DV2I64Trap:
3125  Opc = NVPTX::SULD_3D_V2I64_TRAP;
3126  break;
3127  case NVPTXISD::Suld3DV4I8Trap:
3128  Opc = NVPTX::SULD_3D_V4I8_TRAP;
3129  break;
3130  case NVPTXISD::Suld3DV4I16Trap:
3131  Opc = NVPTX::SULD_3D_V4I16_TRAP;
3132  break;
3133  case NVPTXISD::Suld3DV4I32Trap:
3134  Opc = NVPTX::SULD_3D_V4I32_TRAP;
3135  break;
3136  case NVPTXISD::Suld1DI8Zero:
3137  Opc = NVPTX::SULD_1D_I8_ZERO;
3138  break;
3139  case NVPTXISD::Suld1DI16Zero:
3140  Opc = NVPTX::SULD_1D_I16_ZERO;
3141  break;
3142  case NVPTXISD::Suld1DI32Zero:
3143  Opc = NVPTX::SULD_1D_I32_ZERO;
3144  break;
3145  case NVPTXISD::Suld1DI64Zero:
3146  Opc = NVPTX::SULD_1D_I64_ZERO;
3147  break;
3148  case NVPTXISD::Suld1DV2I8Zero:
3149  Opc = NVPTX::SULD_1D_V2I8_ZERO;
3150  break;
3151  case NVPTXISD::Suld1DV2I16Zero:
3152  Opc = NVPTX::SULD_1D_V2I16_ZERO;
3153  break;
3154  case NVPTXISD::Suld1DV2I32Zero:
3155  Opc = NVPTX::SULD_1D_V2I32_ZERO;
3156  break;
3157  case NVPTXISD::Suld1DV2I64Zero:
3158  Opc = NVPTX::SULD_1D_V2I64_ZERO;
3159  break;
3160  case NVPTXISD::Suld1DV4I8Zero:
3161  Opc = NVPTX::SULD_1D_V4I8_ZERO;
3162  break;
3163  case NVPTXISD::Suld1DV4I16Zero:
3164  Opc = NVPTX::SULD_1D_V4I16_ZERO;
3165  break;
3166  case NVPTXISD::Suld1DV4I32Zero:
3167  Opc = NVPTX::SULD_1D_V4I32_ZERO;
3168  break;
3169  case NVPTXISD::Suld1DArrayI8Zero:
3170  Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
3171  break;
3172  case NVPTXISD::Suld1DArrayI16Zero:
3173  Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
3174  break;
3175  case NVPTXISD::Suld1DArrayI32Zero:
3176  Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
3177  break;
3178  case NVPTXISD::Suld1DArrayI64Zero:
3179  Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
3180  break;
3181  case NVPTXISD::Suld1DArrayV2I8Zero:
3182  Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
3183  break;
3184  case NVPTXISD::Suld1DArrayV2I16Zero:
3185  Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
3186  break;
3187  case NVPTXISD::Suld1DArrayV2I32Zero:
3188  Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
3189  break;
3190  case NVPTXISD::Suld1DArrayV2I64Zero:
3191  Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
3192  break;
3193  case NVPTXISD::Suld1DArrayV4I8Zero:
3194  Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
3195  break;
3196  case NVPTXISD::Suld1DArrayV4I16Zero:
3197  Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
3198  break;
3199  case NVPTXISD::Suld1DArrayV4I32Zero:
3200  Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
3201  break;
3202  case NVPTXISD::Suld2DI8Zero:
3203  Opc = NVPTX::SULD_2D_I8_ZERO;
3204  break;
3205  case NVPTXISD::Suld2DI16Zero:
3206  Opc = NVPTX::SULD_2D_I16_ZERO;
3207  break;
3208  case NVPTXISD::Suld2DI32Zero:
3209  Opc = NVPTX::SULD_2D_I32_ZERO;
3210  break;
3211  case NVPTXISD::Suld2DI64Zero:
3212  Opc = NVPTX::SULD_2D_I64_ZERO;
3213  break;
3214  case NVPTXISD::Suld2DV2I8Zero:
3215  Opc = NVPTX::SULD_2D_V2I8_ZERO;
3216  break;
3217  case NVPTXISD::Suld2DV2I16Zero:
3218  Opc = NVPTX::SULD_2D_V2I16_ZERO;
3219  break;
3220  case NVPTXISD::Suld2DV2I32Zero:
3221  Opc = NVPTX::SULD_2D_V2I32_ZERO;
3222  break;
3223  case NVPTXISD::Suld2DV2I64Zero:
3224  Opc = NVPTX::SULD_2D_V2I64_ZERO;
3225  break;
3226  case NVPTXISD::Suld2DV4I8Zero:
3227  Opc = NVPTX::SULD_2D_V4I8_ZERO;
3228  break;
3229  case NVPTXISD::Suld2DV4I16Zero:
3230  Opc = NVPTX::SULD_2D_V4I16_ZERO;
3231  break;
3232  case NVPTXISD::Suld2DV4I32Zero:
3233  Opc = NVPTX::SULD_2D_V4I32_ZERO;
3234  break;
3235  case NVPTXISD::Suld2DArrayI8Zero:
3236  Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
3237  break;
3238  case NVPTXISD::Suld2DArrayI16Zero:
3239  Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
3240  break;
3241  case NVPTXISD::Suld2DArrayI32Zero:
3242  Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
3243  break;
3244  case NVPTXISD::Suld2DArrayI64Zero:
3245  Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
3246  break;
3247  case NVPTXISD::Suld2DArrayV2I8Zero:
3248  Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
3249  break;
3250  case NVPTXISD::Suld2DArrayV2I16Zero:
3251  Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
3252  break;
3253  case NVPTXISD::Suld2DArrayV2I32Zero:
3254  Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
3255  break;
3256  case NVPTXISD::Suld2DArrayV2I64Zero:
3257  Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
3258  break;
3259  case NVPTXISD::Suld2DArrayV4I8Zero:
3260  Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
3261  break;
3262  case NVPTXISD::Suld2DArrayV4I16Zero:
3263  Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
3264  break;
3265  case NVPTXISD::Suld2DArrayV4I32Zero:
3266  Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
3267  break;
3268  case NVPTXISD::Suld3DI8Zero:
3269  Opc = NVPTX::SULD_3D_I8_ZERO;
3270  break;
3271  case NVPTXISD::Suld3DI16Zero:
3272  Opc = NVPTX::SULD_3D_I16_ZERO;
3273  break;
3274  case NVPTXISD::Suld3DI32Zero:
3275  Opc = NVPTX::SULD_3D_I32_ZERO;
3276  break;
3277  case NVPTXISD::Suld3DI64Zero:
3278  Opc = NVPTX::SULD_3D_I64_ZERO;
3279  break;
3280  case NVPTXISD::Suld3DV2I8Zero:
3281  Opc = NVPTX::SULD_3D_V2I8_ZERO;
3282  break;
3283  case NVPTXISD::Suld3DV2I16Zero:
3284  Opc = NVPTX::SULD_3D_V2I16_ZERO;
3285  break;
3286  case NVPTXISD::Suld3DV2I32Zero:
3287  Opc = NVPTX::SULD_3D_V2I32_ZERO;
3288  break;
3289  case NVPTXISD::Suld3DV2I64Zero:
3290  Opc = NVPTX::SULD_3D_V2I64_ZERO;
3291  break;
3292  case NVPTXISD::Suld3DV4I8Zero:
3293  Opc = NVPTX::SULD_3D_V4I8_ZERO;
3294  break;
3295  case NVPTXISD::Suld3DV4I16Zero:
3296  Opc = NVPTX::SULD_3D_V4I16_ZERO;
3297  break;
3298  case NVPTXISD::Suld3DV4I32Zero:
3299  Opc = NVPTX::SULD_3D_V4I32_ZERO;
3300  break;
3301  }
3302 
3303  // Copy over operands
3304  SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
3305  Ops.push_back(N->getOperand(0)); // Move chain to the back.
3306 
3307  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3308  return true;
3309 }
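// For example (handle and register names hypothetical): NVPTXISD::Suld1DI32Clamp
// selects SULD_1D_I32_CLAMP, which corresponds to PTX roughly like:
//
//   suld.b.1d.b32.clamp %r1, [surf_ref, {%r2}];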
3310 
3311 
3312 /// tryBFE - Look for instruction sequences that can be made more efficient
3313 /// by using the 'bfe' (bit-field extract) PTX instruction.
3314 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3315  SDLoc DL(N);
3316  SDValue LHS = N->getOperand(0);
3317  SDValue RHS = N->getOperand(1);
3318  SDValue Len;
3319  SDValue Start;
3320  SDValue Val;
3321  bool IsSigned = false;
3322 
3323  if (N->getOpcode() == ISD::AND) {
3324  // Canonicalize the operands
3325  // We want 'and %val, %mask'
3326  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3327  std::swap(LHS, RHS);
3328  }
3329 
3330  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3331  if (!Mask) {
3332  // We need a constant mask on the RHS of the AND
3333  return false;
3334  }
3335 
3336  // Extract the mask bits
3337  uint64_t MaskVal = Mask->getZExtValue();
3338  if (!isMask_64(MaskVal)) {
3339  // We *could* handle shifted masks here, but doing so would require an
3340  // 'and' operation to fix up the low-order bits, so we would trade a
3341  // shr+and pair for bfe+and, which has the same throughput.
3342  return false;
3343  }
3344 
3345  // How many bits are in our mask?
3346  uint64_t NumBits = countTrailingOnes(MaskVal);
3347  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3348 
3349  if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3350  // We have a 'srl/and' pair, extract the effective start bit and length
3351  Val = LHS.getNode()->getOperand(0);
3352  Start = LHS.getNode()->getOperand(1);
3353  ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3354  if (StartConst) {
3355  uint64_t StartVal = StartConst->getZExtValue();
3356  // How many "good" bits do we have left? "good" is defined here as bits
3357  // that exist in the original value, not shifted in.
3358  uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3359  if (NumBits > GoodBits) {
3360  // Do not handle the case where bits have been shifted in. In theory
3361  // we could handle this, but the cost is likely higher than just
3362  // emitting the srl/and pair.
3363  return false;
3364  }
3365  Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3366  } else {
3367  // Do not handle the case where the shift amount (can be zero if no srl
3368  // was found) is not constant. We could handle this case, but it would
3369  // require run-time logic that would be more expensive than just
3370  // emitting the srl/and pair.
3371  return false;
3372  }
3373  } else {
3374  // Do not handle the case where the LHS of the and is not a shift. While
3375  // it would be trivial to handle this case, it would just transform
3376  // 'and' -> 'bfe', but 'and' has higher throughput.
3377  return false;
3378  }
3379  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3380  if (LHS->getOpcode() == ISD::AND) {
3381  ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3382  if (!ShiftCnst) {
3383  // Shift amount must be constant
3384  return false;
3385  }
3386 
3387  uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3388 
3389  SDValue AndLHS = LHS->getOperand(0);
3390  SDValue AndRHS = LHS->getOperand(1);
3391 
3392  // Canonicalize the AND to have the mask on the RHS
3393  if (isa<ConstantSDNode>(AndLHS)) {
3394  std::swap(AndLHS, AndRHS);
3395  }
3396 
3397  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3398  if (!MaskCnst) {
3399  // Mask must be constant
3400  return false;
3401  }
3402 
3403  uint64_t MaskVal = MaskCnst->getZExtValue();
3404  uint64_t NumZeros;
3405  uint64_t NumBits;
3406  if (isMask_64(MaskVal)) {
3407  NumZeros = 0;
3408  // The number of bits in the result bitfield will be the number of
3409  // trailing ones (the AND) minus the number of bits we shift off
3410  NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
3411  } else if (isShiftedMask_64(MaskVal)) {
3412  NumZeros = countTrailingZeros(MaskVal);
3413  unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
3414  // The number of bits in the result bitfield will be the number of
3415  // trailing zeros plus the number of set bits in the mask minus the
3416  // number of bits we shift off
3417  NumBits = NumZeros + NumOnes - ShiftAmt;
3418  } else {
3419  // This is not a mask we can handle
3420  return false;
3421  }
3422 
3423  if (ShiftAmt < NumZeros) {
3424  // Handling this case would require extra logic that would make this
3425  // transformation non-profitable
3426  return false;
3427  }
3428 
3429  Val = AndLHS;
3430  Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3431  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3432  } else if (LHS->getOpcode() == ISD::SHL) {
3433  // Here, we have a pattern like:
3434  //
3435  // (sra (shl val, NN), MM)
3436  // or
3437  // (srl (shl val, NN), MM)
3438  //
3439  // If MM >= NN, we can efficiently optimize this with bfe
3440  Val = LHS->getOperand(0);
3441 
3442  SDValue ShlRHS = LHS->getOperand(1);
3443  ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3444  if (!ShlCnst) {
3445  // Shift amount must be constant
3446  return false;
3447  }
3448  uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3449 
3450  SDValue ShrRHS = RHS;
3451  ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3452  if (!ShrCnst) {
3453  // Shift amount must be constant
3454  return false;
3455  }
3456  uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3457 
3458  // To avoid extra codegen and be profitable, we need Outer >= Inner
3459  if (OuterShiftAmt < InnerShiftAmt) {
3460  return false;
3461  }
3462 
3463  // If the outer shift is at least the type size, we have no bitfield to
3464  // extract (and since we also check that the inner shift is <= the outer
3465  // shift, this implies that the inner shift is < the type size).
3466  if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3467  return false;
3468  }
3469 
3470  Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3471  MVT::i32);
3472  Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3473  DL, MVT::i32);
3474 
3475  if (N->getOpcode() == ISD::SRA) {
3476  // If we have an arithmetic right shift, we need to use the signed bfe
3477  // variant.
3478  IsSigned = true;
3479  }
3480  } else {
3481  // No can do...
3482  return false;
3483  }
3484  } else {
3485  // No can do...
3486  return false;
3487  }
3488 
3489 
3490  unsigned Opc;
3491  // For the BFE operations we form here from "and" and "srl", always use the
3492  // unsigned variants.
3493  if (Val.getValueType() == MVT::i32) {
3494  if (IsSigned) {
3495  Opc = NVPTX::BFE_S32rii;
3496  } else {
3497  Opc = NVPTX::BFE_U32rii;
3498  }
3499  } else if (Val.getValueType() == MVT::i64) {
3500  if (IsSigned) {
3501  Opc = NVPTX::BFE_S64rii;
3502  } else {
3503  Opc = NVPTX::BFE_U64rii;
3504  }
3505  } else {
3506  // We cannot handle this type
3507  return false;
3508  }
3509 
3510  SDValue Ops[] = {
3511  Val, Start, Len
3512  };
3513 
3514  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3515  return true;
3516 }
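// Worked example (values hypothetical): for the DAG equivalent of
//
//   %t = lshr i32 %v, 5
//   %r = and i32 %t, 255    ; mask of 8 trailing ones
//
// Len is 8, Start is 5, and GoodBits = 32 - 5 = 27 >= 8, so the pair is
// replaced by a single
//
//   bfe.u32 %r, %v, 5, 8;
//
// which extracts bits 12:5 of %v.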
3517 
3518 // SelectDirectAddr - Match a direct address for DAG.
3519 // A direct address could be a globaladdress or externalsymbol.
3520 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3521  // Return true if TGA or ES.
3522  if (N.getOpcode() == ISD::TargetGlobalAddress ||
3523  N.getOpcode() == ISD::TargetExternalSymbol) {
3524  Address = N;
3525  return true;
3526  }
3527  if (N.getOpcode() == NVPTXISD::Wrapper) {
3528  Address = N.getOperand(0);
3529  return true;
3530  }
3531  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3532  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3533  if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3534  CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3535  CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3536  return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3537  }
3538  return false;
3539 }
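// Sketch of the param-space fold above (symbol name hypothetical): an
// addrspacecast of MoveParam(foo_param_0) from the generic to the param
// address space matches the underlying argument symbol directly, so the
// access can be emitted as, e.g., ld.param.u32 %r1, [foo_param_0];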
3540 
3541 // symbol+offset
3542 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3543  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3544  if (Addr.getOpcode() == ISD::ADD) {
3545  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3546  SDValue base = Addr.getOperand(0);
3547  if (SelectDirectAddr(base, Base)) {
3548  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3549  mvt);
3550  return true;
3551  }
3552  }
3553  }
3554  return false;
3555 }
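// For example (symbol name hypothetical): an address of the form
// (add (Wrapper gvar), 8) matches with Base = gvar and Offset = 8, so a
// store can print as st.global.u32 [gvar+8], %r1;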
3556 
3557 // symbol+offset
3558 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3559  SDValue &Base, SDValue &Offset) {
3560  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3561 }
3562 
3563 // symbol+offset
3564 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3565  SDValue &Base, SDValue &Offset) {
3566  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3567 }
3568 
3569 // register+offset
3570 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3571  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3572  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3573  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3574  Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3575  return true;
3576  }
3577  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3578  Addr.getOpcode() == ISD::TargetGlobalAddress)
3579  return false; // direct calls.
3580 
3581  if (Addr.getOpcode() == ISD::ADD) {
3582  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3583  return false;
3584  }
3585  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3586  if (FrameIndexSDNode *FIN =
3587  dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3588  // Constant offset from frame ref.
3589  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3590  else
3591  Base = Addr.getOperand(0);
3592  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3593  mvt);
3594  return true;
3595  }
3596  }
3597  return false;
3598 }
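// For example (register names hypothetical): an address of the form
// (add %rd1, 16) matches with Base = %rd1 and Offset = 16, giving PTX such
// as ld.global.u32 %r1, [%rd1+16]; a frame-index base instead becomes a
// target frame-index operand with offset 0.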
3599 
3600 // register+offset
3601 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3602  SDValue &Base, SDValue &Offset) {
3603  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3604 }
3605 
3606 // register+offset
3607 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3608  SDValue &Base, SDValue &Offset) {
3609  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3610 }
3611 
3612 // symbol
3613 bool NVPTXDAGToDAGISel::SelectADDRvar(SDNode *OpNode, SDValue Addr,
3614  SDValue &Value) {
3615  return SelectDirectAddr(Addr, Value);
3616 }
3617 
3618 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3619  unsigned int spN) const {
3620  const Value *Src = nullptr;
3621  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3622  if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3623  return true;
3624  Src = mN->getMemOperand()->getValue();
3625  }
3626  if (!Src)
3627  return false;
3628  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3629  return (PT->getAddressSpace() == spN);
3630  return false;
3631 }
3632 
3633 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3634 /// inline asm expressions.
3635 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3636  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3637  SDValue Op0, Op1;
3638  switch (ConstraintID) {
3639  default:
3640  return true;
3641  case InlineAsm::Constraint_m: // memory
3642  if (SelectDirectAddr(Op, Op0)) {
3643  OutOps.push_back(Op0);
3644  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3645  return false;
3646  }
3647  if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3648  OutOps.push_back(Op0);
3649  OutOps.push_back(Op1);
3650  return false;
3651  }
3652  break;
3653  }
3654  return true;
3655 }
3656 
3657 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3658 /// conversion from \p SrcTy to \p DestTy.
3659 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3660  bool IsSigned) {
3661  switch (SrcTy.SimpleTy) {
3662  default:
3663  llvm_unreachable("Unhandled source type");
3664  case MVT::i8:
3665  switch (DestTy.SimpleTy) {
3666  default:
3667  llvm_unreachable("Unhandled dest type");
3668  case MVT::i16:
3669  return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3670  case MVT::i32:
3671  return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3672  case MVT::i64:
3673  return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3674  }
3675  case MVT::i16:
3676  switch (DestTy.SimpleTy) {
3677  default:
3678  llvm_unreachable("Unhandled dest type");
3679  case MVT::i8:
3680  return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3681  case MVT::i32:
3682  return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3683  case MVT::i64:
3684  return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3685  }
3686  case MVT::i32:
3687  switch (DestTy.SimpleTy) {
3688  default:
3689  llvm_unreachable("Unhandled dest type");
3690  case MVT::i8:
3691  return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3692  case MVT::i16:
3693  return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3694  case MVT::i64:
3695  return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3696  }
3697  case MVT::i64:
3698  switch (DestTy.SimpleTy) {
3699  default:
3700  llvm_unreachable("Unhandled dest type");
3701  case MVT::i8:
3702  return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3703  case MVT::i16:
3704  return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3705  case MVT::i32:
3706  return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3707  }
3708  }
3709 }
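// Usage sketch (hypothetical call site): GetConvertOpcode(MVT::i32, MVT::i16,
// /*IsSigned=*/true) returns NVPTX::CVT_s32_s16, which prints as
//
//   cvt.s32.s16 %r1, %rs1;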