//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelDAGToDAG.h"
#include "NVPTXUtilities.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"

using namespace llvm;

#define DEBUG_TYPE "nvptx-isel"

/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOpt::Level OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
}

NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel), TM(tm) {
  doMulWide = (OptLevel > 0);
}

bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

int NVPTXDAGToDAGISel::getDivF32Level() const {
  return Subtarget->getTargetLowering()->getDivF32Level();
}

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  return Subtarget->getTargetLowering()->usePrecSqrtF32();
}

bool NVPTXDAGToDAGISel::useF32FTZ() const {
  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}

bool NVPTXDAGToDAGISel::allowFMA() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowFMA(*MF, OptLevel);
}

bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowUnsafeFPMath(*MF);
}

bool NVPTXDAGToDAGISel::useShortPointers() const {
  return TM.useShortPointers();
}

/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  case ISD::LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
    if (tryStore(N))
      return;
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    break;
  case NVPTXISD::SETP_F16X2:
    SelectSETP_F16X2(N);
    return;

  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    if (tryLoadVector(N))
      return;
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    if (tryLDGLDU(N))
      return;
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    if (tryStoreVector(N))
      return;
    break;
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    if (tryLoadParam(N))
      return;
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    if (tryStoreRetval(N))
      return;
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    if (tryStoreParam(N))
      return;
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    if (tryIntrinsicNoChain(N))
      return;
    break;
  case ISD::INTRINSIC_W_CHAIN:
    if (tryIntrinsicChain(N))
      return;
    break;
  // NOTE: the original listing collapses the long run of NVPTXISD texture
  // opcode cases (beginning with Tex1DFloatS32) that appears here; they all
  // dispatch to the same handler.
  case NVPTXISD::Tex1DFloatS32:
    if (tryTextureIntrinsic(N))
      return;
    break;
  // NOTE: likewise, the run of NVPTXISD surface opcode cases (beginning with
  // Suld1DI8Clamp) is collapsed here.
  case NVPTXISD::Suld1DI8Clamp:
    if (trySurfaceIntrinsic(N))
      return;
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    if (tryBFE(N))
      return;
    break;
  case ISD::ADDRSPACECAST:
    SelectAddrSpaceCast(N);
    return;
  case ISD::ConstantFP:
    if (tryConstantFP16(N))
      return;
    break;
  default:
    break;
  }
  SelectCode(N);
}

bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:
    return tryLDGLDU(N);
  }
}

// There's no way to specify FP16 immediates in .f16 ops, so we have to
// load them into an .f16 register first.
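// (Illustrative only: the f16 bit pattern for 1.0 is 0x3C00, so the
// materialized constant amounts to something like "mov.b16 %h1, 0x3C00;";
// the exact expansion of LOAD_CONST_F16 is defined in the .td files.)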
bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
  if (N->getValueType(0) != MVT::f16)
    return false;
  SDValue Val = CurDAG->getTargetConstantFP(
      cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
  SDNode *LoadConstF16 =
      CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
  ReplaceNode(N, LoadConstF16);
  return true;
}

// Map ISD::CONDCODE value to appropriate CmpMode expected by
// NVPTXInstPrinter::printCmpMode().
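// (E.g. ISD::SETOLT maps to CmpMode::LT, which the printer renders as "lt"
// in the final setp instruction; OR-ing in FTZ_FLAG makes it "lt.ftz".)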
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
  using NVPTX::PTXCmpMode::CmpMode;
  unsigned PTXCmpMode = [](ISD::CondCode CC) {
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code.");
    case ISD::SETOEQ:
      return CmpMode::EQ;
    case ISD::SETOGT:
      return CmpMode::GT;
    case ISD::SETOGE:
      return CmpMode::GE;
    case ISD::SETOLT:
      return CmpMode::LT;
    case ISD::SETOLE:
      return CmpMode::LE;
    case ISD::SETONE:
      return CmpMode::NE;
    case ISD::SETO:
      return CmpMode::NUM;
    case ISD::SETUO:
      return CmpMode::NotANumber;
    case ISD::SETUEQ:
      return CmpMode::EQU;
    case ISD::SETUGT:
      return CmpMode::GTU;
    case ISD::SETUGE:
      return CmpMode::GEU;
    case ISD::SETULT:
      return CmpMode::LTU;
    case ISD::SETULE:
      return CmpMode::LEU;
    case ISD::SETUNE:
      return CmpMode::NEU;
    case ISD::SETEQ:
      return CmpMode::EQ;
    case ISD::SETGT:
      return CmpMode::GT;
    case ISD::SETGE:
      return CmpMode::GE;
    case ISD::SETLT:
      return CmpMode::LT;
    case ISD::SETLE:
      return CmpMode::LE;
    case ISD::SETNE:
      return CmpMode::NE;
    }
  }(CondCode.get());

  if (FTZ)
    PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;

  return PTXCmpMode;
}

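// SETP_F16X2 compares two f16x2 values lane-wise and yields two i1
// predicates, one per half. A sketch of the resulting PTX, using the
// dual-destination form of setp: "setp.gt.f16x2 %p1|%p2, %a, %b;".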
bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  SDLoc DL(N);
  SDNode *SetP = CurDAG->getMachineNode(
      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
  ReplaceNode(N, SetP);
  return true;
}

// Find all instances of extract_vector_elt that use this v2f16 vector
// and coalesce them into a scattering move instruction.
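// (Sketch of the intent: two extracts of %v : v2f16 collapse into a single
//    %lo, %hi = SplitF16x2 %v
// whose two results feed the former users of element 0 and element 1.)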
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
  SDValue Vector = N->getOperand(0);

  // We only care about f16x2 as it's the only real vector type we
  // need to deal with.
  if (Vector.getSimpleValueType() != MVT::v2f16)
    return false;

  // Find and record all uses of this vector that extract element 0 or 1.
  SmallVector<SDNode *, 4> E0, E1;
  for (const auto &U : Vector.getNode()->uses()) {
    if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      continue;
    if (U->getOperand(0) != Vector)
      continue;
    if (const ConstantSDNode *IdxConst =
            dyn_cast<ConstantSDNode>(U->getOperand(1))) {
      if (IdxConst->getZExtValue() == 0)
        E0.push_back(U);
      else if (IdxConst->getZExtValue() == 1)
        E1.push_back(U);
      else
        llvm_unreachable("Invalid vector index.");
    }
  }

  // There's no point scattering f16x2 if we only ever access one
  // element of it.
  if (E0.empty() || E1.empty())
    return false;

  unsigned Op = NVPTX::SplitF16x2;
  // If the vector has been BITCAST'ed from i32, we can use the original
  // value directly and avoid a register-to-register move.
  SDValue Source = Vector;
  if (Vector->getOpcode() == ISD::BITCAST) {
    Op = NVPTX::SplitI32toF16x2;
    Source = Vector->getOperand(0);
  }
  // Merge (f16 extractelt(V, 0), f16 extractelt(V, 1))
  // into f16,f16 SplitF16x2(V)
  SDNode *ScatterOp =
      CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
  for (auto *Node : E0)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
  for (auto *Node : E1)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));

  return true;
}

static unsigned int getCodeAddrSpace(MemSDNode *N) {
  const Value *Src = N->getMemOperand()->getValue();

  if (!Src)
    return NVPTX::PTXLdStInstCode::GENERIC;

  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    default: break;
    }
  }
  return NVPTX::PTXLdStInstCode::GENERIC;
}
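// (E.g. a load through a pointer in LLVM addrspace(3), NVPTX's shared space,
// comes back as SHARED and later selects the ".shared" instruction form.)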

static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
                          unsigned CodeAddrSpace, MachineFunction *F) {
  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
  // space.
  //
  // We have two ways of identifying invariant loads: Loads may be explicitly
  // marked as invariant, or we may infer them to be invariant.
  //
  // We currently infer invariance for loads from
  //  - constant global variables, and
  //  - kernel function pointer params that are noalias (i.e. __restrict) and
  //    never written to.
  //
  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
  // not during the SelectionDAG phase).
  //
  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
  // explicitly invariant loads because these are how clang tells us to use ldg
  // when the user uses a builtin.
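  // (E.g. a kernel parameter declared "const float *__restrict__ in" that is
  // never stored through satisfies the inference below, so its loads become
  // ld.global.nc.)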
  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
    return false;

  if (N->isInvariant())
    return true;

  bool IsKernelFn = isKernelFunction(F->getFunction());

  // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
  // because the former looks through phi nodes while the latter does not. We
  // need to look through phi nodes to handle pointer induction variables.
  SmallVector<Value *, 8> Objs;
  GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
                       Objs, F->getDataLayout());

  return all_of(Objs, [&](Value *V) {
    if (auto *A = dyn_cast<const Argument>(V))
      return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
    if (auto *GV = dyn_cast<const GlobalVariable>(V))
      return GV->isConstant();
    return false;
  });
}

bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_texsurf_handle_internal:
    SelectTexSurfHandle(N);
    return true;
  }
}

void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
  // Op 0 is the intrinsic ID
  SDValue Wrapper = N->getOperand(1);
  SDValue GlobalVal = Wrapper.getOperand(0);
  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
                                        MVT::i64, GlobalVal));
}

void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
    unsigned Opc;
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
                                               : NVPTX::cvta_shared_yes_64)
                         : NVPTX::cvta_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
                                               : NVPTX::cvta_const_yes_64)
                         : NVPTX::cvta_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
                                               : NVPTX::cvta_local_yes_64)
                         : NVPTX::cvta_local_yes;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  } else {
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    unsigned Opc;
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
                         : NVPTX::cvta_to_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
                                               : NVPTX::cvta_to_shared_yes_64)
                         : NVPTX::cvta_to_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
                                               : NVPTX::cvta_to_const_yes_64)
                         : NVPTX::cvta_to_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
                                               : NVPTX::cvta_to_local_yes_64)
                         : NVPTX::cvta_to_local_yes;
      break;
    case ADDRESS_SPACE_PARAM:
      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
                         : NVPTX::nvvm_ptr_gen_to_param;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  }
}
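// (E.g. casting a shared pointer to generic on a 64-bit target selects
// cvta_shared_yes_64, which prints roughly as
// "cvta.shared.u64 %rd_dst, %rd_src;".)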

// Helper function template to reduce amount of boilerplate code for
// opcode selection.
static Optional<unsigned> pickOpcodeForVT(
    MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
    unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
    unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
  switch (VT) {
  case MVT::i1:
  case MVT::i8:
    return Opcode_i8;
  case MVT::i16:
    return Opcode_i16;
  case MVT::i32:
    return Opcode_i32;
  case MVT::i64:
    return Opcode_i64;
  case MVT::f16:
    return Opcode_f16;
  case MVT::v2f16:
    return Opcode_f16x2;
  case MVT::f32:
    return Opcode_f32;
  case MVT::f64:
    return Opcode_f64;
  default:
    return None;
  }
}
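// (Callers pass None in slots a variant does not support; e.g. the v4 load
// tables below have no i64/f64 forms, so those lookups yield None and the
// caller bails out of custom selection.)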

bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  SDLoc dl(N);
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = nullptr;

  // do not support pre/post inc/dec
  if (LD->isIndexed())
    return false;

  if (!LoadedVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = LD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT SimpleVT = LoadedVT.getSimpleVT();
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int fromType;

  // Vector Setting
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
    // v2f16 is loaded using ld.b32
    fromTypeWidth = 32;
  }

  if ((LD->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    // f16 uses .b16 as its storage type.
    fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
                                             : NVPTX::PTXLdStInstCode::Float;
  else
    fromType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  Optional<unsigned> Opcode;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(
        TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
        NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
        NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
          NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
          NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
          NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
          NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
          NVPTX::LD_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
          NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
          NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  }

  if (!NVPTXLD)
    return false;

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  ReplaceNode(N, NVPTXLD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  Optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
                                             : NVPTX::PTXLdStInstCode::Float;
  else
    FromType = NVPTX::PTXLdStInstCode::Unsigned;

  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    break;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    break;
  default:
    return false;
  }

  EVT EltVT = N->getValueType(0);

  // v8f16 is a special case. PTX doesn't have ld.v8.f16
  // instruction. Instead, we split the vector into v2f16 chunks and
  // load them with ld.v4.b32.
  if (EltVT == MVT::v2f16) {
    assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
    EltVT = MVT::i32;
    FromType = NVPTX::PTXLdStInstCode::Untyped;
    FromTypeWidth = 32;
  }
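  // (The emitted PTX for that case looks roughly like
  //  "ld.global.v4.b32 {%r0, %r1, %r2, %r3}, [addr];", with each %r holding
  //  one v2f16 pair.)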

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
                               NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
                               NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
                               NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
                          NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
                          NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
                          NVPTX::LDV_f32_v4_avar, None);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
                               NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
                               NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
                          NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
                          NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
                          NVPTX::LDV_f32_v4_asi, None);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
            NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
            NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
            NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
            NVPTX::LDV_f64_v2_ari_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
            NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
            NVPTX::LDV_f32_v4_ari_64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
                                 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
        break;
      case NVPTXISD::LoadV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
                            NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
                            NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
                            NVPTX::LDV_f32_v4_ari, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };

    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
            NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
            NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
            NVPTX::LDV_f64_v2_areg_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
            NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
            NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
            NVPTX::LDV_f32_v4_areg_64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
                            NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
                            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
                            NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
                            NVPTX::LDV_f64_v2_areg);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
            NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
            NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
            NVPTX::LDV_f32_v4_areg, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  }

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1;
  MemSDNode *Mem;
  bool IsLDG = true;

  // If this is an LDG intrinsic, the address is the third operand. If it's an
  // LDG/LDU SD node (from custom vector handling), then it's the second
  // operand.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    Op1 = N->getOperand(2);
    Mem = cast<MemIntrinsicSDNode>(N);
    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IID) {
    default:
      return false;
    case Intrinsic::nvvm_ldg_global_f:
    case Intrinsic::nvvm_ldg_global_i:
    case Intrinsic::nvvm_ldg_global_p:
      IsLDG = true;
      break;
    case Intrinsic::nvvm_ldu_global_f:
    case Intrinsic::nvvm_ldu_global_i:
    case Intrinsic::nvvm_ldu_global_p:
      IsLDG = false;
      break;
    }
  } else {
    Op1 = N->getOperand(1);
    Mem = cast<MemSDNode>(N);
  }

  Optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  SDValue Base, Offset, Addr;

  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;
  if (EltVT.isVector()) {
    NumElts = EltVT.getVectorNumElements();
    EltVT = EltVT.getVectorElementType();
    // vectors of f16 are loaded/stored as multiples of v2f16 elements.
    if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) {
      assert(NumElts % 2 == 0 && "Vector must have even number of elements");
      EltVT = MVT::v2f16;
      NumElts /= 2;
    }
  }

  // Build the "promoted" result VTList for the load. If we are really loading
  // i8s, then the return type will be promoted to i16 since we do not expose
  // 8-bit registers in NVPTX.
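  // (E.g. an LDGV2 of <2 x i8> produces the VT list {i16, i16, Other}.)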
  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
  SmallVector<EVT, 5> InstVTs;
  for (unsigned i = 0; i != NumElts; ++i) {
    InstVTs.push_back(NodeVT);
  }
  InstVTs.push_back(MVT::Other);
  SDVTList InstVTList = CurDAG->getVTList(InstVTs);

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case ISD::LOAD:
    case ISD::INTRINSIC_W_CHAIN:
      if (IsLDG)
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
      else
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
      break;
    case NVPTXISD::LoadV2:
    case NVPTXISD::LDGV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LDUV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LoadV4:
    case NVPTXISD::LDGV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
                               NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
      break;
    case NVPTXISD::LDUV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
                               NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = {Base, Offset, Chain};
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
  } else {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
  }

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = Mem->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  // For automatic generation of LDG (through SelectLoad[Vector], not the
  // intrinsics), we may have an extending load like:
  //
  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
  //
  // In this case, the matching logic above will select a load for the original
  // memory type (in this case, i8) and our types will not match (the node needs
  // to return an i32 in this case). Our LDG/LDU nodes do not support the
  // concept of sign-/zero-extension, so emulate it here by adding an explicit
  // CVT instruction. Ptxas should clean up any redundancies here.
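  // (Sketch for the i8-to-i32 zext case above: the selected LDG defines an
  // i16-class register, since i8 was promoted earlier, and the added CVT
  // comes out roughly as "cvt.u32.u8 %r1, %rs1;".)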

  EVT OrigType = N->getValueType(0);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);

  if (OrigType != EltVT && LdNode) {
    // We have an extending-load. The instruction we selected operates on the
    // smaller type, but the SDNode we are replacing has the larger type. We
    // need to emit a CVT to make the types match.
    bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
    unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
                                       EltVT.getSimpleVT(), IsSigned);

    // For each output value, apply the manual sign/zero-extension and make sure
    // all users of the load go through that CVT.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Res(LD, i);
      SDValue OrigVal(N, i);

      SDNode *CvtNode =
          CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
                                 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
                                                           DL, MVT::i32));
      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
    }
  }

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  SDLoc dl(N);
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;

  // do not support pre/post inc/dec
  if (ST->isIndexed())
    return false;

  if (!StoreVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = ST->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  if (SimpleVT.isVector()) {
    assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
    // v2f16 is stored using st.b32
    toTypeWidth = 32;
  }

  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    // f16 uses .b16 as its storage type.
    toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
                                           : NVPTX::PTXLdStInstCode::Float;
  else
    toType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue Addr;
  SDValue Offset, Base;
  Optional<unsigned> Opcode;
  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N2, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
                      Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
                      Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
          NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (!Opcode)
      return false;

    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
                      Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
                          NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
                      Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  }

  if (!NVPTXST)
    return false;

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
  ReplaceNode(N, NVPTXST);
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  Optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *ST;
  EVT EltVT = Op1.getValueType();
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT StoreVT = MemSD->getMemoryVT();

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
                       "memory space");
  }
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  assert(StoreVT.isSimple() && "Store value is not simple");
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  unsigned ToType;
  if (ScalarVT.isFloatingPoint())
    ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
                                           : NVPTX::PTXLdStInstCode::Float;
  else
    ToType = NVPTX::PTXLdStInstCode::Unsigned;

  SmallVector<SDValue, 12> StOps;
  SDValue N2;
  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::StoreV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    N2 = N->getOperand(3);
    break;
  case NVPTXISD::StoreV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    StOps.push_back(N->getOperand(3));
    StOps.push_back(N->getOperand(4));
    N2 = N->getOperand(5);
    break;
  default:
    return false;
  }

  // v8f16 is a special case. PTX doesn't have st.v8.f16
  // instruction. Instead, we split the vector into v2f16 chunks and
  // store them with st.v4.b32.
  if (EltVT == MVT::v2f16) {
    assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected store opcode.");
    EltVT = MVT::i32;
    ToType = NVPTX::PTXLdStInstCode::Untyped;
    ToTypeWidth = 32;
  }
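  // (Mirrors the load path: the v8f16 store comes out roughly as
  //  "st.global.v4.b32 [addr], {%r0, %r1, %r2, %r3};".)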
1898 
1899  StOps.push_back(getI32Imm(IsVolatile, DL));
1900  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1901  StOps.push_back(getI32Imm(VecType, DL));
1902  StOps.push_back(getI32Imm(ToType, DL));
1903  StOps.push_back(getI32Imm(ToTypeWidth, DL));
1904 
1905  if (SelectDirectAddr(N2, Addr)) {
1906  switch (N->getOpcode()) {
1907  default:
1908  return false;
1909  case NVPTXISD::StoreV2:
1910  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1911  NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1912  NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1913  NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1914  NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1915  break;
1916  case NVPTXISD::StoreV4:
1917  Opcode =
1918  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1919  NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1920  NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1921  NVPTX::STV_f32_v4_avar, None);
1922  break;
1923  }
1924  StOps.push_back(Addr);
1925  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1926  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1927  switch (N->getOpcode()) {
1928  default:
1929  return false;
1930  case NVPTXISD::StoreV2:
1931  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1932  NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1933  NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1934  NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1935  NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1936  break;
1937  case NVPTXISD::StoreV4:
1938  Opcode =
1939  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1940  NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1941  NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1942  NVPTX::STV_f32_v4_asi, None);
1943  break;
1944  }
1945  StOps.push_back(Base);
1946  StOps.push_back(Offset);
1947  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1948  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1949  if (PointerSize == 64) {
1950  switch (N->getOpcode()) {
1951  default:
1952  return false;
1953  case NVPTXISD::StoreV2:
1954  Opcode = pickOpcodeForVT(
1955  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
1956  NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
1957  NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
1958  NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
1959  NVPTX::STV_f64_v2_ari_64);
1960  break;
1961  case NVPTXISD::StoreV4:
1962  Opcode = pickOpcodeForVT(
1963  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
1964  NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
1965  NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
1966  NVPTX::STV_f32_v4_ari_64, None);
1967  break;
1968  }
1969  } else {
1970  switch (N->getOpcode()) {
1971  default:
1972  return false;
1973  case NVPTXISD::StoreV2:
1974  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1975  NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1976  NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1977  NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
1978  NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1979  break;
1980  case NVPTXISD::StoreV4:
1981  Opcode =
1982  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
1983  NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
1984  NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
1985  NVPTX::STV_f32_v4_ari, None);
1986  break;
1987  }
1988  }
1989  StOps.push_back(Base);
1990  StOps.push_back(Offset);
1991  } else {
1992  if (PointerSize == 64) {
1993  switch (N->getOpcode()) {
1994  default:
1995  return false;
1996  case NVPTXISD::StoreV2:
1997  Opcode = pickOpcodeForVT(
1998  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
1999  NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2000  NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2001  NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2002  NVPTX::STV_f64_v2_areg_64);
2003  break;
2004  case NVPTXISD::StoreV4:
2005  Opcode = pickOpcodeForVT(
2006  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
2007  NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
2008  NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2009  NVPTX::STV_f32_v4_areg_64, None);
2010  break;
2011  }
2012  } else {
2013  switch (N->getOpcode()) {
2014  default:
2015  return false;
2016  case NVPTXISD::StoreV2:
2017  Opcode =
2018  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
2019  NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2020  NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2021  NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2022  NVPTX::STV_f64_v2_areg);
2023  break;
2024  case NVPTXISD::StoreV4:
2025  Opcode =
2026  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2027  NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
2028  NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2029  NVPTX::STV_f32_v4_areg, None);
2030  break;
2031  }
2032  }
2033  StOps.push_back(N2);
2034  }
2035 
2036  if (!Opcode)
2037  return false;
2038 
2039  StOps.push_back(Chain);
2040 
2041  ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
2042 
2043  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2044  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2045  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2046 
2047  ReplaceNode(N, ST);
2048  return true;
2049 }
2050 
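// Illustrative sketch (not part of the original source): the STV_* tables
// above all funnel through pickOpcodeForVT, which maps an element type to one
// candidate opcode per slot. The i64/f64 slots are Optional so combinations
// with no PTX instruction (e.g. the v4 forms, which would exceed a 128-bit
// vector access) can be rejected by passing None. A minimal standalone model
// of that dispatch, using std::optional in place of llvm::Optional:

#include <optional>

enum class EltTy { I8, I16, I32, I64, F16, F16x2, F32, F64 };

static std::optional<unsigned>
pickOpcodeSketch(EltTy VT, unsigned I8, unsigned I16, unsigned I32,
                 std::optional<unsigned> I64, unsigned F16, unsigned F16x2,
                 unsigned F32, std::optional<unsigned> F64) {
  switch (VT) {
  case EltTy::I8:    return I8;
  case EltTy::I16:   return I16;
  case EltTy::I32:   return I32;
  case EltTy::I64:   return I64;   // empty for the v4 forms
  case EltTy::F16:   return F16;
  case EltTy::F16x2: return F16x2;
  case EltTy::F32:   return F32;
  case EltTy::F64:   return F64;   // empty for the v4 forms
  }
  return std::nullopt;
}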
2051 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2052  SDValue Chain = Node->getOperand(0);
2053  SDValue Offset = Node->getOperand(2);
2054  SDValue Flag = Node->getOperand(3);
2055  SDLoc DL(Node);
2056  MemSDNode *Mem = cast<MemSDNode>(Node);
2057 
2058  unsigned VecSize;
2059  switch (Node->getOpcode()) {
2060  default:
2061  return false;
2062  case NVPTXISD::LoadParam:
2063  VecSize = 1;
2064  break;
2065  case NVPTXISD::LoadParamV2:
2066  VecSize = 2;
2067  break;
2068  case NVPTXISD::LoadParamV4:
2069  VecSize = 4;
2070  break;
2071  }
2072 
2073  EVT EltVT = Node->getValueType(0);
2074  EVT MemVT = Mem->getMemoryVT();
2075 
2076  Optional<unsigned> Opcode;
2077 
2078  switch (VecSize) {
2079  default:
2080  return false;
2081  case 1:
2082  Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2083  NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2084  NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2085  NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2086  NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2087  break;
2088  case 2:
2089  Opcode =
2090  pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2091  NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2092  NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2093  NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2094  NVPTX::LoadParamMemV2F64);
2095  break;
2096  case 4:
2097  Opcode = pickOpcodeForVT(
2098  MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2099  NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2100  NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2101  NVPTX::LoadParamMemV4F32, None);
2102  break;
2103  }
2104  if (!Opcode)
2105  return false;
2106 
2107  SDVTList VTs;
2108  if (VecSize == 1) {
2109  VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2110  } else if (VecSize == 2) {
2111  VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2112  } else {
2113  EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2114  VTs = CurDAG->getVTList(EVTs);
2115  }
2116 
2117  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2118 
2119  SmallVector<SDValue, 2> Ops;
2120  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2121  Ops.push_back(Chain);
2122  Ops.push_back(Flag);
2123 
2124  ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
2125  return true;
2126 }
2127 
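// Illustrative sketch (not part of the original source): tryLoadParam gives
// the selected machine node one result value per element loaded, then an
// output chain, then glue, so the node can be threaded between the call's
// other glued nodes. A standalone model of how that result-type list grows
// with the vector size:

#include <string>
#include <vector>

static std::vector<std::string>
loadParamResultTypes(unsigned VecSize, const std::string &EltVT) {
  std::vector<std::string> VTs(VecSize, EltVT); // one result per element
  VTs.emplace_back("Other");                    // the output chain
  VTs.emplace_back("Glue");                     // glue to adjacent call nodes
  return VTs;
}

// loadParamResultTypes(2, "i32") yields {"i32", "i32", "Other", "Glue"},
// matching the VecSize == 2 branch above.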
2128 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2129  SDLoc DL(N);
2130  SDValue Chain = N->getOperand(0);
2131  SDValue Offset = N->getOperand(1);
2132  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2133  MemSDNode *Mem = cast<MemSDNode>(N);
2134 
2135  // How many elements do we have?
2136  unsigned NumElts = 1;
2137  switch (N->getOpcode()) {
2138  default:
2139  return false;
2140  case NVPTXISD::StoreRetval:
2141  NumElts = 1;
2142  break;
2143  case NVPTXISD::StoreRetvalV2:
2144  NumElts = 2;
2145  break;
2146  case NVPTXISD::StoreRetvalV4:
2147  NumElts = 4;
2148  break;
2149  }
2150 
2151  // Build vector of operands
2152  SmallVector<SDValue, 6> Ops;
2153  for (unsigned i = 0; i < NumElts; ++i)
2154  Ops.push_back(N->getOperand(i + 2));
2155  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2156  Ops.push_back(Chain);
2157 
2158  // Determine target opcode
2159  // If we have an i1, use an 8-bit store. The lowering code in
2160  // NVPTXISelLowering will have already emitted an upcast.
2161  Optional<unsigned> Opcode = 0;
2162  switch (NumElts) {
2163  default:
2164  return false;
2165  case 1:
2166  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2167  NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2168  NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2169  NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2170  NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2171  break;
2172  case 2:
2173  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2174  NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2175  NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2176  NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2177  NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2178  break;
2179  case 4:
2180  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2181  NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2182  NVPTX::StoreRetvalV4I32, None,
2183  NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2184  NVPTX::StoreRetvalV4F32, None);
2185  break;
2186  }
2187  if (!Opcode)
2188  return false;
2189 
2190  SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
2191  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2192  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2193  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2194 
2195  ReplaceNode(N, Ret);
2196  return true;
2197 }
2198 
2199 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2200  SDLoc DL(N);
2201  SDValue Chain = N->getOperand(0);
2202  SDValue Param = N->getOperand(1);
2203  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2204  SDValue Offset = N->getOperand(2);
2205  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2206  MemSDNode *Mem = cast<MemSDNode>(N);
2207  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2208 
2209  // How many elements do we have?
2210  unsigned NumElts = 1;
2211  switch (N->getOpcode()) {
2212  default:
2213  return false;
2214  case NVPTXISD::StoreParamU32:
2215  case NVPTXISD::StoreParamS32:
2216  case NVPTXISD::StoreParam:
2217  NumElts = 1;
2218  break;
2219  case NVPTXISD::StoreParamV2:
2220  NumElts = 2;
2221  break;
2222  case NVPTXISD::StoreParamV4:
2223  NumElts = 4;
2224  break;
2225  }
2226 
2227  // Build vector of operands
2228  SmallVector<SDValue, 8> Ops;
2229  for (unsigned i = 0; i < NumElts; ++i)
2230  Ops.push_back(N->getOperand(i + 3));
2231  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2232  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2233  Ops.push_back(Chain);
2234  Ops.push_back(Flag);
2235 
2236  // Determine target opcode
2237  // If we have an i1, use an 8-bit store. The lowering code in
2238  // NVPTXISelLowering will have already emitted an upcast.
2239  Optional<unsigned> Opcode = 0;
2240  switch (N->getOpcode()) {
2241  default:
2242  switch (NumElts) {
2243  default:
2244  return false;
2245  case 1:
2246  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2247  NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2248  NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2249  NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2250  NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2251  break;
2252  case 2:
2253  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2254  NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2255  NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2256  NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2257  NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2258  break;
2259  case 4:
2260  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2261  NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2262  NVPTX::StoreParamV4I32, None,
2263  NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2264  NVPTX::StoreParamV4F32, None);
2265  break;
2266  }
2267  if (!Opcode)
2268  return false;
2269  break;
2270  // Special case: if we have a sign-extend/zero-extend node, insert the
2271  // conversion instruction first, and use that as the value operand to
2272  // the selected StoreParam node.
2273  case NVPTXISD::StoreParamU32: {
2274  Opcode = NVPTX::StoreParamI32;
2275  SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2276  MVT::i32);
2277  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2278  MVT::i32, Ops[0], CvtNone);
2279  Ops[0] = SDValue(Cvt, 0);
2280  break;
2281  }
2282  case NVPTXISD::StoreParamS32: {
2283  Opcode = NVPTX::StoreParamI32;
2284  SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2285  MVT::i32);
2286  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2287  MVT::i32, Ops[0], CvtNone);
2288  Ops[0] = SDValue(Cvt, 0);
2289  break;
2290  }
2291  }
2292 
2293  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2294  SDNode *Ret =
2295  CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
2296  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2297  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2298  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2299 
2300  ReplaceNode(N, Ret);
2301  return true;
2302 }
2303 
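// Illustrative sketch (not part of the original source): for StoreParamU32 /
// StoreParamS32 there is no extending StoreParam instruction, so the selector
// materializes an explicit widening cvt first and stores its 32-bit result.
// A standalone model of that rewrite over string-named opcodes (the names
// mirror the opcodes used above):

#include <string>
#include <utility>

// Returns the convert to emit first and the store opcode to use, given the
// signedness of the 16-to-32-bit extension; the cvt's result becomes
// operand 0 of the store.
static std::pair<std::string, std::string>
selectExtendingStoreParam(bool IsSigned) {
  std::string Cvt = IsSigned ? "CVT_s32_s16" : "CVT_u32_u16";
  return {Cvt, "StoreParamI32"};
}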
2304 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2305  unsigned Opc = 0;
2306 
2307  switch (N->getOpcode()) {
2308  default: return false;
2310  Opc = NVPTX::TEX_1D_F32_S32;
2311  break;
2313  Opc = NVPTX::TEX_1D_F32_F32;
2314  break;
2316  Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2317  break;
2319  Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2320  break;
2321  case NVPTXISD::Tex1DS32S32:
2322  Opc = NVPTX::TEX_1D_S32_S32;
2323  break;
2325  Opc = NVPTX::TEX_1D_S32_F32;
2326  break;
2328  Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
2329  break;
2331  Opc = NVPTX::TEX_1D_S32_F32_GRAD;
2332  break;
2333  case NVPTXISD::Tex1DU32S32:
2334  Opc = NVPTX::TEX_1D_U32_S32;
2335  break;
2337  Opc = NVPTX::TEX_1D_U32_F32;
2338  break;
2340  Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2341  break;
2343  Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2344  break;
2346  Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
2347  break;
2349  Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2350  break;
2352  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2353  break;
2355  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2356  break;
2358  Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
2359  break;
2361  Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
2362  break;
2364  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
2365  break;
2367  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
2368  break;
2370  Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2371  break;
2373  Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2374  break;
2376  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2377  break;
2379  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2380  break;
2382  Opc = NVPTX::TEX_2D_F32_S32;
2383  break;
2385  Opc = NVPTX::TEX_2D_F32_F32;
2386  break;
2388  Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2389  break;
2391  Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2392  break;
2393  case NVPTXISD::Tex2DS32S32:
2394  Opc = NVPTX::TEX_2D_S32_S32;
2395  break;
2397  Opc = NVPTX::TEX_2D_S32_F32;
2398  break;
2400  Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
2401  break;
2403  Opc = NVPTX::TEX_2D_S32_F32_GRAD;
2404  break;
2405  case NVPTXISD::Tex2DU32S32:
2406  Opc = NVPTX::TEX_2D_U32_S32;
2407  break;
2409  Opc = NVPTX::TEX_2D_U32_F32;
2410  break;
2412  Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2413  break;
2415  Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2416  break;
2418  Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
2419  break;
2421  Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2422  break;
2424  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2425  break;
2427  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2428  break;
2430  Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
2431  break;
2433  Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
2434  break;
2436  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
2437  break;
2439  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
2440  break;
2442  Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2443  break;
2445  Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2446  break;
2448  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2449  break;
2451  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2452  break;
2454  Opc = NVPTX::TEX_3D_F32_S32;
2455  break;
2457  Opc = NVPTX::TEX_3D_F32_F32;
2458  break;
2460  Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2461  break;
2463  Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2464  break;
2465  case NVPTXISD::Tex3DS32S32:
2466  Opc = NVPTX::TEX_3D_S32_S32;
2467  break;
2469  Opc = NVPTX::TEX_3D_S32_F32;
2470  break;
2472  Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
2473  break;
2475  Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2476  break;
2477  case NVPTXISD::Tex3DU32S32:
2478  Opc = NVPTX::TEX_3D_U32_S32;
2479  break;
2481  Opc = NVPTX::TEX_3D_U32_F32;
2482  break;
2484  Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2485  break;
2487  Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2488  break;
2490  Opc = NVPTX::TEX_CUBE_F32_F32;
2491  break;
2493  Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2494  break;
2496  Opc = NVPTX::TEX_CUBE_S32_F32;
2497  break;
2499  Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2500  break;
2502  Opc = NVPTX::TEX_CUBE_U32_F32;
2503  break;
2505  Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2506  break;
2508  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2509  break;
2511  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2512  break;
2514  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2515  break;
2517  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2518  break;
2520  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2521  break;
2523  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2524  break;
2526  Opc = NVPTX::TLD4_R_2D_F32_F32;
2527  break;
2529  Opc = NVPTX::TLD4_G_2D_F32_F32;
2530  break;
2532  Opc = NVPTX::TLD4_B_2D_F32_F32;
2533  break;
2535  Opc = NVPTX::TLD4_A_2D_F32_F32;
2536  break;
2538  Opc = NVPTX::TLD4_R_2D_S32_F32;
2539  break;
2541  Opc = NVPTX::TLD4_G_2D_S32_F32;
2542  break;
2544  Opc = NVPTX::TLD4_B_2D_S32_F32;
2545  break;
2547  Opc = NVPTX::TLD4_A_2D_S32_F32;
2548  break;
2550  Opc = NVPTX::TLD4_R_2D_U32_F32;
2551  break;
2553  Opc = NVPTX::TLD4_G_2D_U32_F32;
2554  break;
2556  Opc = NVPTX::TLD4_B_2D_U32_F32;
2557  break;
2559  Opc = NVPTX::TLD4_A_2D_U32_F32;
2560  break;
2562  Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2563  break;
2565  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2566  break;
2568  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2569  break;
2571  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2572  break;
2574  Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2575  break;
2577  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2578  break;
2580  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2581  break;
2583  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2584  break;
2586  Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2587  break;
2589  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2590  break;
2592  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2593  break;
2595  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2596  break;
2598  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2599  break;
2601  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2602  break;
2604  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2605  break;
2607  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2608  break;
2610  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2611  break;
2613  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2614  break;
2616  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2617  break;
2619  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2620  break;
2622  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2623  break;
2625  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2626  break;
2628  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2629  break;
2631  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2632  break;
2634  Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2635  break;
2637  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2638  break;
2640  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2641  break;
2643  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2644  break;
2646  Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2647  break;
2649  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2650  break;
2652  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2653  break;
2655  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2656  break;
2658  Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2659  break;
2661  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2662  break;
2664  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2665  break;
2667  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2668  break;
2670  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2671  break;
2673  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2674  break;
2676  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2677  break;
2679  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2680  break;
2682  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2683  break;
2685  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2686  break;
2688  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2689  break;
2691  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2692  break;
2694  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2695  break;
2697  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2698  break;
2700  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2701  break;
2703  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2704  break;
2706  Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2707  break;
2709  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2710  break;
2712  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2713  break;
2715  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2716  break;
2718  Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2719  break;
2721  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2722  break;
2724  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2725  break;
2727  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2728  break;
2730  Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2731  break;
2733  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2734  break;
2736  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2737  break;
2739  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2740  break;
2742  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2743  break;
2745  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2746  break;
2748  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2749  break;
2751  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2752  break;
2754  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2755  break;
2757  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2758  break;
2760  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2761  break;
2763  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2764  break;
2766  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2767  break;
2769  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2770  break;
2772  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2773  break;
2775  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2776  break;
2778  Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2779  break;
2781  Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2782  break;
2784  Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2785  break;
2787  Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2788  break;
2790  Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2791  break;
2793  Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2794  break;
2796  Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2797  break;
2799  Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2800  break;
2802  Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2803  break;
2805  Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2806  break;
2808  Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2809  break;
2811  Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
2812  break;
2813  }
2814 
2815  // Copy over operands
2816  SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
2817  Ops.push_back(N->getOperand(0)); // Move chain to the back.
2818 
2819  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2820  return true;
2821 }
2822 
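// Illustrative sketch (not part of the original source): the intrinsic node
// carries its chain as operand 0, while the selected TEX_*/TLD4_* machine
// instructions expect the value operands first and the chain last, hence the
// copy from op_begin() + 1 followed by pushing operand 0. A standalone model
// of that rotation (assumes the chain operand is present):

#include <vector>

template <typename T>
static std::vector<T> moveChainToBack(const std::vector<T> &Ops) {
  std::vector<T> Out(Ops.begin() + 1, Ops.end()); // value operands first
  Out.push_back(Ops.front());                     // chain moves to the back
  return Out;
}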
2823 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2824  unsigned Opc = 0;
2825  switch (N->getOpcode()) {
2826  default: return false;
2828  Opc = NVPTX::SULD_1D_I8_CLAMP;
2829  break;
2831  Opc = NVPTX::SULD_1D_I16_CLAMP;
2832  break;
2834  Opc = NVPTX::SULD_1D_I32_CLAMP;
2835  break;
2837  Opc = NVPTX::SULD_1D_I64_CLAMP;
2838  break;
2840  Opc = NVPTX::SULD_1D_V2I8_CLAMP;
2841  break;
2843  Opc = NVPTX::SULD_1D_V2I16_CLAMP;
2844  break;
2846  Opc = NVPTX::SULD_1D_V2I32_CLAMP;
2847  break;
2849  Opc = NVPTX::SULD_1D_V2I64_CLAMP;
2850  break;
2852  Opc = NVPTX::SULD_1D_V4I8_CLAMP;
2853  break;
2855  Opc = NVPTX::SULD_1D_V4I16_CLAMP;
2856  break;
2858  Opc = NVPTX::SULD_1D_V4I32_CLAMP;
2859  break;
2861  Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
2862  break;
2864  Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
2865  break;
2867  Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
2868  break;
2870  Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
2871  break;
2873  Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
2874  break;
2876  Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
2877  break;
2879  Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
2880  break;
2882  Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
2883  break;
2885  Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
2886  break;
2888  Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
2889  break;
2891  Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
2892  break;
2894  Opc = NVPTX::SULD_2D_I8_CLAMP;
2895  break;
2897  Opc = NVPTX::SULD_2D_I16_CLAMP;
2898  break;
2900  Opc = NVPTX::SULD_2D_I32_CLAMP;
2901  break;
2903  Opc = NVPTX::SULD_2D_I64_CLAMP;
2904  break;
2906  Opc = NVPTX::SULD_2D_V2I8_CLAMP;
2907  break;
2909  Opc = NVPTX::SULD_2D_V2I16_CLAMP;
2910  break;
2912  Opc = NVPTX::SULD_2D_V2I32_CLAMP;
2913  break;
2915  Opc = NVPTX::SULD_2D_V2I64_CLAMP;
2916  break;
2918  Opc = NVPTX::SULD_2D_V4I8_CLAMP;
2919  break;
2921  Opc = NVPTX::SULD_2D_V4I16_CLAMP;
2922  break;
2924  Opc = NVPTX::SULD_2D_V4I32_CLAMP;
2925  break;
2927  Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
2928  break;
2930  Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
2931  break;
2933  Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
2934  break;
2936  Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
2937  break;
2939  Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
2940  break;
2942  Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
2943  break;
2945  Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
2946  break;
2948  Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
2949  break;
2951  Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
2952  break;
2954  Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
2955  break;
2957  Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
2958  break;
2960  Opc = NVPTX::SULD_3D_I8_CLAMP;
2961  break;
2963  Opc = NVPTX::SULD_3D_I16_CLAMP;
2964  break;
2966  Opc = NVPTX::SULD_3D_I32_CLAMP;
2967  break;
2969  Opc = NVPTX::SULD_3D_I64_CLAMP;
2970  break;
2972  Opc = NVPTX::SULD_3D_V2I8_CLAMP;
2973  break;
2975  Opc = NVPTX::SULD_3D_V2I16_CLAMP;
2976  break;
2978  Opc = NVPTX::SULD_3D_V2I32_CLAMP;
2979  break;
2981  Opc = NVPTX::SULD_3D_V2I64_CLAMP;
2982  break;
2984  Opc = NVPTX::SULD_3D_V4I8_CLAMP;
2985  break;
2987  Opc = NVPTX::SULD_3D_V4I16_CLAMP;
2988  break;
2990  Opc = NVPTX::SULD_3D_V4I32_CLAMP;
2991  break;
2993  Opc = NVPTX::SULD_1D_I8_TRAP;
2994  break;
2996  Opc = NVPTX::SULD_1D_I16_TRAP;
2997  break;
2999  Opc = NVPTX::SULD_1D_I32_TRAP;
3000  break;
3002  Opc = NVPTX::SULD_1D_I64_TRAP;
3003  break;
3005  Opc = NVPTX::SULD_1D_V2I8_TRAP;
3006  break;
3008  Opc = NVPTX::SULD_1D_V2I16_TRAP;
3009  break;
3011  Opc = NVPTX::SULD_1D_V2I32_TRAP;
3012  break;
3014  Opc = NVPTX::SULD_1D_V2I64_TRAP;
3015  break;
3017  Opc = NVPTX::SULD_1D_V4I8_TRAP;
3018  break;
3020  Opc = NVPTX::SULD_1D_V4I16_TRAP;
3021  break;
3023  Opc = NVPTX::SULD_1D_V4I32_TRAP;
3024  break;
3026  Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3027  break;
3029  Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
3030  break;
3032  Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3033  break;
3035  Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
3036  break;
3038  Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3039  break;
3041  Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3042  break;
3044  Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3045  break;
3047  Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
3048  break;
3050  Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3051  break;
3053  Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3054  break;
3056  Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3057  break;
3059  Opc = NVPTX::SULD_2D_I8_TRAP;
3060  break;
3062  Opc = NVPTX::SULD_2D_I16_TRAP;
3063  break;
3065  Opc = NVPTX::SULD_2D_I32_TRAP;
3066  break;
3068  Opc = NVPTX::SULD_2D_I64_TRAP;
3069  break;
3071  Opc = NVPTX::SULD_2D_V2I8_TRAP;
3072  break;
3074  Opc = NVPTX::SULD_2D_V2I16_TRAP;
3075  break;
3077  Opc = NVPTX::SULD_2D_V2I32_TRAP;
3078  break;
3080  Opc = NVPTX::SULD_2D_V2I64_TRAP;
3081  break;
3083  Opc = NVPTX::SULD_2D_V4I8_TRAP;
3084  break;
3086  Opc = NVPTX::SULD_2D_V4I16_TRAP;
3087  break;
3089  Opc = NVPTX::SULD_2D_V4I32_TRAP;
3090  break;
3092  Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3093  break;
3095  Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3096  break;
3098  Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3099  break;
3101  Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
3102  break;
3104  Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3105  break;
3107  Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3108  break;
3110  Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3111  break;
3113  Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
3114  break;
3116  Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3117  break;
3119  Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3120  break;
3122  Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3123  break;
3125  Opc = NVPTX::SULD_3D_I8_TRAP;
3126  break;
3128  Opc = NVPTX::SULD_3D_I16_TRAP;
3129  break;
3131  Opc = NVPTX::SULD_3D_I32_TRAP;
3132  break;
3134  Opc = NVPTX::SULD_3D_I64_TRAP;
3135  break;
3137  Opc = NVPTX::SULD_3D_V2I8_TRAP;
3138  break;
3140  Opc = NVPTX::SULD_3D_V2I16_TRAP;
3141  break;
3143  Opc = NVPTX::SULD_3D_V2I32_TRAP;
3144  break;
3146  Opc = NVPTX::SULD_3D_V2I64_TRAP;
3147  break;
3149  Opc = NVPTX::SULD_3D_V4I8_TRAP;
3150  break;
3152  Opc = NVPTX::SULD_3D_V4I16_TRAP;
3153  break;
3155  Opc = NVPTX::SULD_3D_V4I32_TRAP;
3156  break;
3158  Opc = NVPTX::SULD_1D_I8_ZERO;
3159  break;
3161  Opc = NVPTX::SULD_1D_I16_ZERO;
3162  break;
3164  Opc = NVPTX::SULD_1D_I32_ZERO;
3165  break;
3167  Opc = NVPTX::SULD_1D_I64_ZERO;
3168  break;
3170  Opc = NVPTX::SULD_1D_V2I8_ZERO;
3171  break;
3173  Opc = NVPTX::SULD_1D_V2I16_ZERO;
3174  break;
3176  Opc = NVPTX::SULD_1D_V2I32_ZERO;
3177  break;
3179  Opc = NVPTX::SULD_1D_V2I64_ZERO;
3180  break;
3182  Opc = NVPTX::SULD_1D_V4I8_ZERO;
3183  break;
3185  Opc = NVPTX::SULD_1D_V4I16_ZERO;
3186  break;
3188  Opc = NVPTX::SULD_1D_V4I32_ZERO;
3189  break;
3191  Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
3192  break;
3194  Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
3195  break;
3197  Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
3198  break;
3200  Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
3201  break;
3203  Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
3204  break;
3206  Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
3207  break;
3209  Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
3210  break;
3212  Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
3213  break;
3215  Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
3216  break;
3218  Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
3219  break;
3221  Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
3222  break;
3224  Opc = NVPTX::SULD_2D_I8_ZERO;
3225  break;
3227  Opc = NVPTX::SULD_2D_I16_ZERO;
3228  break;
3230  Opc = NVPTX::SULD_2D_I32_ZERO;
3231  break;
3233  Opc = NVPTX::SULD_2D_I64_ZERO;
3234  break;
3236  Opc = NVPTX::SULD_2D_V2I8_ZERO;
3237  break;
3239  Opc = NVPTX::SULD_2D_V2I16_ZERO;
3240  break;
3242  Opc = NVPTX::SULD_2D_V2I32_ZERO;
3243  break;
3245  Opc = NVPTX::SULD_2D_V2I64_ZERO;
3246  break;
3248  Opc = NVPTX::SULD_2D_V4I8_ZERO;
3249  break;
3251  Opc = NVPTX::SULD_2D_V4I16_ZERO;
3252  break;
3254  Opc = NVPTX::SULD_2D_V4I32_ZERO;
3255  break;
3257  Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
3258  break;
3260  Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
3261  break;
3263  Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
3264  break;
3266  Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
3267  break;
3269  Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
3270  break;
3272  Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
3273  break;
3275  Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
3276  break;
3278  Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
3279  break;
3281  Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
3282  break;
3284  Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
3285  break;
3287  Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
3288  break;
3290  Opc = NVPTX::SULD_3D_I8_ZERO;
3291  break;
3293  Opc = NVPTX::SULD_3D_I16_ZERO;
3294  break;
3296  Opc = NVPTX::SULD_3D_I32_ZERO;
3297  break;
3299  Opc = NVPTX::SULD_3D_I64_ZERO;
3300  break;
3302  Opc = NVPTX::SULD_3D_V2I8_ZERO;
3303  break;
3305  Opc = NVPTX::SULD_3D_V2I16_ZERO;
3306  break;
3308  Opc = NVPTX::SULD_3D_V2I32_ZERO;
3309  break;
3311  Opc = NVPTX::SULD_3D_V2I64_ZERO;
3312  break;
3314  Opc = NVPTX::SULD_3D_V4I8_ZERO;
3315  break;
3317  Opc = NVPTX::SULD_3D_V4I16_ZERO;
3318  break;
3320  Opc = NVPTX::SULD_3D_V4I32_ZERO;
3321  break;
3322  }
3323 
3324  // Copy over operands
3325  SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
3326  Ops.push_back(N->getOperand(0)); // Move chain to the back.
3327 
3328  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3329  return true;
3330 }
3331 
3332 
3333 /// tryBFE - Look for instruction sequences that can be made more efficient
3334 /// by using the 'bfe' (bit-field extract) PTX instruction.
3335 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3336  SDLoc DL(N);
3337  SDValue LHS = N->getOperand(0);
3338  SDValue RHS = N->getOperand(1);
3339  SDValue Len;
3340  SDValue Start;
3341  SDValue Val;
3342  bool IsSigned = false;
3343 
3344  if (N->getOpcode() == ISD::AND) {
3345  // Canonicalize the operands
3346  // We want 'and %val, %mask'
3347  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3348  std::swap(LHS, RHS);
3349  }
3350 
3351  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3352  if (!Mask) {
3353  // We need a constant mask on the RHS of the AND
3354  return false;
3355  }
3356 
3357  // Extract the mask bits
3358  uint64_t MaskVal = Mask->getZExtValue();
3359  if (!isMask_64(MaskVal)) {
3360  // We *could* handle shifted masks here, but doing so would require an
3361  // 'and' operation to fix up the low-order bits so we would trade
3362  // shr+and for bfe+and, which has the same throughput
3363  return false;
3364  }
3365 
3366  // How many bits are in our mask?
3367  uint64_t NumBits = countTrailingOnes(MaskVal);
3368  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3369 
3370  if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3371  // We have a 'srl/and' pair, extract the effective start bit and length
3372  Val = LHS.getNode()->getOperand(0);
3373  Start = LHS.getNode()->getOperand(1);
3374  ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3375  if (StartConst) {
3376  uint64_t StartVal = StartConst->getZExtValue();
3377  // How many "good" bits do we have left? "good" is defined here as bits
3378  // that exist in the original value, not shifted in.
3379  uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3380  if (NumBits > GoodBits) {
3381  // Do not handle the case where bits have been shifted in. In theory
3382  // we could handle this, but the cost is likely higher than just
3383  // emitting the srl/and pair.
3384  return false;
3385  }
3386  Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3387  } else {
3388  // Do not handle the case where the shift amount (can be zero if no srl
3389  // was found) is not constant. We could handle this case, but it would
3390  // require run-time logic that would be more expensive than just
3391  // emitting the srl/and pair.
3392  return false;
3393  }
3394  } else {
3395  // Do not handle the case where the LHS of the and is not a shift. While
3396  // it would be trivial to handle this case, it would just transform
3397  // 'and' -> 'bfe', but 'and' has higher-throughput.
3398  return false;
3399  }
3400  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3401  if (LHS->getOpcode() == ISD::AND) {
3402  ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3403  if (!ShiftCnst) {
3404  // Shift amount must be constant
3405  return false;
3406  }
3407 
3408  uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3409 
3410  SDValue AndLHS = LHS->getOperand(0);
3411  SDValue AndRHS = LHS->getOperand(1);
3412 
3413  // Canonicalize the AND to have the mask on the RHS
3414  if (isa<ConstantSDNode>(AndLHS)) {
3415  std::swap(AndLHS, AndRHS);
3416  }
3417 
3418  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3419  if (!MaskCnst) {
3420  // Mask must be constant
3421  return false;
3422  }
3423 
3424  uint64_t MaskVal = MaskCnst->getZExtValue();
3425  uint64_t NumZeros;
3426  uint64_t NumBits;
3427  if (isMask_64(MaskVal)) {
3428  NumZeros = 0;
3429  // The number of bits in the result bitfield will be the number of
3430  // trailing ones (the AND) minus the number of bits we shift off
3431  NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
3432  } else if (isShiftedMask_64(MaskVal)) {
3433  NumZeros = countTrailingZeros(MaskVal);
3434  unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
3435  // The number of bits in the result bitfield will be the number of
3436  // trailing zeros plus the number of set bits in the mask minus the
3437  // number of bits we shift off
3438  NumBits = NumZeros + NumOnes - ShiftAmt;
3439  } else {
3440  // This is not a mask we can handle
3441  return false;
3442  }
3443 
3444  if (ShiftAmt < NumZeros) {
3445  // Handling this case would require extra logic that would make this
3446  // transformation non-profitable
3447  return false;
3448  }
3449 
3450  Val = AndLHS;
3451  Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3452  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3453  } else if (LHS->getOpcode() == ISD::SHL) {
3454  // Here, we have a pattern like:
3455  //
3456  // (sra (shl val, NN), MM)
3457  // or
3458  // (srl (shl val, NN), MM)
3459  //
3460  // If MM >= NN, we can efficiently optimize this with bfe
3461  Val = LHS->getOperand(0);
3462 
3463  SDValue ShlRHS = LHS->getOperand(1);
3464  ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3465  if (!ShlCnst) {
3466  // Shift amount must be constant
3467  return false;
3468  }
3469  uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3470 
3471  SDValue ShrRHS = RHS;
3472  ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3473  if (!ShrCnst) {
3474  // Shift amount must be constant
3475  return false;
3476  }
3477  uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3478 
3479  // To avoid extra codegen and be profitable, we need Outer >= Inner
3480  if (OuterShiftAmt < InnerShiftAmt) {
3481  return false;
3482  }
3483 
3484  // If the outer shift is more than the type size, we have no bitfield to
3485  // extract (since we also check that the inner shift is <= the outer shift
3486  // then this also implies that the inner shift is < the type size)
3487  if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3488  return false;
3489  }
3490 
3491  Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3492  MVT::i32);
3493  Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3494  DL, MVT::i32);
3495 
3496  if (N->getOpcode() == ISD::SRA) {
3497  // If we have a arithmetic right shift, we need to use the signed bfe
3498  // variant
3499  IsSigned = true;
3500  }
3501  } else {
3502  // No can do...
3503  return false;
3504  }
3505  } else {
3506  // No can do...
3507  return false;
3508  }
3509 
3510 
3511  unsigned Opc;
3512  // For the BFE operations we form here from "and" and "srl", always use the
3513  // unsigned variants.
3514  if (Val.getValueType() == MVT::i32) {
3515  if (IsSigned) {
3516  Opc = NVPTX::BFE_S32rii;
3517  } else {
3518  Opc = NVPTX::BFE_U32rii;
3519  }
3520  } else if (Val.getValueType() == MVT::i64) {
3521  if (IsSigned) {
3522  Opc = NVPTX::BFE_S64rii;
3523  } else {
3524  Opc = NVPTX::BFE_U64rii;
3525  }
3526  } else {
3527  // We cannot handle this type
3528  return false;
3529  }
3530 
3531  SDValue Ops[] = {
3532  Val, Start, Len
3533  };
3534 
3535  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3536  return true;
3537 }
3538 
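// Illustrative sketch (not part of the original source): the arithmetic in
// the (srl (and val, mask), amt) branch above, run on concrete numbers. For
// mask = 0x0FF0 and amt = 4, the mask is a shifted mask with 4 trailing zeros
// and 8 ones, so the extracted field has start = 4 and len = 4 + 8 - 4 = 8,
// i.e. the pair emitted as bfe.u32 %r, %val, 4, 8. A standalone C++20 model:

#include <bit>
#include <cassert>
#include <cstdint>
#include <optional>

struct Field { uint64_t Start, Len; };

static std::optional<Field> matchShrOfAnd(uint64_t MaskVal, uint64_t ShiftAmt) {
  if (MaskVal == 0 || MaskVal == ~0ull)
    return std::nullopt;                        // degenerate masks
  uint64_t NumZeros = std::countr_zero(MaskVal);
  uint64_t Ones = MaskVal >> NumZeros;
  if ((Ones & (Ones + 1)) != 0)
    return std::nullopt;                        // not a (shifted) mask
  uint64_t NumOnes = std::popcount(Ones);
  if (ShiftAmt < NumZeros)
    return std::nullopt;                        // would need extra masking
  return Field{ShiftAmt, NumZeros + NumOnes - ShiftAmt};
}

int main() {
  auto F = matchShrOfAnd(0x0FF0, 4);            // the worked example above
  assert(F && F->Start == 4 && F->Len == 8);
}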
3539 // SelectDirectAddr - Match a direct address for DAG.
3540 // A direct address could be a globaladdress or externalsymbol.
3541 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3542  // Return true if TGA or ES.
3543  if (N.getOpcode() == ISD::TargetGlobalAddress ||
3544  N.getOpcode() == ISD::TargetExternalSymbol) {
3545  Address = N;
3546  return true;
3547  }
3548  if (N.getOpcode() == NVPTXISD::Wrapper) {
3549  Address = N.getOperand(0);
3550  return true;
3551  }
3552  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3553  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3554  if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3555  CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3556  CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3557  return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3558  }
3559  return false;
3560 }
3561 
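// Illustrative sketch (not part of the original source): SelectDirectAddr
// accepts a symbol node as-is, unwraps one NVPTXISD::Wrapper, and looks
// through a generic-to-param addrspacecast of a MoveParam by recursing on the
// underlying argument symbol. A standalone model over a tiny node-kind enum
// (the kinds are hypothetical stand-ins for the checks above):

#include <optional>

enum class AKind { GlobalSym, ExternSym, Wrapper, ParamCastOfMoveParam, Other };

struct ANode {
  AKind Kind;
  const ANode *Inner = nullptr; // operand for Wrapper / ParamCastOfMoveParam
};

static std::optional<const ANode *> selectDirectAddr(const ANode &N) {
  if (N.Kind == AKind::GlobalSym || N.Kind == AKind::ExternSym)
    return &N;                          // already a direct address
  if (N.Kind == AKind::Wrapper && N.Inner)
    return N.Inner;                     // strip the wrapper
  if (N.Kind == AKind::ParamCastOfMoveParam && N.Inner)
    return selectDirectAddr(*N.Inner);  // recurse on the argument symbol
  return std::nullopt;                  // not a direct address
}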
3562 // symbol+offset
3563 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3564  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3565  if (Addr.getOpcode() == ISD::ADD) {
3566  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3567  SDValue base = Addr.getOperand(0);
3568  if (SelectDirectAddr(base, Base)) {
3569  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3570  mvt);
3571  return true;
3572  }
3573  }
3574  }
3575  return false;
3576 }
3577 
3578 // symbol+offset
3579 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3580  SDValue &Base, SDValue &Offset) {
3581  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3582 }
3583 
3584 // symbol+offset
3585 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3586  SDValue &Base, SDValue &Offset) {
3587  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3588 }
3589 
3590 // register+offset
3591 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3592  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3593  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3594  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3595  Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3596  return true;
3597  }
3598  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3599  Addr.getOpcode() == ISD::TargetGlobalAddress)
3600  return false; // direct calls.
3601 
3602  if (Addr.getOpcode() == ISD::ADD) {
3603  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3604  return false;
3605  }
3606  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3607  if (FrameIndexSDNode *FIN =
3608  dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3609  // Constant offset from frame ref.
3610  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3611  else
3612  Base = Addr.getOperand(0);
3613  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3614  mvt);
3615  return true;
3616  }
3617  }
3618  return false;
3619 }
3620 
3621 // register+offset
3622 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3623  SDValue &Base, SDValue &Offset) {
3624  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3625 }
3626 
3627 // register+offset
3628 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3629  SDValue &Base, SDValue &Offset) {
3630  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3631 }
3632 
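// Illustrative sketch (not part of the original source): the ADDRri forms
// above split an address into a base plus an immediate offset -- a bare frame
// index becomes (frameindex, 0) and (add base, constant) becomes
// (base, constant); anything else is rejected so other patterns can try.
// A standalone model over a minimal node type:

#include <cstdint>
#include <optional>

struct MNode {
  enum Kind { FrameIndex, Constant, Add, Reg } K;
  int64_t Imm = 0;                          // for Constant
  const MNode *L = nullptr, *R = nullptr;   // for Add
};

struct BaseOffset { const MNode *Base; int64_t Offset; };

static std::optional<BaseOffset> selectADDRriSketch(const MNode &Addr) {
  if (Addr.K == MNode::FrameIndex)
    return BaseOffset{&Addr, 0};            // frame ref with zero offset
  if (Addr.K == MNode::Add && Addr.R && Addr.R->K == MNode::Constant)
    return BaseOffset{Addr.L, Addr.R->Imm}; // register/frameindex + imm
  return std::nullopt;                      // let other patterns match
}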
3633 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3634  unsigned int spN) const {
3635  const Value *Src = nullptr;
3636  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3637  if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3638  return true;
3639  Src = mN->getMemOperand()->getValue();
3640  }
3641  if (!Src)
3642  return false;
3643  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3644  return (PT->getAddressSpace() == spN);
3645  return false;
3646 }
3647 
3648 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3649 /// inline asm expressions.
3650 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3651  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3652  SDValue Op0, Op1;
3653  switch (ConstraintID) {
3654  default:
3655  return true;
3656  case InlineAsm::Constraint_m: // memory
3657  if (SelectDirectAddr(Op, Op0)) {
3658  OutOps.push_back(Op0);
3659  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3660  return false;
3661  }
3662  if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3663  OutOps.push_back(Op0);
3664  OutOps.push_back(Op1);
3665  return false;
3666  }
3667  break;
3668  }
3669  return true;
3670 }
3671 
3672 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3673 /// conversion from \p SrcTy to \p DestTy.
3674 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3675  bool IsSigned) {
3676  switch (SrcTy.SimpleTy) {
3677  default:
3678  llvm_unreachable("Unhandled source type");
3679  case MVT::i8:
3680  switch (DestTy.SimpleTy) {
3681  default:
3682  llvm_unreachable("Unhandled dest type");
3683  case MVT::i16:
3684  return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3685  case MVT::i32:
3686  return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3687  case MVT::i64:
3688  return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3689  }
3690  case MVT::i16:
3691  switch (DestTy.SimpleTy) {
3692  default:
3693  llvm_unreachable("Unhandled dest type");
3694  case MVT::i8:
3695  return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3696  case MVT::i32:
3697  return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3698  case MVT::i64:
3699  return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3700  }
3701  case MVT::i32:
3702  switch (DestTy.SimpleTy) {
3703  default:
3704  llvm_unreachable("Unhandled dest type");
3705  case MVT::i8:
3706  return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3707  case MVT::i16:
3708  return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3709  case MVT::i64:
3710  return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3711  }
3712  case MVT::i64:
3713  switch (DestTy.SimpleTy) {
3714  default:
3715  llvm_unreachable("Unhandled dest type");
3716  case MVT::i8:
3717  return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3718  case MVT::i16:
3719  return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3720  case MVT::i32:
3721  return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3722  }
3723  }
3724 }