LLVM  6.0.0svn
NVPTXISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXISelDAGToDAG.h"
15 #include "NVPTXUtilities.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/Instructions.h"
20 #include "llvm/Support/Debug.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "nvptx-isel"
28 
29 /// createNVPTXISelDag - This pass converts a legalized DAG into a
30 /// NVPTX-specific DAG, ready for instruction scheduling.
32  llvm::CodeGenOpt::Level OptLevel) {
33  return new NVPTXDAGToDAGISel(TM, OptLevel);
34 }
35 
37  CodeGenOpt::Level OptLevel)
38  : SelectionDAGISel(tm, OptLevel), TM(tm) {
39  doMulWide = (OptLevel > 0);
40 }
41 
43  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
45 }
46 
47 int NVPTXDAGToDAGISel::getDivF32Level() const {
49 }
50 
51 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
53 }
54 
55 bool NVPTXDAGToDAGISel::useF32FTZ() const {
57 }
58 
59 bool NVPTXDAGToDAGISel::allowFMA() const {
61  return TL->allowFMA(*MF, OptLevel);
62 }
63 
64 bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
66  return TL->allowUnsafeFPMath(*MF);
67 }
68 
69 /// Select - Select instructions not customized! Used for
70 /// expanded, promoted and normal instructions.
// NOTE(review): this listing came from a doxygen extraction that dropped
// many source lines; every gap below is flagged where a `case` label run
// visibly went missing. Restore them from the upstream file before use.
71 void NVPTXDAGToDAGISel::Select(SDNode *N) {
72 
73  if (N->isMachineOpcode()) {
74  N->setNodeId(-1);
75  return; // Already selected.
76  }
77 
78  switch (N->getOpcode()) {
79  case ISD::LOAD:
80  if (tryLoad(N))
81  return;
82  break;
83  case ISD::STORE:
84  if (tryStore(N))
85  return;
86  break;
// NOTE(review): source line 87 missing -- presumably
// "case ISD::EXTRACT_VECTOR_ELT:" given the handler below; confirm upstream.
88  if (tryEXTRACT_VECTOR_ELEMENT(N))
89  return;
90  break;
// NOTE(review): source line 91 missing -- presumably "case ISD::SETCC:".
92  SelectSETP_F16X2(N);
93  return;
94 
95  case NVPTXISD::LoadV2:
96  case NVPTXISD::LoadV4:
97  if (tryLoadVector(N))
98  return;
99  break;
100  case NVPTXISD::LDGV2:
101  case NVPTXISD::LDGV4:
102  case NVPTXISD::LDUV2:
103  case NVPTXISD::LDUV4:
104  if (tryLDGLDU(N))
105  return;
106  break;
107  case NVPTXISD::StoreV2:
108  case NVPTXISD::StoreV4:
109  if (tryStoreVector(N))
110  return;
111  break;
112  case NVPTXISD::LoadParam:
// NOTE(review): source lines 113-114 missing -- likely the LoadParamV2/V4
// case labels.
115  if (tryLoadParam(N))
116  return;
117  break;
// NOTE(review): source lines 118-120 missing -- likely the StoreRetval /
// StoreRetvalV2 / StoreRetvalV4 case labels.
121  if (tryStoreRetval(N))
122  return;
123  break;
// NOTE(review): source lines 124-128 missing -- likely the StoreParam*
// case labels.
129  if (tryStoreParam(N))
130  return;
131  break;
// NOTE(review): source line 132 missing -- likely
// "case ISD::INTRINSIC_WO_CHAIN:".
133  if (tryIntrinsicNoChain(N))
134  return;
135  break;
// NOTE(review): source line 136 missing -- likely
// "case ISD::INTRINSIC_W_CHAIN:".
137  if (tryIntrinsicChain(N))
138  return;
139  break;
// NOTE(review): source lines 140-307 missing -- the long run of texture
// intrinsic opcode case labels dispatched to tryTextureIntrinsic().
308  if (tryTextureIntrinsic(N))
309  return;
310  break;
// NOTE(review): source lines 311-475 missing -- the long run of surface
// intrinsic opcode case labels dispatched to trySurfaceIntrinsic().
476  if (trySurfaceIntrinsic(N))
477  return;
478  break;
479  case ISD::AND:
480  case ISD::SRA:
481  case ISD::SRL:
482  // Try to select BFE
483  if (tryBFE(N))
484  return;
485  break;
486  case ISD::ADDRSPACECAST:
487  SelectAddrSpaceCast(N);
488  return;
489  case ISD::ConstantFP:
490  if (tryConstantFP16(N))
491  return;
492  break;
493  default:
494  break;
495  }
496  SelectCode(N);
497 }
498 
499 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
500  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
501  switch (IID) {
502  default:
503  return false;
504  case Intrinsic::nvvm_ldg_global_f:
505  case Intrinsic::nvvm_ldg_global_i:
506  case Intrinsic::nvvm_ldg_global_p:
507  case Intrinsic::nvvm_ldu_global_f:
508  case Intrinsic::nvvm_ldu_global_i:
509  case Intrinsic::nvvm_ldu_global_p:
510  return tryLDGLDU(N);
511  }
512 }
513 
514 // There's no way to specify FP16 immediates in .f16 ops, so we have to
515 // load them into an .f16 register first.
516 bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
517  if (N->getValueType(0) != MVT::f16)
518  return false;
520  cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
521  SDNode *LoadConstF16 =
522  CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
523  ReplaceNode(N, LoadConstF16);
524  return true;
525 }
526 
527 // Map ISD:CONDCODE value to appropriate CmpMode expected by
528 // NVPTXInstPrinter::printCmpMode()
529 static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
531  unsigned PTXCmpMode = [](ISD::CondCode CC) {
532  switch (CC) {
533  default:
534  llvm_unreachable("Unexpected condition code.");
535  case ISD::SETOEQ:
536  return CmpMode::EQ;
537  case ISD::SETOGT:
538  return CmpMode::GT;
539  case ISD::SETOGE:
540  return CmpMode::GE;
541  case ISD::SETOLT:
542  return CmpMode::LT;
543  case ISD::SETOLE:
544  return CmpMode::LE;
545  case ISD::SETONE:
546  return CmpMode::NE;
547  case ISD::SETO:
548  return CmpMode::NUM;
549  case ISD::SETUO:
550  return CmpMode::NotANumber;
551  case ISD::SETUEQ:
552  return CmpMode::EQU;
553  case ISD::SETUGT:
554  return CmpMode::GTU;
555  case ISD::SETUGE:
556  return CmpMode::GEU;
557  case ISD::SETULT:
558  return CmpMode::LTU;
559  case ISD::SETULE:
560  return CmpMode::LEU;
561  case ISD::SETUNE:
562  return CmpMode::NEU;
563  case ISD::SETEQ:
564  return CmpMode::EQ;
565  case ISD::SETGT:
566  return CmpMode::GT;
567  case ISD::SETGE:
568  return CmpMode::GE;
569  case ISD::SETLT:
570  return CmpMode::LT;
571  case ISD::SETLE:
572  return CmpMode::LE;
573  case ISD::SETNE:
574  return CmpMode::NE;
575  }
576  }(CondCode.get());
577 
578  if (FTZ)
579  PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
580 
581  return PTXCmpMode;
582 }
583 
584 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
585  unsigned PTXCmpMode =
586  getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
587  SDLoc DL(N);
588  SDNode *SetP = CurDAG->getMachineNode(
589  NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
590  N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
591  ReplaceNode(N, SetP);
592  return true;
593 }
594 
595 // Find all instances of extract_vector_elt that use this v2f16 vector
596 // and coalesce them into a scattering move instruction.
597 bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
598  SDValue Vector = N->getOperand(0);
599 
600  // We only care about f16x2 as it's the only real vector type we
601  // need to deal with.
602  if (Vector.getSimpleValueType() != MVT::v2f16)
603  return false;
604 
605  // Find and record all uses of this vector that extract element 0 or 1.
607  for (const auto &U : Vector.getNode()->uses()) {
608  if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
609  continue;
610  if (U->getOperand(0) != Vector)
611  continue;
612  if (const ConstantSDNode *IdxConst =
613  dyn_cast<ConstantSDNode>(U->getOperand(1))) {
614  if (IdxConst->getZExtValue() == 0)
615  E0.push_back(U);
616  else if (IdxConst->getZExtValue() == 1)
617  E1.push_back(U);
618  else
619  llvm_unreachable("Invalid vector index.");
620  }
621  }
622 
623  // There's no point scattering f16x2 if we only ever access one
624  // element of it.
625  if (E0.empty() || E1.empty())
626  return false;
627 
628  unsigned Op = NVPTX::SplitF16x2;
629  // If the vector has been BITCAST'ed from i32, we can use original
630  // value directly and avoid register-to-register move.
631  SDValue Source = Vector;
632  if (Vector->getOpcode() == ISD::BITCAST) {
633  Op = NVPTX::SplitI32toF16x2;
634  Source = Vector->getOperand(0);
635  }
636  // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
637  // into f16,f16 SplitF16x2(V)
638  SDNode *ScatterOp =
639  CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
640  for (auto *Node : E0)
641  ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
642  for (auto *Node : E1)
643  ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
644 
645  return true;
646 }
647 
648 static unsigned int getCodeAddrSpace(MemSDNode *N) {
649  const Value *Src = N->getMemOperand()->getValue();
650 
651  if (!Src)
653 
654  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
655  switch (PT->getAddressSpace()) {
662  default: break;
663  }
664  }
666 }
667 
669  unsigned CodeAddrSpace, MachineFunction *F) {
670  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
671  // space.
672  //
673  // We have two ways of identifying invariant loads: Loads may be explicitly
674  // marked as invariant, or we may infer them to be invariant.
675  //
676  // We currently infer invariance only for kernel function pointer params that
677  // are noalias (i.e. __restrict) and never written to.
678  //
679  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
680  // not during the SelectionDAG phase).
681  //
682  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
683  // explicitly invariant loads because these are how clang tells us to use ldg
684  // when the user uses a builtin.
685  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
686  return false;
687 
688  if (N->isInvariant())
689  return true;
690 
691  // Load wasn't explicitly invariant. Attempt to infer invariance.
692  if (!isKernelFunction(*F->getFunction()))
693  return false;
694 
695  // We use GetUnderlyingObjects() here instead of
696  // GetUnderlyingObject() mainly because the former looks through phi
697  // nodes while the latter does not. We need to look through phi
698  // nodes to handle pointer induction variables.
700  GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
701  Objs, F->getDataLayout());
702  for (Value *Obj : Objs) {
703  auto *A = dyn_cast<const Argument>(Obj);
704  if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
705  }
706 
707  return true;
708 }
709 
710 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
711  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
712  switch (IID) {
713  default:
714  return false;
715  case Intrinsic::nvvm_texsurf_handle_internal:
716  SelectTexSurfHandle(N);
717  return true;
718  }
719 }
720 
721 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
722  // Op 0 is the intrinsic ID
723  SDValue Wrapper = N->getOperand(1);
724  SDValue GlobalVal = Wrapper.getOperand(0);
725  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
726  MVT::i64, GlobalVal));
727 }
728 
729 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
730  SDValue Src = N->getOperand(0);
731  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
732  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
733  unsigned DstAddrSpace = CastN->getDestAddressSpace();
734 
735  assert(SrcAddrSpace != DstAddrSpace &&
736  "addrspacecast must be between different address spaces");
737 
738  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
739  // Specific to generic
740  unsigned Opc;
741  switch (SrcAddrSpace) {
742  default: report_fatal_error("Bad address space in addrspacecast");
744  Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
745  break;
747  Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
748  break;
749  case ADDRESS_SPACE_CONST:
750  Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
751  break;
752  case ADDRESS_SPACE_LOCAL:
753  Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
754  break;
755  }
757  Src));
758  return;
759  } else {
760  // Generic to specific
761  if (SrcAddrSpace != 0)
762  report_fatal_error("Cannot cast between two non-generic address spaces");
763  unsigned Opc;
764  switch (DstAddrSpace) {
765  default: report_fatal_error("Bad address space in addrspacecast");
767  Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
768  : NVPTX::cvta_to_global_yes;
769  break;
771  Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
772  : NVPTX::cvta_to_shared_yes;
773  break;
774  case ADDRESS_SPACE_CONST:
775  Opc =
776  TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
777  break;
778  case ADDRESS_SPACE_LOCAL:
779  Opc =
780  TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
781  break;
782  case ADDRESS_SPACE_PARAM:
783  Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
784  : NVPTX::nvvm_ptr_gen_to_param;
785  break;
786  }
788  Src));
789  return;
790  }
791 }
792 
793 // Helper function template to reduce amount of boilerplate code for
794 // opcode selection.
796  MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
797  unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
798  unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
799  switch (VT) {
800  case MVT::i1:
801  case MVT::i8:
802  return Opcode_i8;
803  case MVT::i16:
804  return Opcode_i16;
805  case MVT::i32:
806  return Opcode_i32;
807  case MVT::i64:
808  return Opcode_i64;
809  case MVT::f16:
810  return Opcode_f16;
811  case MVT::v2f16:
812  return Opcode_f16x2;
813  case MVT::f32:
814  return Opcode_f32;
815  case MVT::f64:
816  return Opcode_f64;
817  default:
818  return None;
819  }
820 }
821 
822 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
823  SDLoc dl(N);
824  LoadSDNode *LD = cast<LoadSDNode>(N);
825  EVT LoadedVT = LD->getMemoryVT();
826  SDNode *NVPTXLD = nullptr;
827 
828  // do not support pre/post inc/dec
829  if (LD->isIndexed())
830  return false;
831 
832  if (!LoadedVT.isSimple())
833  return false;
834 
835  // Address Space Setting
836  unsigned int codeAddrSpace = getCodeAddrSpace(LD);
837 
838  if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
839  return tryLDGLDU(N);
840  }
841 
842  // Volatile Setting
843  // - .volatile is only availalble for .global and .shared
844  bool isVolatile = LD->isVolatile();
845  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
846  codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
847  codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
848  isVolatile = false;
849 
850  // Type Setting: fromType + fromTypeWidth
851  //
852  // Sign : ISD::SEXTLOAD
853  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
854  // type is integer
855  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
856  MVT SimpleVT = LoadedVT.getSimpleVT();
857  MVT ScalarVT = SimpleVT.getScalarType();
858  // Read at least 8 bits (predicates are stored as 8-bit values)
859  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
860  unsigned int fromType;
861 
862  // Vector Setting
863  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
864  if (SimpleVT.isVector()) {
865  assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
866  // v2f16 is loaded using ld.b32
867  fromTypeWidth = 32;
868  }
869 
870  if ((LD->getExtensionType() == ISD::SEXTLOAD))
872  else if (ScalarVT.isFloatingPoint())
873  // f16 uses .b16 as its storage type.
874  fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
876  else
878 
879  // Create the machine instruction DAG
880  SDValue Chain = N->getOperand(0);
881  SDValue N1 = N->getOperand(1);
882  SDValue Addr;
884  Optional<unsigned> Opcode;
886 
887  if (SelectDirectAddr(N1, Addr)) {
888  Opcode = pickOpcodeForVT(
889  TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
890  NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
891  NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
892  if (!Opcode)
893  return false;
894  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
895  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
896  getI32Imm(fromTypeWidth, dl), Addr, Chain };
897  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
898  MVT::Other, Ops);
899  } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
900  : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
901  Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
902  NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
903  NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
904  NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
905  if (!Opcode)
906  return false;
907  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
908  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
909  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
910  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
911  MVT::Other, Ops);
912  } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
913  : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
914  if (TM.is64Bit())
915  Opcode = pickOpcodeForVT(
916  TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
917  NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
918  NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
919  else
920  Opcode = pickOpcodeForVT(
921  TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
922  NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
923  NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
924  if (!Opcode)
925  return false;
926  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
927  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
928  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
929  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
930  MVT::Other, Ops);
931  } else {
932  if (TM.is64Bit())
933  Opcode = pickOpcodeForVT(
934  TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
935  NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
936  NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
937  NVPTX::LD_f64_areg_64);
938  else
939  Opcode = pickOpcodeForVT(
940  TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
941  NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
942  NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
943  if (!Opcode)
944  return false;
945  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
946  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
947  getI32Imm(fromTypeWidth, dl), N1, Chain };
948  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
949  MVT::Other, Ops);
950  }
951 
952  if (!NVPTXLD)
953  return false;
954 
956  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
957  cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
958 
959  ReplaceNode(N, NVPTXLD);
960  return true;
961 }
962 
963 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
964 
965  SDValue Chain = N->getOperand(0);
966  SDValue Op1 = N->getOperand(1);
967  SDValue Addr, Offset, Base;
968  Optional<unsigned> Opcode;
969  SDLoc DL(N);
970  SDNode *LD;
971  MemSDNode *MemSD = cast<MemSDNode>(N);
972  EVT LoadedVT = MemSD->getMemoryVT();
973 
974  if (!LoadedVT.isSimple())
975  return false;
976 
977  // Address Space Setting
978  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
979 
980  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
981  return tryLDGLDU(N);
982  }
983 
984  // Volatile Setting
985  // - .volatile is only availalble for .global and .shared
986  bool IsVolatile = MemSD->isVolatile();
987  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
988  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
989  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
990  IsVolatile = false;
991 
992  // Vector Setting
993  MVT SimpleVT = LoadedVT.getSimpleVT();
994 
995  // Type Setting: fromType + fromTypeWidth
996  //
997  // Sign : ISD::SEXTLOAD
998  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
999  // type is integer
1000  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1001  MVT ScalarVT = SimpleVT.getScalarType();
1002  // Read at least 8 bits (predicates are stored as 8-bit values)
1003  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
1004  unsigned int FromType;
1005  // The last operand holds the original LoadSDNode::getExtensionType() value
1006  unsigned ExtensionType = cast<ConstantSDNode>(
1007  N->getOperand(N->getNumOperands() - 1))->getZExtValue();
1008  if (ExtensionType == ISD::SEXTLOAD)
1009  FromType = NVPTX::PTXLdStInstCode::Signed;
1010  else if (ScalarVT.isFloatingPoint())
1011  FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1013  else
1015 
1016  unsigned VecType;
1017 
1018  switch (N->getOpcode()) {
1019  case NVPTXISD::LoadV2:
1021  break;
1022  case NVPTXISD::LoadV4:
1024  break;
1025  default:
1026  return false;
1027  }
1028 
1029  EVT EltVT = N->getValueType(0);
1030 
1031  // v8f16 is a special case. PTX doesn't have ld.v8.f16
1032  // instruction. Instead, we split the vector into v2f16 chunks and
1033  // load them with ld.v4.b32.
1034  if (EltVT == MVT::v2f16) {
1035  assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1036  EltVT = MVT::i32;
1038  FromTypeWidth = 32;
1039  }
1040 
1041  if (SelectDirectAddr(Op1, Addr)) {
1042  switch (N->getOpcode()) {
1043  default:
1044  return false;
1045  case NVPTXISD::LoadV2:
1046  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1047  NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1048  NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1049  NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1050  NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1051  break;
1052  case NVPTXISD::LoadV4:
1053  Opcode =
1054  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1055  NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1056  NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1057  NVPTX::LDV_f32_v4_avar, None);
1058  break;
1059  }
1060  if (!Opcode)
1061  return false;
1062  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1063  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1064  getI32Imm(FromTypeWidth, DL), Addr, Chain };
1065  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1066  } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1067  : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1068  switch (N->getOpcode()) {
1069  default:
1070  return false;
1071  case NVPTXISD::LoadV2:
1072  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1073  NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1074  NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1075  NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1076  NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1077  break;
1078  case NVPTXISD::LoadV4:
1079  Opcode =
1080  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1081  NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1082  NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1083  NVPTX::LDV_f32_v4_asi, None);
1084  break;
1085  }
1086  if (!Opcode)
1087  return false;
1088  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1089  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1090  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1091  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1092  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1093  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1094  if (TM.is64Bit()) {
1095  switch (N->getOpcode()) {
1096  default:
1097  return false;
1098  case NVPTXISD::LoadV2:
1099  Opcode = pickOpcodeForVT(
1100  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1101  NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1102  NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1103  NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1104  NVPTX::LDV_f64_v2_ari_64);
1105  break;
1106  case NVPTXISD::LoadV4:
1107  Opcode = pickOpcodeForVT(
1108  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1109  NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1110  NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1111  NVPTX::LDV_f32_v4_ari_64, None);
1112  break;
1113  }
1114  } else {
1115  switch (N->getOpcode()) {
1116  default:
1117  return false;
1118  case NVPTXISD::LoadV2:
1119  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1120  NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1121  NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1122  NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1123  NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1124  break;
1125  case NVPTXISD::LoadV4:
1126  Opcode =
1127  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1128  NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1129  NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1130  NVPTX::LDV_f32_v4_ari, None);
1131  break;
1132  }
1133  }
1134  if (!Opcode)
1135  return false;
1136  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1137  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1138  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1139 
1140  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1141  } else {
1142  if (TM.is64Bit()) {
1143  switch (N->getOpcode()) {
1144  default:
1145  return false;
1146  case NVPTXISD::LoadV2:
1147  Opcode = pickOpcodeForVT(
1148  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1149  NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1150  NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1151  NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1152  NVPTX::LDV_f64_v2_areg_64);
1153  break;
1154  case NVPTXISD::LoadV4:
1155  Opcode = pickOpcodeForVT(
1156  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1157  NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1158  NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1159  NVPTX::LDV_f32_v4_areg_64, None);
1160  break;
1161  }
1162  } else {
1163  switch (N->getOpcode()) {
1164  default:
1165  return false;
1166  case NVPTXISD::LoadV2:
1167  Opcode =
1168  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1169  NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1170  NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1171  NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1172  NVPTX::LDV_f64_v2_areg);
1173  break;
1174  case NVPTXISD::LoadV4:
1175  Opcode = pickOpcodeForVT(
1176  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1177  NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1178  NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1179  NVPTX::LDV_f32_v4_areg, None);
1180  break;
1181  }
1182  }
1183  if (!Opcode)
1184  return false;
1185  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1186  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1187  getI32Imm(FromTypeWidth, DL), Op1, Chain };
1188  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1189  }
1190 
1192  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1193  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1194 
1195  ReplaceNode(N, LD);
1196  return true;
1197 }
1198 
1199 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1200 
1201  SDValue Chain = N->getOperand(0);
1202  SDValue Op1;
1203  MemSDNode *Mem;
1204  bool IsLDG = true;
1205 
1206  // If this is an LDG intrinsic, the address is the third operand. If its an
1207  // LDG/LDU SD node (from custom vector handling), then its the second operand
1208  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1209  Op1 = N->getOperand(2);
1210  Mem = cast<MemIntrinsicSDNode>(N);
1211  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1212  switch (IID) {
1213  default:
1214  return false;
1215  case Intrinsic::nvvm_ldg_global_f:
1216  case Intrinsic::nvvm_ldg_global_i:
1217  case Intrinsic::nvvm_ldg_global_p:
1218  IsLDG = true;
1219  break;
1220  case Intrinsic::nvvm_ldu_global_f:
1221  case Intrinsic::nvvm_ldu_global_i:
1222  case Intrinsic::nvvm_ldu_global_p:
1223  IsLDG = false;
1224  break;
1225  }
1226  } else {
1227  Op1 = N->getOperand(1);
1228  Mem = cast<MemSDNode>(N);
1229  }
1230 
1231  Optional<unsigned> Opcode;
1232  SDLoc DL(N);
1233  SDNode *LD;
1234  SDValue Base, Offset, Addr;
1235 
1236  EVT EltVT = Mem->getMemoryVT();
1237  unsigned NumElts = 1;
1238  if (EltVT.isVector()) {
1239  NumElts = EltVT.getVectorNumElements();
1240  EltVT = EltVT.getVectorElementType();
1241  }
1242 
1243  // Build the "promoted" result VTList for the load. If we are really loading
1244  // i8s, then the return type will be promoted to i16 since we do not expose
1245  // 8-bit registers in NVPTX.
1246  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1247  SmallVector<EVT, 5> InstVTs;
1248  for (unsigned i = 0; i != NumElts; ++i) {
1249  InstVTs.push_back(NodeVT);
1250  }
1251  InstVTs.push_back(MVT::Other);
1252  SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1253 
1254  if (SelectDirectAddr(Op1, Addr)) {
1255  switch (N->getOpcode()) {
1256  default:
1257  return false;
1259  if (IsLDG)
1260  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1261  NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1262  NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1263  NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1264  NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1265  NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1266  NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1267  NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1268  NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1269  else
1270  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1271  NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1272  NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1273  NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1274  NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1275  NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1276  NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1277  NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1278  NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1279  break;
1280  case NVPTXISD::LDGV2:
1281  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1282  NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1283  NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1284  NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1285  NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1286  NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1287  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1288  NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1289  NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1290  break;
1291  case NVPTXISD::LDUV2:
1292  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1293  NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1294  NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1295  NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1296  NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1297  NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1298  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1299  NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1300  NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1301  break;
1302  case NVPTXISD::LDGV4:
1303  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1304  NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1305  NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1306  NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1307  NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1308  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1309  NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
1310  break;
1311  case NVPTXISD::LDUV4:
1312  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1313  NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1314  NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1315  NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1316  NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1317  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1318  NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
1319  break;
1320  }
1321  if (!Opcode)
1322  return false;
1323  SDValue Ops[] = { Addr, Chain };
1324  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1325  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1326  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1327  if (TM.is64Bit()) {
1328  switch (N->getOpcode()) {
1329  default:
1330  return false;
1331  case ISD::LOAD:
1333  if (IsLDG)
1334  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1335  NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1336  NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1337  NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1338  NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1339  NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1340  NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1341  NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1342  NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1343  else
1344  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1345  NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1346  NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1347  NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1348  NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1349  NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1350  NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1351  NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1352  NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1353  break;
1354  case NVPTXISD::LoadV2:
1355  case NVPTXISD::LDGV2:
1356  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1357  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1358  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1359  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1360  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1361  NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1362  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1363  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1364  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1365  break;
1366  case NVPTXISD::LDUV2:
1367  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1368  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1369  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1370  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1371  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1372  NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1373  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1374  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1375  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1376  break;
1377  case NVPTXISD::LoadV4:
1378  case NVPTXISD::LDGV4:
1379  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1380  NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1381  NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1382  NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1383  NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1384  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1385  NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
1386  break;
1387  case NVPTXISD::LDUV4:
1388  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1389  NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1390  NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1391  NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1392  NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1393  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1394  NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
1395  break;
1396  }
1397  } else {
1398  switch (N->getOpcode()) {
1399  default:
1400  return false;
1401  case ISD::LOAD:
1403  if (IsLDG)
1404  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1405  NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1406  NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1407  NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1408  NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1409  NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1410  NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1411  NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1412  NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1413  else
1414  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1415  NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1416  NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1417  NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1418  NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1419  NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1420  NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1421  NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1422  NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1423  break;
1424  case NVPTXISD::LoadV2:
1425  case NVPTXISD::LDGV2:
1426  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1427  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1428  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1429  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1430  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1431  NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1432  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1433  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1434  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1435  break;
1436  case NVPTXISD::LDUV2:
1437  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1438  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1439  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1440  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1441  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1442  NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1443  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1444  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1445  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1446  break;
1447  case NVPTXISD::LoadV4:
1448  case NVPTXISD::LDGV4:
1449  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1450  NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1451  NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1452  NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1453  NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1454  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1455  NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
1456  break;
1457  case NVPTXISD::LDUV4:
1458  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1459  NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1460  NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1461  NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1462  NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1463  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1464  NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
1465  break;
1466  }
1467  }
1468  if (!Opcode)
1469  return false;
1470  SDValue Ops[] = {Base, Offset, Chain};
1471  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1472  } else {
1473  if (TM.is64Bit()) {
1474  switch (N->getOpcode()) {
1475  default:
1476  return false;
1477  case ISD::LOAD:
1479  if (IsLDG)
1480  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1481  NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1482  NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1483  NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1484  NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1485  NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1486  NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1487  NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1488  NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1489  else
1490  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1491  NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1492  NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1493  NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1494  NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1495  NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1496  NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1497  NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1498  NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1499  break;
1500  case NVPTXISD::LoadV2:
1501  case NVPTXISD::LDGV2:
1502  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1503  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1504  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1505  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1506  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1507  NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1508  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1509  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1510  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1511  break;
1512  case NVPTXISD::LDUV2:
1513  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1514  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1515  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1516  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1517  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1518  NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1519  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1520  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1521  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1522  break;
1523  case NVPTXISD::LoadV4:
1524  case NVPTXISD::LDGV4:
1525  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1526  NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1527  NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1528  NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1529  NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1530  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1531  NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
1532  break;
1533  case NVPTXISD::LDUV4:
1534  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1535  NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1536  NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1537  NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1538  NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1539  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1540  NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
1541  break;
1542  }
1543  } else {
1544  switch (N->getOpcode()) {
1545  default:
1546  return false;
1547  case ISD::LOAD:
1549  if (IsLDG)
1550  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1551  NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1552  NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1553  NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1554  NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1555  NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1556  NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1557  NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1558  NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1559  else
1560  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1561  NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1562  NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1563  NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1564  NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1565  NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1566  NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1567  NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1568  NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1569  break;
1570  case NVPTXISD::LoadV2:
1571  case NVPTXISD::LDGV2:
1572  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1573  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1574  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1575  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1576  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1577  NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1578  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1579  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1580  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1581  break;
1582  case NVPTXISD::LDUV2:
1583  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1584  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1585  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1586  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1587  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1588  NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1589  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1590  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1591  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1592  break;
1593  case NVPTXISD::LoadV4:
1594  case NVPTXISD::LDGV4:
1595  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1596  NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1597  NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1598  NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1599  NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1600  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1601  NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
1602  break;
1603  case NVPTXISD::LDUV4:
1604  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1605  NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1606  NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1607  NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1608  NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1609  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1610  NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
1611  break;
1612  }
1613  }
1614  if (!Opcode)
1615  return false;
1616  SDValue Ops[] = { Op1, Chain };
1617  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1618  }
1619 
1621  MemRefs0[0] = Mem->getMemOperand();
1622  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1623 
1624  // For automatic generation of LDG (through SelectLoad[Vector], not the
1625  // intrinsics), we may have an extending load like:
1626  //
1627  // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1628  //
1629  // In this case, the matching logic above will select a load for the original
1630  // memory type (in this case, i8) and our types will not match (the node needs
1631  // to return an i32 in this case). Our LDG/LDU nodes do not support the
1632  // concept of sign-/zero-extension, so emulate it here by adding an explicit
1633  // CVT instruction. Ptxas should clean up any redundancies here.
1634 
1635  EVT OrigType = N->getValueType(0);
1636  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
1637 
1638  if (OrigType != EltVT && LdNode) {
1639  // We have an extending-load. The instruction we selected operates on the
1640  // smaller type, but the SDNode we are replacing has the larger type. We
1641  // need to emit a CVT to make the types match.
1642  bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1643  unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1644  EltVT.getSimpleVT(), IsSigned);
1645 
1646  // For each output value, apply the manual sign/zero-extension and make sure
1647  // all users of the load go through that CVT.
1648  for (unsigned i = 0; i != NumElts; ++i) {
1649  SDValue Res(LD, i);
1650  SDValue OrigVal(N, i);
1651 
1652  SDNode *CvtNode =
1653  CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
1655  DL, MVT::i32));
1656  ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1657  }
1658  }
1659 
1660  ReplaceNode(N, LD);
1661  return true;
1662 }
1663 
1664 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1665  SDLoc dl(N);
1666  StoreSDNode *ST = cast<StoreSDNode>(N);
1667  EVT StoreVT = ST->getMemoryVT();
1668  SDNode *NVPTXST = nullptr;
1669 
1670  // do not support pre/post inc/dec
1671  if (ST->isIndexed())
1672  return false;
1673 
1674  if (!StoreVT.isSimple())
1675  return false;
1676 
1677  // Address Space Setting
1678  unsigned int codeAddrSpace = getCodeAddrSpace(ST);
1679 
1680  // Volatile Setting
1681  // - .volatile is only availalble for .global and .shared
1682  bool isVolatile = ST->isVolatile();
1683  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1684  codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1685  codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1686  isVolatile = false;
1687 
1688  // Vector Setting
1689  MVT SimpleVT = StoreVT.getSimpleVT();
1690  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1691 
1692  // Type Setting: toType + toTypeWidth
1693  // - for integer type, always use 'u'
1694  //
1695  MVT ScalarVT = SimpleVT.getScalarType();
1696  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1697  if (SimpleVT.isVector()) {
1698  assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
1699  // v2f16 is stored using st.b32
1700  toTypeWidth = 32;
1701  }
1702 
1703  unsigned int toType;
1704  if (ScalarVT.isFloatingPoint())
1705  // f16 uses .b16 as its storage type.
1706  toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1708  else
1710 
1711  // Create the machine instruction DAG
1712  SDValue Chain = N->getOperand(0);
1713  SDValue N1 = N->getOperand(1);
1714  SDValue N2 = N->getOperand(2);
1715  SDValue Addr;
1716  SDValue Offset, Base;
1717  Optional<unsigned> Opcode;
1719 
1720  if (SelectDirectAddr(N2, Addr)) {
1721  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1722  NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1723  NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1724  NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1725  if (!Opcode)
1726  return false;
1727  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1728  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1729  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
1730  Chain };
1731  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1732  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1733  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1734  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1735  NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1736  NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1737  NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1738  if (!Opcode)
1739  return false;
1740  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1741  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1742  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
1743  Offset, Chain };
1744  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1745  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1746  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1747  if (TM.is64Bit())
1748  Opcode = pickOpcodeForVT(
1749  SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1750  NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1751  NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1752  else
1753  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1754  NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1755  NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1756  NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1757  if (!Opcode)
1758  return false;
1759 
1760  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1761  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1762  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
1763  Offset, Chain };
1764  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1765  } else {
1766  if (TM.is64Bit())
1767  Opcode =
1768  pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1769  NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1770  NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1771  NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1772  else
1773  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1774  NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1775  NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1776  NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1777  if (!Opcode)
1778  return false;
1779  SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1780  getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1781  getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
1782  Chain };
1783  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1784  }
1785 
1786  if (!NVPTXST)
1787  return false;
1788 
1790  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1791  cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1792  ReplaceNode(N, NVPTXST);
1793  return true;
1794 }
1795 
1796 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1797  SDValue Chain = N->getOperand(0);
1798  SDValue Op1 = N->getOperand(1);
1799  SDValue Addr, Offset, Base;
1800  Optional<unsigned> Opcode;
1801  SDLoc DL(N);
1802  SDNode *ST;
1803  EVT EltVT = Op1.getValueType();
1804  MemSDNode *MemSD = cast<MemSDNode>(N);
1805  EVT StoreVT = MemSD->getMemoryVT();
1806 
1807  // Address Space Setting
1808  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1809 
1810  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1811  report_fatal_error("Cannot store to pointer that points to constant "
1812  "memory space");
1813  }
1814 
1815  // Volatile Setting
1816  // - .volatile is only availalble for .global and .shared
1817  bool IsVolatile = MemSD->isVolatile();
1818  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1819  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1820  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1821  IsVolatile = false;
1822 
1823  // Type Setting: toType + toTypeWidth
1824  // - for integer type, always use 'u'
1825  assert(StoreVT.isSimple() && "Store value is not simple");
1826  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1827  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1828  unsigned ToType;
1829  if (ScalarVT.isFloatingPoint())
1830  ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1832  else
1834 
1836  SDValue N2;
1837  unsigned VecType;
1838 
1839  switch (N->getOpcode()) {
1840  case NVPTXISD::StoreV2:
1842  StOps.push_back(N->getOperand(1));
1843  StOps.push_back(N->getOperand(2));
1844  N2 = N->getOperand(3);
1845  break;
1846  case NVPTXISD::StoreV4:
1848  StOps.push_back(N->getOperand(1));
1849  StOps.push_back(N->getOperand(2));
1850  StOps.push_back(N->getOperand(3));
1851  StOps.push_back(N->getOperand(4));
1852  N2 = N->getOperand(5);
1853  break;
1854  default:
1855  return false;
1856  }
1857 
1858  // v8f16 is a special case. PTX doesn't have st.v8.f16
1859  // instruction. Instead, we split the vector into v2f16 chunks and
1860  // store them with st.v4.b32.
1861  if (EltVT == MVT::v2f16) {
1862  assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1863  EltVT = MVT::i32;
1865  ToTypeWidth = 32;
1866  }
1867 
1868  StOps.push_back(getI32Imm(IsVolatile, DL));
1869  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1870  StOps.push_back(getI32Imm(VecType, DL));
1871  StOps.push_back(getI32Imm(ToType, DL));
1872  StOps.push_back(getI32Imm(ToTypeWidth, DL));
1873 
1874  if (SelectDirectAddr(N2, Addr)) {
1875  switch (N->getOpcode()) {
1876  default:
1877  return false;
1878  case NVPTXISD::StoreV2:
1879  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1880  NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1881  NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1882  NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1883  NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1884  break;
1885  case NVPTXISD::StoreV4:
1886  Opcode =
1887  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1888  NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1889  NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1890  NVPTX::STV_f32_v4_avar, None);
1891  break;
1892  }
1893  StOps.push_back(Addr);
1894  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1895  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1896  switch (N->getOpcode()) {
1897  default:
1898  return false;
1899  case NVPTXISD::StoreV2:
1900  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1901  NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1902  NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1903  NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1904  NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1905  break;
1906  case NVPTXISD::StoreV4:
1907  Opcode =
1908  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1909  NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1910  NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1911  NVPTX::STV_f32_v4_asi, None);
1912  break;
1913  }
1914  StOps.push_back(Base);
1915  StOps.push_back(Offset);
1916  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1917  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1918  if (TM.is64Bit()) {
1919  switch (N->getOpcode()) {
1920  default:
1921  return false;
1922  case NVPTXISD::StoreV2:
1923  Opcode = pickOpcodeForVT(
1924  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
1925  NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
1926  NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
1927  NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
1928  NVPTX::STV_f64_v2_ari_64);
1929  break;
1930  case NVPTXISD::StoreV4:
1931  Opcode = pickOpcodeForVT(
1932  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
1933  NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
1934  NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
1935  NVPTX::STV_f32_v4_ari_64, None);
1936  break;
1937  }
1938  } else {
1939  switch (N->getOpcode()) {
1940  default:
1941  return false;
1942  case NVPTXISD::StoreV2:
1943  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1944  NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1945  NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1946  NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
1947  NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1948  break;
1949  case NVPTXISD::StoreV4:
1950  Opcode =
1951  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
1952  NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
1953  NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
1954  NVPTX::STV_f32_v4_ari, None);
1955  break;
1956  }
1957  }
1958  StOps.push_back(Base);
1959  StOps.push_back(Offset);
1960  } else {
1961  if (TM.is64Bit()) {
1962  switch (N->getOpcode()) {
1963  default:
1964  return false;
1965  case NVPTXISD::StoreV2:
1966  Opcode = pickOpcodeForVT(
1967  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
1968  NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1969  NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
1970  NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1971  NVPTX::STV_f64_v2_areg_64);
1972  break;
1973  case NVPTXISD::StoreV4:
1974  Opcode = pickOpcodeForVT(
1975  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
1976  NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
1977  NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
1978  NVPTX::STV_f32_v4_areg_64, None);
1979  break;
1980  }
1981  } else {
1982  switch (N->getOpcode()) {
1983  default:
1984  return false;
1985  case NVPTXISD::StoreV2:
1986  Opcode =
1987  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
1988  NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
1989  NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
1990  NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
1991  NVPTX::STV_f64_v2_areg);
1992  break;
1993  case NVPTXISD::StoreV4:
1994  Opcode =
1995  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
1996  NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
1997  NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
1998  NVPTX::STV_f32_v4_areg, None);
1999  break;
2000  }
2001  }
2002  StOps.push_back(N2);
2003  }
2004 
2005  if (!Opcode)
2006  return false;
2007 
2008  StOps.push_back(Chain);
2009 
2010  ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
2011 
2013  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2014  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2015 
2016  ReplaceNode(N, ST);
2017  return true;
2018 }
2019 
2020 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2021  SDValue Chain = Node->getOperand(0);
2022  SDValue Offset = Node->getOperand(2);
2023  SDValue Flag = Node->getOperand(3);
2024  SDLoc DL(Node);
2025  MemSDNode *Mem = cast<MemSDNode>(Node);
2026 
2027  unsigned VecSize;
2028  switch (Node->getOpcode()) {
2029  default:
2030  return false;
2031  case NVPTXISD::LoadParam:
2032  VecSize = 1;
2033  break;
2034  case NVPTXISD::LoadParamV2:
2035  VecSize = 2;
2036  break;
2037  case NVPTXISD::LoadParamV4:
2038  VecSize = 4;
2039  break;
2040  }
2041 
2042  EVT EltVT = Node->getValueType(0);
2043  EVT MemVT = Mem->getMemoryVT();
2044 
2045  Optional<unsigned> Opcode;
2046 
2047  switch (VecSize) {
2048  default:
2049  return false;
2050  case 1:
2051  Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2052  NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2053  NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2054  NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2055  NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2056  break;
2057  case 2:
2058  Opcode =
2059  pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2060  NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2061  NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2062  NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2063  NVPTX::LoadParamMemV2F64);
2064  break;
2065  case 4:
2066  Opcode = pickOpcodeForVT(
2067  MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2068  NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2069  NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2070  NVPTX::LoadParamMemV4F32, None);
2071  break;
2072  }
2073  if (!Opcode)
2074  return false;
2075 
2076  SDVTList VTs;
2077  if (VecSize == 1) {
2078  VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2079  } else if (VecSize == 2) {
2080  VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2081  } else {
2082  EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2083  VTs = CurDAG->getVTList(EVTs);
2084  }
2085 
2086  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2087 
2089  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2090  Ops.push_back(Chain);
2091  Ops.push_back(Flag);
2092 
2093  ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
2094  return true;
2095 }
2096 
2097 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2098  SDLoc DL(N);
2099  SDValue Chain = N->getOperand(0);
2100  SDValue Offset = N->getOperand(1);
2101  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2102  MemSDNode *Mem = cast<MemSDNode>(N);
2103 
2104  // How many elements do we have?
2105  unsigned NumElts = 1;
2106  switch (N->getOpcode()) {
2107  default:
2108  return false;
2109  case NVPTXISD::StoreRetval:
2110  NumElts = 1;
2111  break;
2113  NumElts = 2;
2114  break;
2116  NumElts = 4;
2117  break;
2118  }
2119 
2120  // Build vector of operands
2122  for (unsigned i = 0; i < NumElts; ++i)
2123  Ops.push_back(N->getOperand(i + 2));
2124  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2125  Ops.push_back(Chain);
2126 
2127  // Determine target opcode
2128  // If we have an i1, use an 8-bit store. The lowering code in
2129  // NVPTXISelLowering will have already emitted an upcast.
2130  Optional<unsigned> Opcode = 0;
2131  switch (NumElts) {
2132  default:
2133  return false;
2134  case 1:
2135  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2136  NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2137  NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2138  NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2139  NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2140  break;
2141  case 2:
2142  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2143  NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2144  NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2145  NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2146  NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2147  break;
2148  case 4:
2149  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2150  NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2151  NVPTX::StoreRetvalV4I32, None,
2152  NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2153  NVPTX::StoreRetvalV4F32, None);
2154  break;
2155  }
2156  if (!Opcode)
2157  return false;
2158 
2159  SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
2161  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2162  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2163 
2164  ReplaceNode(N, Ret);
2165  return true;
2166 }
2167 
2168 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2169  SDLoc DL(N);
2170  SDValue Chain = N->getOperand(0);
2171  SDValue Param = N->getOperand(1);
2172  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2173  SDValue Offset = N->getOperand(2);
2174  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2175  MemSDNode *Mem = cast<MemSDNode>(N);
2176  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2177 
2178  // How many elements do we have?
2179  unsigned NumElts = 1;
2180  switch (N->getOpcode()) {
2181  default:
2182  return false;
2185  case NVPTXISD::StoreParam:
2186  NumElts = 1;
2187  break;
2189  NumElts = 2;
2190  break;
2192  NumElts = 4;
2193  break;
2194  }
2195 
2196  // Build vector of operands
2198  for (unsigned i = 0; i < NumElts; ++i)
2199  Ops.push_back(N->getOperand(i + 3));
2200  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2201  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2202  Ops.push_back(Chain);
2203  Ops.push_back(Flag);
2204 
2205  // Determine target opcode
2206  // If we have an i1, use an 8-bit store. The lowering code in
2207  // NVPTXISelLowering will have already emitted an upcast.
2208  Optional<unsigned> Opcode = 0;
2209  switch (N->getOpcode()) {
2210  default:
2211  switch (NumElts) {
2212  default:
2213  return false;
2214  case 1:
2215  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2216  NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2217  NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2218  NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2219  NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2220  break;
2221  case 2:
2222  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2223  NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2224  NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2225  NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2226  NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2227  break;
2228  case 4:
2229  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2230  NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2231  NVPTX::StoreParamV4I32, None,
2232  NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2233  NVPTX::StoreParamV4F32, None);
2234  break;
2235  }
2236  if (!Opcode)
2237  return false;
2238  break;
2239  // Special case: if we have a sign-extend/zero-extend node, insert the
2240  // conversion instruction first, and use that as the value operand to
2241  // the selected StoreParam node.
2242  case NVPTXISD::StoreParamU32: {
2243  Opcode = NVPTX::StoreParamI32;
2245  MVT::i32);
2246  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2247  MVT::i32, Ops[0], CvtNone);
2248  Ops[0] = SDValue(Cvt, 0);
2249  break;
2250  }
2251  case NVPTXISD::StoreParamS32: {
2252  Opcode = NVPTX::StoreParamI32;
2254  MVT::i32);
2255  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2256  MVT::i32, Ops[0], CvtNone);
2257  Ops[0] = SDValue(Cvt, 0);
2258  break;
2259  }
2260  }
2261 
2263  SDNode *Ret =
2264  CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
2266  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2267  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2268 
2269  ReplaceNode(N, Ret);
2270  return true;
2271 }
2272 
2273 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2274  unsigned Opc = 0;
2275 
2276  switch (N->getOpcode()) {
2277  default: return false;
2279  Opc = NVPTX::TEX_1D_F32_S32;
2280  break;
2282  Opc = NVPTX::TEX_1D_F32_F32;
2283  break;
2285  Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2286  break;
2288  Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2289  break;
2290  case NVPTXISD::Tex1DS32S32:
2291  Opc = NVPTX::TEX_1D_S32_S32;
2292  break;
2294  Opc = NVPTX::TEX_1D_S32_F32;
2295  break;
2297  Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
2298  break;
2300  Opc = NVPTX::TEX_1D_S32_F32_GRAD;
2301  break;
2302  case NVPTXISD::Tex1DU32S32:
2303  Opc = NVPTX::TEX_1D_U32_S32;
2304  break;
2306  Opc = NVPTX::TEX_1D_U32_F32;
2307  break;
2309  Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2310  break;
2312  Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2313  break;
2315  Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
2316  break;
2318  Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2319  break;
2321  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2322  break;
2324  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2325  break;
2327  Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
2328  break;
2330  Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
2331  break;
2333  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
2334  break;
2336  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
2337  break;
2339  Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2340  break;
2342  Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2343  break;
2345  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2346  break;
2348  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2349  break;
2351  Opc = NVPTX::TEX_2D_F32_S32;
2352  break;
2354  Opc = NVPTX::TEX_2D_F32_F32;
2355  break;
2357  Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2358  break;
2360  Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2361  break;
2362  case NVPTXISD::Tex2DS32S32:
2363  Opc = NVPTX::TEX_2D_S32_S32;
2364  break;
2366  Opc = NVPTX::TEX_2D_S32_F32;
2367  break;
2369  Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
2370  break;
2372  Opc = NVPTX::TEX_2D_S32_F32_GRAD;
2373  break;
2374  case NVPTXISD::Tex2DU32S32:
2375  Opc = NVPTX::TEX_2D_U32_S32;
2376  break;
2378  Opc = NVPTX::TEX_2D_U32_F32;
2379  break;
2381  Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2382  break;
2384  Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2385  break;
2387  Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
2388  break;
2390  Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2391  break;
2393  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2394  break;
2396  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2397  break;
2399  Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
2400  break;
2402  Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
2403  break;
2405  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
2406  break;
2408  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
2409  break;
2411  Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2412  break;
2414  Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2415  break;
2417  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2418  break;
2420  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2421  break;
2423  Opc = NVPTX::TEX_3D_F32_S32;
2424  break;
2426  Opc = NVPTX::TEX_3D_F32_F32;
2427  break;
2429  Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2430  break;
2432  Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2433  break;
2434  case NVPTXISD::Tex3DS32S32:
2435  Opc = NVPTX::TEX_3D_S32_S32;
2436  break;
2438  Opc = NVPTX::TEX_3D_S32_F32;
2439  break;
2441  Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
2442  break;
2444  Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2445  break;
2446  case NVPTXISD::Tex3DU32S32:
2447  Opc = NVPTX::TEX_3D_U32_S32;
2448  break;
2450  Opc = NVPTX::TEX_3D_U32_F32;
2451  break;
2453  Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2454  break;
2456  Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2457  break;
2459  Opc = NVPTX::TEX_CUBE_F32_F32;
2460  break;
2462  Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2463  break;
2465  Opc = NVPTX::TEX_CUBE_S32_F32;
2466  break;
2468  Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2469  break;
2471  Opc = NVPTX::TEX_CUBE_U32_F32;
2472  break;
2474  Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2475  break;
2477  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2478  break;
2480  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2481  break;
2483  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2484  break;
2486  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2487  break;
2489  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2490  break;
2492  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2493  break;
2495  Opc = NVPTX::TLD4_R_2D_F32_F32;
2496  break;
2498  Opc = NVPTX::TLD4_G_2D_F32_F32;
2499  break;
2501  Opc = NVPTX::TLD4_B_2D_F32_F32;
2502  break;
2504  Opc = NVPTX::TLD4_A_2D_F32_F32;
2505  break;
2507  Opc = NVPTX::TLD4_R_2D_S32_F32;
2508  break;
2510  Opc = NVPTX::TLD4_G_2D_S32_F32;
2511  break;
2513  Opc = NVPTX::TLD4_B_2D_S32_F32;
2514  break;
2516  Opc = NVPTX::TLD4_A_2D_S32_F32;
2517  break;
2519  Opc = NVPTX::TLD4_R_2D_U32_F32;
2520  break;
2522  Opc = NVPTX::TLD4_G_2D_U32_F32;
2523  break;
2525  Opc = NVPTX::TLD4_B_2D_U32_F32;
2526  break;
2528  Opc = NVPTX::TLD4_A_2D_U32_F32;
2529  break;
2531  Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2532  break;
2534  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2535  break;
2537  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2538  break;
2540  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2541  break;
2543  Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2544  break;
2546  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2547  break;
2549  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2550  break;
2552  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2553  break;
2555  Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2556  break;
2558  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2559  break;
2561  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2562  break;
2564  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2565  break;
2567  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2568  break;
2570  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2571  break;
2573  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2574  break;
2576  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2577  break;
2579  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2580  break;
2582  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2583  break;
2585  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2586  break;
2588  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2589  break;
2591  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2592  break;
2594  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2595  break;
2597  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2598  break;
2600  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2601  break;
2603  Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2604  break;
2606  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2607  break;
2609  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2610  break;
2612  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2613  break;
2615  Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2616  break;
2618  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2619  break;
2621  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2622  break;
2624  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2625  break;
2627  Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2628  break;
2630  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2631  break;
2633  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2634  break;
2636  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2637  break;
2639  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2640  break;
2642  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2643  break;
2645  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2646  break;
2648  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2649  break;
2651  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2652  break;
2654  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2655  break;
2657  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2658  break;
2660  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2661  break;
2663  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2664  break;
2666  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2667  break;
2669  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2670  break;
2672  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2673  break;
2675  Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2676  break;
2678  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2679  break;
2681  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2682  break;
2684  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2685  break;
2687  Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2688  break;
2690  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2691  break;
2693  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2694  break;
2696  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2697  break;
2699  Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2700  break;
2702  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2703  break;
2705  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2706  break;
2708  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2709  break;
2711  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2712  break;
2714  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2715  break;
2717  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2718  break;
2720  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2721  break;
2723  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2724  break;
2726  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2727  break;
2729  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2730  break;
2732  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2733  break;
2735  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2736  break;
2738  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2739  break;
2741  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2742  break;
2744  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2745  break;
2747  Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2748  break;
2750  Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2751  break;
2753  Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2754  break;
2756  Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2757  break;
2759  Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2760  break;
2762  Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2763  break;
2765  Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2766  break;
2768  Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2769  break;
2771  Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2772  break;
2774  Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2775  break;
2777  Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2778  break;
2780  Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
2781  break;
2782  }
2783 
2784  // Copy over operands
2785  SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
2786  Ops.push_back(N->getOperand(0)); // Move chain to the back.
2787 
2788  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2789  return true;
2790 }
2791 
2792 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2793  unsigned Opc = 0;
2794  switch (N->getOpcode()) {
2795  default: return false;
2797  Opc = NVPTX::SULD_1D_I8_CLAMP;
2798  break;
2800  Opc = NVPTX::SULD_1D_I16_CLAMP;
2801  break;
2803  Opc = NVPTX::SULD_1D_I32_CLAMP;
2804  break;
2806  Opc = NVPTX::SULD_1D_I64_CLAMP;
2807  break;
2809  Opc = NVPTX::SULD_1D_V2I8_CLAMP;
2810  break;
2812  Opc = NVPTX::SULD_1D_V2I16_CLAMP;
2813  break;
2815  Opc = NVPTX::SULD_1D_V2I32_CLAMP;
2816  break;
2818  Opc = NVPTX::SULD_1D_V2I64_CLAMP;
2819  break;
2821  Opc = NVPTX::SULD_1D_V4I8_CLAMP;
2822  break;
2824  Opc = NVPTX::SULD_1D_V4I16_CLAMP;
2825  break;
2827  Opc = NVPTX::SULD_1D_V4I32_CLAMP;
2828  break;
2830  Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
2831  break;
2833  Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
2834  break;
2836  Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
2837  break;
2839  Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
2840  break;
2842  Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
2843  break;
2845  Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
2846  break;
2848  Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
2849  break;
2851  Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
2852  break;
2854  Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
2855  break;
2857  Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
2858  break;
2860  Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
2861  break;
2863  Opc = NVPTX::SULD_2D_I8_CLAMP;
2864  break;
2866  Opc = NVPTX::SULD_2D_I16_CLAMP;
2867  break;
2869  Opc = NVPTX::SULD_2D_I32_CLAMP;
2870  break;
2872  Opc = NVPTX::SULD_2D_I64_CLAMP;
2873  break;
2875  Opc = NVPTX::SULD_2D_V2I8_CLAMP;
2876  break;
2878  Opc = NVPTX::SULD_2D_V2I16_CLAMP;
2879  break;
2881  Opc = NVPTX::SULD_2D_V2I32_CLAMP;
2882  break;
2884  Opc = NVPTX::SULD_2D_V2I64_CLAMP;
2885  break;
2887  Opc = NVPTX::SULD_2D_V4I8_CLAMP;
2888  break;
2890  Opc = NVPTX::SULD_2D_V4I16_CLAMP;
2891  break;
2893  Opc = NVPTX::SULD_2D_V4I32_CLAMP;
2894  break;
2896  Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
2897  break;
2899  Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
2900  break;
2902  Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
2903  break;
2905  Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
2906  break;
2908  Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
2909  break;
2911  Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
2912  break;
2914  Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
2915  break;
2917  Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
2918  break;
2920  Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
2921  break;
2923  Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
2924  break;
2926  Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
2927  break;
2929  Opc = NVPTX::SULD_3D_I8_CLAMP;
2930  break;
2932  Opc = NVPTX::SULD_3D_I16_CLAMP;
2933  break;
2935  Opc = NVPTX::SULD_3D_I32_CLAMP;
2936  break;
2938  Opc = NVPTX::SULD_3D_I64_CLAMP;
2939  break;
2941  Opc = NVPTX::SULD_3D_V2I8_CLAMP;
2942  break;
2944  Opc = NVPTX::SULD_3D_V2I16_CLAMP;
2945  break;
2947  Opc = NVPTX::SULD_3D_V2I32_CLAMP;
2948  break;
2950  Opc = NVPTX::SULD_3D_V2I64_CLAMP;
2951  break;
2953  Opc = NVPTX::SULD_3D_V4I8_CLAMP;
2954  break;
2956  Opc = NVPTX::SULD_3D_V4I16_CLAMP;
2957  break;
2959  Opc = NVPTX::SULD_3D_V4I32_CLAMP;
2960  break;
2962  Opc = NVPTX::SULD_1D_I8_TRAP;
2963  break;
2965  Opc = NVPTX::SULD_1D_I16_TRAP;
2966  break;
2968  Opc = NVPTX::SULD_1D_I32_TRAP;
2969  break;
2971  Opc = NVPTX::SULD_1D_I64_TRAP;
2972  break;
2974  Opc = NVPTX::SULD_1D_V2I8_TRAP;
2975  break;
2977  Opc = NVPTX::SULD_1D_V2I16_TRAP;
2978  break;
2980  Opc = NVPTX::SULD_1D_V2I32_TRAP;
2981  break;
2983  Opc = NVPTX::SULD_1D_V2I64_TRAP;
2984  break;
2986  Opc = NVPTX::SULD_1D_V4I8_TRAP;
2987  break;
2989  Opc = NVPTX::SULD_1D_V4I16_TRAP;
2990  break;
2992  Opc = NVPTX::SULD_1D_V4I32_TRAP;
2993  break;
2995  Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
2996  break;
2998  Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
2999  break;
3001  Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3002  break;
3004  Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
3005  break;
3007  Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3008  break;
3010  Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3011  break;
3013  Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3014  break;
3016  Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
3017  break;
3019  Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3020  break;
3022  Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3023  break;
3025  Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3026  break;
3028  Opc = NVPTX::SULD_2D_I8_TRAP;
3029  break;
3031  Opc = NVPTX::SULD_2D_I16_TRAP;
3032  break;
3034  Opc = NVPTX::SULD_2D_I32_TRAP;
3035  break;
3037  Opc = NVPTX::SULD_2D_I64_TRAP;
3038  break;
3040  Opc = NVPTX::SULD_2D_V2I8_TRAP;
3041  break;
3043  Opc = NVPTX::SULD_2D_V2I16_TRAP;
3044  break;
3046  Opc = NVPTX::SULD_2D_V2I32_TRAP;
3047  break;
3049  Opc = NVPTX::SULD_2D_V2I64_TRAP;
3050  break;
3052  Opc = NVPTX::SULD_2D_V4I8_TRAP;
3053  break;
3055  Opc = NVPTX::SULD_2D_V4I16_TRAP;
3056  break;
3058  Opc = NVPTX::SULD_2D_V4I32_TRAP;
3059  break;
3061  Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3062  break;
3064  Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3065  break;
3067  Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3068  break;
3070  Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
3071  break;
3073  Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3074  break;
3076  Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3077  break;
3079  Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3080  break;
3082  Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
3083  break;
3085  Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3086  break;
3088  Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3089  break;
3091  Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3092  break;
3094  Opc = NVPTX::SULD_3D_I8_TRAP;
3095  break;
3097  Opc = NVPTX::SULD_3D_I16_TRAP;
3098  break;
3100  Opc = NVPTX::SULD_3D_I32_TRAP;
3101  break;
3103  Opc = NVPTX::SULD_3D_I64_TRAP;
3104  break;
3106  Opc = NVPTX::SULD_3D_V2I8_TRAP;
3107  break;
3109  Opc = NVPTX::SULD_3D_V2I16_TRAP;
3110  break;
3112  Opc = NVPTX::SULD_3D_V2I32_TRAP;
3113  break;
3115  Opc = NVPTX::SULD_3D_V2I64_TRAP;
3116  break;
3118  Opc = NVPTX::SULD_3D_V4I8_TRAP;
3119  break;
3121  Opc = NVPTX::SULD_3D_V4I16_TRAP;
3122  break;
3124  Opc = NVPTX::SULD_3D_V4I32_TRAP;
3125  break;
3127  Opc = NVPTX::SULD_1D_I8_ZERO;
3128  break;
3130  Opc = NVPTX::SULD_1D_I16_ZERO;
3131  break;
3133  Opc = NVPTX::SULD_1D_I32_ZERO;
3134  break;
3136  Opc = NVPTX::SULD_1D_I64_ZERO;
3137  break;
3139  Opc = NVPTX::SULD_1D_V2I8_ZERO;
3140  break;
3142  Opc = NVPTX::SULD_1D_V2I16_ZERO;
3143  break;
3145  Opc = NVPTX::SULD_1D_V2I32_ZERO;
3146  break;
3148  Opc = NVPTX::SULD_1D_V2I64_ZERO;
3149  break;
3151  Opc = NVPTX::SULD_1D_V4I8_ZERO;
3152  break;
3154  Opc = NVPTX::SULD_1D_V4I16_ZERO;
3155  break;
3157  Opc = NVPTX::SULD_1D_V4I32_ZERO;
3158  break;
3160  Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
3161  break;
3163  Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
3164  break;
3166  Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
3167  break;
3169  Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
3170  break;
3172  Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
3173  break;
3175  Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
3176  break;
3178  Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
3179  break;
3181  Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
3182  break;
3184  Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
3185  break;
3187  Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
3188  break;
3190  Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
3191  break;
3193  Opc = NVPTX::SULD_2D_I8_ZERO;
3194  break;
3196  Opc = NVPTX::SULD_2D_I16_ZERO;
3197  break;
3199  Opc = NVPTX::SULD_2D_I32_ZERO;
3200  break;
3202  Opc = NVPTX::SULD_2D_I64_ZERO;
3203  break;
3205  Opc = NVPTX::SULD_2D_V2I8_ZERO;
3206  break;
3208  Opc = NVPTX::SULD_2D_V2I16_ZERO;
3209  break;
3211  Opc = NVPTX::SULD_2D_V2I32_ZERO;
3212  break;
3214  Opc = NVPTX::SULD_2D_V2I64_ZERO;
3215  break;
3217  Opc = NVPTX::SULD_2D_V4I8_ZERO;
3218  break;
3220  Opc = NVPTX::SULD_2D_V4I16_ZERO;
3221  break;
3223  Opc = NVPTX::SULD_2D_V4I32_ZERO;
3224  break;
3226  Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
3227  break;
3229  Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
3230  break;
3232  Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
3233  break;
3235  Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
3236  break;
3238  Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
3239  break;
3241  Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
3242  break;
3244  Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
3245  break;
3247  Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
3248  break;
3250  Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
3251  break;
3253  Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
3254  break;
3256  Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
3257  break;
3259  Opc = NVPTX::SULD_3D_I8_ZERO;
3260  break;
3262  Opc = NVPTX::SULD_3D_I16_ZERO;
3263  break;
3265  Opc = NVPTX::SULD_3D_I32_ZERO;
3266  break;
3268  Opc = NVPTX::SULD_3D_I64_ZERO;
3269  break;
3271  Opc = NVPTX::SULD_3D_V2I8_ZERO;
3272  break;
3274  Opc = NVPTX::SULD_3D_V2I16_ZERO;
3275  break;
3277  Opc = NVPTX::SULD_3D_V2I32_ZERO;
3278  break;
3280  Opc = NVPTX::SULD_3D_V2I64_ZERO;
3281  break;
3283  Opc = NVPTX::SULD_3D_V4I8_ZERO;
3284  break;
3286  Opc = NVPTX::SULD_3D_V4I16_ZERO;
3287  break;
3289  Opc = NVPTX::SULD_3D_V4I32_ZERO;
3290  break;
3291  }
3292 
3293  // Copy over operands
3294  SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
3295  Ops.push_back(N->getOperand(0)); // Move chain to the back.
3296 
3297  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3298  return true;
3299 }
3300 
3301 
3302 /// SelectBFE - Look for instruction sequences that can be made more efficient
3303 /// by using the 'bfe' (bit-field extract) PTX instruction
3304 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3305  SDLoc DL(N);
3306  SDValue LHS = N->getOperand(0);
3307  SDValue RHS = N->getOperand(1);
3308  SDValue Len;
3309  SDValue Start;
3310  SDValue Val;
3311  bool IsSigned = false;
3312 
3313  if (N->getOpcode() == ISD::AND) {
3314  // Canonicalize the operands
3315  // We want 'and %val, %mask'
3316  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3317  std::swap(LHS, RHS);
3318  }
3319 
3321  if (!Mask) {
3322  // We need a constant mask on the RHS of the AND
3323  return false;
3324  }
3325 
3326  // Extract the mask bits
3327  uint64_t MaskVal = Mask->getZExtValue();
3328  if (!isMask_64(MaskVal)) {
3329  // We *could* handle shifted masks here, but doing so would require an
3330  // 'and' operation to fix up the low-order bits so we would trade
3331  // shr+and for bfe+and, which has the same throughput
3332  return false;
3333  }
3334 
3335  // How many bits are in our mask?
3336  uint64_t NumBits = countTrailingOnes(MaskVal);
3337  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3338 
3339  if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3340  // We have a 'srl/and' pair, extract the effective start bit and length
3341  Val = LHS.getNode()->getOperand(0);
3342  Start = LHS.getNode()->getOperand(1);
3343  ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3344  if (StartConst) {
3345  uint64_t StartVal = StartConst->getZExtValue();
3346  // How many "good" bits do we have left? "good" is defined here as bits
3347  // that exist in the original value, not shifted in.
3348  uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3349  if (NumBits > GoodBits) {
3350  // Do not handle the case where bits have been shifted in. In theory
3351  // we could handle this, but the cost is likely higher than just
3352  // emitting the srl/and pair.
3353  return false;
3354  }
3355  Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3356  } else {
3357  // Do not handle the case where the shift amount (can be zero if no srl
3358  // was found) is not constant. We could handle this case, but it would
3359  // require run-time logic that would be more expensive than just
3360  // emitting the srl/and pair.
3361  return false;
3362  }
3363  } else {
3364  // Do not handle the case where the LHS of the and is not a shift. While
3365  // it would be trivial to handle this case, it would just transform
3366  // 'and' -> 'bfe', but 'and' has higher-throughput.
3367  return false;
3368  }
3369  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3370  if (LHS->getOpcode() == ISD::AND) {
3371  ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3372  if (!ShiftCnst) {
3373  // Shift amount must be constant
3374  return false;
3375  }
3376 
3377  uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3378 
3379  SDValue AndLHS = LHS->getOperand(0);
3380  SDValue AndRHS = LHS->getOperand(1);
3381 
3382  // Canonicalize the AND to have the mask on the RHS
3383  if (isa<ConstantSDNode>(AndLHS)) {
3384  std::swap(AndLHS, AndRHS);
3385  }
3386 
3387  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3388  if (!MaskCnst) {
3389  // Mask must be constant
3390  return false;
3391  }
3392 
3393  uint64_t MaskVal = MaskCnst->getZExtValue();
3394  uint64_t NumZeros;
3395  uint64_t NumBits;
3396  if (isMask_64(MaskVal)) {
3397  NumZeros = 0;
3398  // The number of bits in the result bitfield will be the number of
3399  // trailing ones (the AND) minus the number of bits we shift off
3400  NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
3401  } else if (isShiftedMask_64(MaskVal)) {
3402  NumZeros = countTrailingZeros(MaskVal);
3403  unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
3404  // The number of bits in the result bitfield will be the number of
3405  // trailing zeros plus the number of set bits in the mask minus the
3406  // number of bits we shift off
3407  NumBits = NumZeros + NumOnes - ShiftAmt;
3408  } else {
3409  // This is not a mask we can handle
3410  return false;
3411  }
3412 
3413  if (ShiftAmt < NumZeros) {
3414  // Handling this case would require extra logic that would make this
3415  // transformation non-profitable
3416  return false;
3417  }
3418 
3419  Val = AndLHS;
3420  Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3421  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3422  } else if (LHS->getOpcode() == ISD::SHL) {
3423  // Here, we have a pattern like:
3424  //
3425  // (sra (shl val, NN), MM)
3426  // or
3427  // (srl (shl val, NN), MM)
3428  //
3429  // If MM >= NN, we can efficiently optimize this with bfe
3430  Val = LHS->getOperand(0);
3431 
3432  SDValue ShlRHS = LHS->getOperand(1);
3433  ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3434  if (!ShlCnst) {
3435  // Shift amount must be constant
3436  return false;
3437  }
3438  uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3439 
3440  SDValue ShrRHS = RHS;
3441  ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3442  if (!ShrCnst) {
3443  // Shift amount must be constant
3444  return false;
3445  }
3446  uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3447 
3448  // To avoid extra codegen and be profitable, we need Outer >= Inner
3449  if (OuterShiftAmt < InnerShiftAmt) {
3450  return false;
3451  }
3452 
3453  // If the outer shift is more than the type size, we have no bitfield to
3454  // extract (since we also check that the inner shift is <= the outer shift
3455  // then this also implies that the inner shift is < the type size)
3456  if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3457  return false;
3458  }
3459 
3460  Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3461  MVT::i32);
3462  Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3463  DL, MVT::i32);
3464 
3465  if (N->getOpcode() == ISD::SRA) {
3466  // If we have a arithmetic right shift, we need to use the signed bfe
3467  // variant
3468  IsSigned = true;
3469  }
3470  } else {
3471  // No can do...
3472  return false;
3473  }
3474  } else {
3475  // No can do...
3476  return false;
3477  }
3478 
3479 
3480  unsigned Opc;
3481  // For the BFE operations we form here from "and" and "srl", always use the
3482  // unsigned variants.
3483  if (Val.getValueType() == MVT::i32) {
3484  if (IsSigned) {
3485  Opc = NVPTX::BFE_S32rii;
3486  } else {
3487  Opc = NVPTX::BFE_U32rii;
3488  }
3489  } else if (Val.getValueType() == MVT::i64) {
3490  if (IsSigned) {
3491  Opc = NVPTX::BFE_S64rii;
3492  } else {
3493  Opc = NVPTX::BFE_U64rii;
3494  }
3495  } else {
3496  // We cannot handle this type
3497  return false;
3498  }
3499 
3500  SDValue Ops[] = {
3501  Val, Start, Len
3502  };
3503 
3504  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3505  return true;
3506 }
3507 
3508 // SelectDirectAddr - Match a direct address for DAG.
3509 // A direct address could be a globaladdress or externalsymbol.
3510 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3511  // Return true if TGA or ES.
3512  if (N.getOpcode() == ISD::TargetGlobalAddress ||
3514  Address = N;
3515  return true;
3516  }
3517  if (N.getOpcode() == NVPTXISD::Wrapper) {
3518  Address = N.getOperand(0);
3519  return true;
3520  }
3521  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3522  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3523  if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3524  CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3525  CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3526  return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3527  }
3528  return false;
3529 }
3530 
3531 // symbol+offset
3532 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3533  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3534  if (Addr.getOpcode() == ISD::ADD) {
3535  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3536  SDValue base = Addr.getOperand(0);
3537  if (SelectDirectAddr(base, Base)) {
3538  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3539  mvt);
3540  return true;
3541  }
3542  }
3543  }
3544  return false;
3545 }
3546 
3547 // symbol+offset
3548 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3549  SDValue &Base, SDValue &Offset) {
3550  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3551 }
3552 
3553 // symbol+offset
3554 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3555  SDValue &Base, SDValue &Offset) {
3556  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3557 }
3558 
3559 // register+offset
3560 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3561  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3562  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3563  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3564  Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3565  return true;
3566  }
3567  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3569  return false; // direct calls.
3570 
3571  if (Addr.getOpcode() == ISD::ADD) {
3572  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3573  return false;
3574  }
3575  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3576  if (FrameIndexSDNode *FIN =
3577  dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3578  // Constant offset from frame ref.
3579  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3580  else
3581  Base = Addr.getOperand(0);
3582  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3583  mvt);
3584  return true;
3585  }
3586  }
3587  return false;
3588 }
3589 
3590 // register+offset
3591 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3592  SDValue &Base, SDValue &Offset) {
3593  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3594 }
3595 
3596 // register+offset
3597 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3598  SDValue &Base, SDValue &Offset) {
3599  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3600 }
3601 
3602 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3603  unsigned int spN) const {
3604  const Value *Src = nullptr;
3605  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3606  if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3607  return true;
3608  Src = mN->getMemOperand()->getValue();
3609  }
3610  if (!Src)
3611  return false;
3612  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3613  return (PT->getAddressSpace() == spN);
3614  return false;
3615 }
3616 
3617 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3618 /// inline asm expressions.
3620  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3621  SDValue Op0, Op1;
3622  switch (ConstraintID) {
3623  default:
3624  return true;
3625  case InlineAsm::Constraint_m: // memory
3626  if (SelectDirectAddr(Op, Op0)) {
3627  OutOps.push_back(Op0);
3628  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3629  return false;
3630  }
3631  if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3632  OutOps.push_back(Op0);
3633  OutOps.push_back(Op1);
3634  return false;
3635  }
3636  break;
3637  }
3638  return true;
3639 }
3640 
3641 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3642 /// conversion from \p SrcTy to \p DestTy.
3643 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3644  bool IsSigned) {
3645  switch (SrcTy.SimpleTy) {
3646  default:
3647  llvm_unreachable("Unhandled source type");
3648  case MVT::i8:
3649  switch (DestTy.SimpleTy) {
3650  default:
3651  llvm_unreachable("Unhandled dest type");
3652  case MVT::i16:
3653  return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3654  case MVT::i32:
3655  return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3656  case MVT::i64:
3657  return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3658  }
3659  case MVT::i16:
3660  switch (DestTy.SimpleTy) {
3661  default:
3662  llvm_unreachable("Unhandled dest type");
3663  case MVT::i8:
3664  return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3665  case MVT::i32:
3666  return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3667  case MVT::i64:
3668  return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3669  }
3670  case MVT::i32:
3671  switch (DestTy.SimpleTy) {
3672  default:
3673  llvm_unreachable("Unhandled dest type");
3674  case MVT::i8:
3675  return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3676  case MVT::i16:
3677  return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3678  case MVT::i64:
3679  return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3680  }
3681  case MVT::i64:
3682  switch (DestTy.SimpleTy) {
3683  default:
3684  llvm_unreachable("Unhandled dest type");
3685  case MVT::i8:
3686  return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3687  case MVT::i16:
3688  return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3689  case MVT::i32:
3690  return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3691  }
3692  }
3693 }
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:545
bool isInvariant() const
EVT getValueType() const
Return the ValueType of the referenced return value.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:103
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
bool isVector() const
Return true if this is a vector value type.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
static Optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, Optional< unsigned > Opcode_i64, unsigned Opcode_f16, unsigned Opcode_f16x2, unsigned Opcode_f32, Optional< unsigned > Opcode_f64)
SDVTList getVTList() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
bool useF32FTZ(const MachineFunction &MF) const
static unsigned int getCodeAddrSpace(MemSDNode *N)
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
F(f)
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const NVPTXSubtarget * Subtarget
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
MachineFunction * MF
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel)
A description of a memory reference used in the backend.
Shift and rotation operations.
Definition: ISDOpcodes.h:379
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:470
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
op_iterator op_end() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SimpleValueType SimpleTy
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
unsigned getSizeInBits() const
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool isKernelFunction(const Function &F)
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:403
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:598
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:129
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:916
op_iterator op_begin() const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions...
#define EQ(a, b)
Definition: regexec.c:112
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:550
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
Definition: SelectionDAG.h:578
unsigned getSrcAddressSpace() const
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
This class is used to represent ISD::STORE nodes.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:121
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const Value * getValue() const
Return the base address of the memory access.
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
CodeGenOpt::Level OptLevel
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:112
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
const SDValue & getOperand(unsigned Num) const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getDestAddressSpace() const
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
static ManagedStatic< std::set< EVT, EVT::compareRawBits > > EVTs
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Extended Value Type.
Definition: ValueTypes.h:34
bool allowUnsafeFPMath(MachineFunction &MF) const
bool isVolatile() const
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:314
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:549
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
EVT getMemoryVT() const
Return the type of the in-memory value.
iterator_range< use_iterator > uses()
NVPTXTargetMachine.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:415
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG, ready for instruction scheduling.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:362
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:581
#define N
ISD::CondCode get() const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getOpcode() const
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
static bool isVolatile(Instruction *Inst)
const SDValue & getOperand(unsigned i) const
uint64_t getZExtValue() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
This class is used to represent ISD::LOAD nodes.