LLVM  16.0.0git
NVPTXISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the NVPTX target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "NVPTXISelDAGToDAG.h"
15 #include "NVPTXUtilities.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicsNVPTX.h"
22 #include "llvm/Support/Debug.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "nvptx-isel"
30 
31 /// createNVPTXISelDag - This pass converts a legalized DAG into a
32 /// NVPTX-specific DAG, ready for instruction scheduling.
// The visible body constructs a NVPTXDAGToDAGISel with `new`, forwarding TM
// and OptLevel, and returns the resulting pointer.
// NOTE(review): the line holding the return type, function name and the
// parameter that declares `TM` is absent from this listing (the embedded
// numbering jumps from 32 to 34). Restore it from upstream before building.
34  llvm::CodeGenOpt::Level OptLevel) {
35  return new NVPTXDAGToDAGISel(TM, OptLevel);
36 }
37 
// Constructor: forwards `tm` and OptLevel to the SelectionDAGISel base, keeps
// `tm` in the TM member, and enables doMulWide whenever OptLevel is above 0.
// NOTE(review): the first line of the signature (class-qualified name and the
// parameter declaring `tm`) is absent from this listing (embedded numbering
// jumps from 37 to 39). Restore it from upstream before building.
39  CodeGenOpt::Level OptLevel)
40  : SelectionDAGISel(tm, OptLevel), TM(tm) {
41  doMulWide = (OptLevel > 0);
42 }
43 
47 }
48 
// Accessor returning an int "f32 division level" (meaning of the value is not
// visible here).
// NOTE(review): the body is empty in this listing (embedded line 50 is
// absent), so no value is returned as written. Restore the return statement
// from upstream.
49 int NVPTXDAGToDAGISel::getDivF32Level() const {
51 }
52 
// Boolean query; presumably reports whether precise f32 sqrt should be used
// (inferred from the name only — confirm against upstream).
// NOTE(review): the body is empty in this listing (embedded line 54 is
// absent), so no value is returned as written. Restore the return statement
// from upstream.
53 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
55 }
56 
// Boolean query whose result SelectSETP_F16X2() passes as the FTZ argument of
// getPTXCmpMode().
// NOTE(review): the body is empty in this listing (embedded line 58 is
// absent), so no value is returned as written. Restore the return statement
// from upstream.
57 bool NVPTXDAGToDAGISel::useF32FTZ() const {
59 }
60 
// Returns TL->allowFMA(*MF, OptLevel), i.e. defers the decision to `TL` for
// the current machine function and optimization level.
// NOTE(review): `TL` is not declared in the visible lines (embedded line 62 is
// absent). Restore its declaration from upstream.
61 bool NVPTXDAGToDAGISel::allowFMA() const {
63  return TL->allowFMA(*MF, OptLevel);
64 }
65 
// Returns TL->allowUnsafeFPMath(*MF), i.e. defers the decision to `TL` for the
// current machine function.
// NOTE(review): `TL` is not declared in the visible lines (embedded line 67 is
// absent). Restore its declaration from upstream.
66 bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
68  return TL->allowUnsafeFPMath(*MF);
69 }
70 
71 bool NVPTXDAGToDAGISel::useShortPointers() const {
72  return TM.useShortPointers();
73 }
74 
75 /// Select - Select instructions not customized! Used for
76 /// expanded, promoted and normal instructions.
///
/// Each switch arm gives a hand-written try*/Select* routine the first chance
/// at N. An arm whose helper succeeds returns immediately; an arm whose helper
/// returns false breaks out of the switch so that N reaches SelectCode(N).
///
/// NOTE(review): several arms in this listing have lost their `case` labels
/// (flagged inline below; the embedded numbering has matching gaps), so the
/// visible text does not compile as-is. Restore the labels from upstream.
77 void NVPTXDAGToDAGISel::Select(SDNode *N) {
78 
 // Nodes that already carry a machine opcode only get their node id reset.
79  if (N->isMachineOpcode()) {
80  N->setNodeId(-1);
81  return; // Already selected.
82  }
83 
84  switch (N->getOpcode()) {
85  case ISD::LOAD:
86  case ISD::ATOMIC_LOAD:
87  if (tryLoad(N))
88  return;
89  break;
90  case ISD::STORE:
91  case ISD::ATOMIC_STORE:
92  if (tryStore(N))
93  return;
94  break;
 // NOTE(review): case label missing for this arm (embedded line 95).
96  if (tryEXTRACT_VECTOR_ELEMENT(N))
97  return;
98  break;
 // NOTE(review): case label missing for this arm (embedded line 99).
 // Unlike the try* arms, this one returns unconditionally.
100  SelectSETP_F16X2(N);
101  return;
102 
103  case NVPTXISD::LoadV2:
104  case NVPTXISD::LoadV4:
105  if (tryLoadVector(N))
106  return;
107  break;
108  case NVPTXISD::LDGV2:
109  case NVPTXISD::LDGV4:
110  case NVPTXISD::LDUV2:
111  case NVPTXISD::LDUV4:
112  if (tryLDGLDU(N))
113  return;
114  break;
115  case NVPTXISD::StoreV2:
116  case NVPTXISD::StoreV4:
117  if (tryStoreVector(N))
118  return;
119  break;
120  case NVPTXISD::LoadParam:
 // NOTE(review): embedded lines 121-122 are absent; further labels for this
 // arm may be missing.
123  if (tryLoadParam(N))
124  return;
125  break;
 // NOTE(review): case labels missing for this arm (embedded lines 126-128).
129  if (tryStoreRetval(N))
130  return;
131  break;
 // NOTE(review): case labels missing for this arm (embedded lines 132-136).
137  if (tryStoreParam(N))
138  return;
139  break;
 // NOTE(review): case label missing for this arm (embedded line 140).
141  if (tryIntrinsicNoChain(N))
142  return;
143  break;
 // NOTE(review): case label missing for this arm (embedded line 144).
145  if (tryIntrinsicChain(N))
146  return;
147  break;
 // NOTE(review): case labels missing for this arm (embedded lines 148-315).
316  if (tryTextureIntrinsic(N))
317  return;
318  break;
 // NOTE(review): case labels missing for this arm (embedded lines 319-483).
484  if (trySurfaceIntrinsic(N))
485  return;
486  break;
487  case ISD::AND:
488  case ISD::SRA:
489  case ISD::SRL:
490  // Try to select BFE
491  if (tryBFE(N))
492  return;
493  break;
 // Address-space casts are always handled by the custom selector.
494  case ISD::ADDRSPACECAST:
495  SelectAddrSpaceCast(N);
496  return;
497  case ISD::ConstantFP:
498  if (tryConstantFP16(N))
499  return;
500  break;
501  default:
502  break;
503  }
 // Nothing above claimed the node: hand it to SelectCode().
504  SelectCode(N);
505 }
506 
507 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
508  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
509  switch (IID) {
510  default:
511  return false;
512  case Intrinsic::nvvm_ldg_global_f:
513  case Intrinsic::nvvm_ldg_global_i:
514  case Intrinsic::nvvm_ldg_global_p:
515  case Intrinsic::nvvm_ldu_global_f:
516  case Intrinsic::nvvm_ldu_global_i:
517  case Intrinsic::nvvm_ldu_global_p:
518  return tryLDGLDU(N);
519  }
520 }
521 
522 // There's no way to specify FP16 immediates in .f16 ops, so we have to
523 // load them into an .f16 register first.
// Returns false for any result type other than MVT::f16. Otherwise N is
// replaced by a LOAD_CONST_F16 machine node that takes `Val` as its operand,
// and true is returned.
// NOTE(review): the start of the statement that declares `Val` is absent from
// this listing (embedded line 527); only its argument list on the next line
// survives. Restore it from upstream.
524 bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
525  if (N->getValueType(0) != MVT::f16)
526  return false;
528  cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
529  SDNode *LoadConstF16 =
530  CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
531  ReplaceNode(N, LoadConstF16);
532  return true;
533 }
534 
535 // Map ISD::CondCode value to appropriate CmpMode expected by
536 // NVPTXInstPrinter::printCmpMode()
//
// CondCode: node whose condition code (CondCode.get()) is translated.
// FTZ:      when true, NVPTX::PTXCmpMode::FTZ_FLAG is OR-ed into the result.
// Returns the CmpMode value, optionally combined with the FTZ flag. Any
// condition code not listed in the switch hits llvm_unreachable.
//
// NOTE(review): `CmpMode` is used unqualified below but nothing in the visible
// lines brings it into scope (embedded line 538 is absent). Restore that line
// from upstream.
537 static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
539  unsigned PTXCmpMode = [](ISD::CondCode CC) {
540  switch (CC) {
541  default:
542  llvm_unreachable("Unexpected condition code.");
 // Ordered comparisons (SETO*).
543  case ISD::SETOEQ:
544  return CmpMode::EQ;
545  case ISD::SETOGT:
546  return CmpMode::GT;
547  case ISD::SETOGE:
548  return CmpMode::GE;
549  case ISD::SETOLT:
550  return CmpMode::LT;
551  case ISD::SETOLE:
552  return CmpMode::LE;
553  case ISD::SETONE:
554  return CmpMode::NE;
555  case ISD::SETO:
556  return CmpMode::NUM;
 // Unordered comparisons (SETU*) map to the *U modes.
557  case ISD::SETUO:
558  return CmpMode::NotANumber;
559  case ISD::SETUEQ:
560  return CmpMode::EQU;
561  case ISD::SETUGT:
562  return CmpMode::GTU;
563  case ISD::SETUGE:
564  return CmpMode::GEU;
565  case ISD::SETULT:
566  return CmpMode::LTU;
567  case ISD::SETULE:
568  return CmpMode::LEU;
569  case ISD::SETUNE:
570  return CmpMode::NEU;
 // Codes without an O/U qualifier get the same modes as the ordered forms.
571  case ISD::SETEQ:
572  return CmpMode::EQ;
573  case ISD::SETGT:
574  return CmpMode::GT;
575  case ISD::SETGE:
576  return CmpMode::GE;
577  case ISD::SETLT:
578  return CmpMode::LT;
579  case ISD::SETLE:
580  return CmpMode::LE;
581  case ISD::SETNE:
582  return CmpMode::NE;
583  }
584  }(CondCode.get());
585 
586  if (FTZ)
587  PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
588 
589  return PTXCmpMode;
590 }
591 
592 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
593  unsigned PTXCmpMode =
594  getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
595  SDLoc DL(N);
596  SDNode *SetP = CurDAG->getMachineNode(
597  NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
598  N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
599  ReplaceNode(N, SetP);
600  return true;
601 }
602 
603 // Find all instances of extract_vector_elt that use this v2f16 vector
604 // and coalesce them into a scattering move instruction.
//
// Returns false when the vector operand of N is not v2f16, or when the
// collected uses touch only one of the two elements. Otherwise every recorded
// element-0 extract is rewired to result 0 of ScatterOp and every element-1
// extract to result 1, and true is returned.
//
// NOTE(review): three lines are absent from this listing — the declaration of
// the E0/E1 containers (embedded line 614), the declaration of `Source`
// (line 639) and the initializer of `ScatterOp` (line 647). Restore them from
// upstream.
605 bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
606  SDValue Vector = N->getOperand(0);
607 
608  // We only care about f16x2 as it's the only real vector type we
609  // need to deal with.
610  if (Vector.getSimpleValueType() != MVT::v2f16)
611  return false;
612 
613  // Find and record all uses of this vector that extract element 0 or 1.
615  for (auto *U : Vector.getNode()->uses()) {
616  if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
617  continue;
 // Skip extracts whose vector operand is a different value of the same node.
618  if (U->getOperand(0) != Vector)
619  continue;
 // Extracts with a non-constant index are left alone.
620  if (const ConstantSDNode *IdxConst =
621  dyn_cast<ConstantSDNode>(U->getOperand(1))) {
622  if (IdxConst->getZExtValue() == 0)
623  E0.push_back(U);
624  else if (IdxConst->getZExtValue() == 1)
625  E1.push_back(U);
626  else
627  llvm_unreachable("Invalid vector index.");
628  }
629  }
630 
631  // There's no point scattering f16x2 if we only ever access one
632  // element of it.
633  if (E0.empty() || E1.empty())
634  return false;
635 
636  unsigned Op = NVPTX::SplitF16x2;
637  // If the vector has been BITCAST'ed from i32, we can use original
638  // value directly and avoid register-to-register move.
640  if (Vector->getOpcode() == ISD::BITCAST) {
641  Op = NVPTX::SplitI32toF16x2;
642  Source = Vector->getOperand(0);
643  }
644  // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
645  // into f16,f16 SplitF16x2(V)
646  SDNode *ScatterOp =
648  for (auto *Node : E0)
649  ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
650  for (auto *Node : E1)
651  ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
652 
653  return true;
654 }
655 
// Derives an address-space code for memory node N from the IR value attached
// to its memory operand: a null value is handled first, then a pointer-typed
// value is dispatched on its address space.
// NOTE(review): this listing has lost the return after `if (!Src)` (embedded
// line 660), every non-default case of the switch (lines 664-669) and the
// final return (line 673), so the mapping itself is not visible here. Restore
// those lines from upstream.
656 static unsigned int getCodeAddrSpace(MemSDNode *N) {
657  const Value *Src = N->getMemOperand()->getValue();
658 
659  if (!Src)
661 
662  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
663  switch (PT->getAddressSpace()) {
670  default: break;
671  }
672  }
674 }
675 
// Decides whether the memory access N may be selected as an ldg load.
//   N             - memory node being selected.
//   Subtarget     - queried via hasLDG().
//   CodeAddrSpace - address-space code for N; must equal
//                   NVPTX::PTXLdStInstCode::GLOBAL for a true result.
//   F             - enclosing machine function, used for the kernel check.
// Returns true for explicitly invariant loads, or when every underlying object
// of the accessed pointer is either a read-only noalias argument of a kernel
// function or a constant global variable.
// NOTE(review): the declaration of `Objs` is absent from this listing
// (embedded line 706). Restore it from upstream.
676 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
677  unsigned CodeAddrSpace, MachineFunction *F) {
678  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
679  // space.
680  //
681  // We have two ways of identifying invariant loads: Loads may be explicitly
682  // marked as invariant, or we may infer them to be invariant.
683  //
684  // We currently infer invariance for loads from
685  // - constant global variables, and
686  // - kernel function pointer params that are noalias (i.e. __restrict) and
687  // never written to.
688  //
689  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
690  // not during the SelectionDAG phase).
691  //
692  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
693  // explicitly invariant loads because these are how clang tells us to use ldg
694  // when the user uses a builtin.
695  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
696  return false;
697 
698  if (N->isInvariant())
699  return true;
700 
701  bool IsKernelFn = isKernelFunction(F->getFunction());
702 
703  // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
704  // because the former looks through phi nodes while the latter does not. We
705  // need to look through phi nodes to handle pointer induction variables.
707  getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);
708 
 // Any underlying object that is neither an Argument nor a GlobalVariable
 // makes the whole check fail.
709  return all_of(Objs, [&](const Value *V) {
710  if (auto *A = dyn_cast<const Argument>(V))
711  return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
712  if (auto *GV = dyn_cast<const GlobalVariable>(V))
713  return GV->isConstant();
714  return false;
715  });
716 }
717 
718 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
719  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
720  switch (IID) {
721  default:
722  return false;
723  case Intrinsic::nvvm_texsurf_handle_internal:
724  SelectTexSurfHandle(N);
725  return true;
726  }
727 }
728 
729 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
730  // Op 0 is the intrinsic ID
731  SDValue Wrapper = N->getOperand(1);
732  SDValue GlobalVal = Wrapper.getOperand(0);
733  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
734  MVT::i64, GlobalVal));
735 }
736 
// Selects an ADDRSPACECAST node into a machine node built from operand 0.
// Two directions are supported: a specific address space to generic, and
// generic to a specific address space. The opcode depends on the address
// space, on TM.is64Bit() and, for some spaces, on useShortPointers().
// Unsupported address spaces, and casts between two non-generic spaces, end
// in report_fatal_error().
//
// NOTE(review): four `case` labels are absent from this listing (embedded
// lines 750, 753, 779 and 783), so the first two opcode assignments of each
// switch follow `default:` with no label of their own. Restore the labels
// from upstream.
737 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
738  SDValue Src = N->getOperand(0);
739  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
740  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
741  unsigned DstAddrSpace = CastN->getDestAddressSpace();
742  assert(SrcAddrSpace != DstAddrSpace &&
743  "addrspacecast must be between different address spaces");
744 
745  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
746  // Specific to generic
747  unsigned Opc;
748  switch (SrcAddrSpace) {
749  default: report_fatal_error("Bad address space in addrspacecast");
 // NOTE(review): case label missing (embedded line 750).
751  Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
752  break;
 // NOTE(review): case label missing (embedded line 753).
754  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
755  : NVPTX::cvta_shared_yes_64)
756  : NVPTX::cvta_shared_yes;
757  break;
758  case ADDRESS_SPACE_CONST:
759  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
760  : NVPTX::cvta_const_yes_64)
761  : NVPTX::cvta_const_yes;
762  break;
763  case ADDRESS_SPACE_LOCAL:
764  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
765  : NVPTX::cvta_local_yes_64)
766  : NVPTX::cvta_local_yes;
767  break;
768  }
769  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
770  Src));
771  return;
772  } else {
773  // Generic to specific
 // NOTE(review): the literal 0 here is presumably ADDRESS_SPACE_GENERIC, the
 // constant used in the branch above — confirm.
774  if (SrcAddrSpace != 0)
775  report_fatal_error("Cannot cast between two non-generic address spaces");
776  unsigned Opc;
777  switch (DstAddrSpace) {
778  default: report_fatal_error("Bad address space in addrspacecast");
 // NOTE(review): case label missing (embedded line 779).
780  Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
781  : NVPTX::cvta_to_global_yes;
782  break;
 // NOTE(review): case label missing (embedded line 783).
784  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
785  : NVPTX::cvta_to_shared_yes_64)
786  : NVPTX::cvta_to_shared_yes;
787  break;
788  case ADDRESS_SPACE_CONST:
789  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
790  : NVPTX::cvta_to_const_yes_64)
791  : NVPTX::cvta_to_const_yes;
792  break;
793  case ADDRESS_SPACE_LOCAL:
794  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
795  : NVPTX::cvta_to_local_yes_64)
796  : NVPTX::cvta_to_local_yes;
797  break;
 // The param space is only handled in this generic-to-specific direction.
798  case ADDRESS_SPACE_PARAM:
799  Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
800  : NVPTX::nvvm_ptr_gen_to_param;
801  break;
802  }
803  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
804  Src));
805  return;
806  }
807 }
808 
809 // Helper function template to reduce amount of boilerplate code for
810 // opcode selection.
811 static std::optional<unsigned>
812 pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
813  unsigned Opcode_i16, unsigned Opcode_i32,
814  std::optional<unsigned> Opcode_i64, unsigned Opcode_f16,
815  unsigned Opcode_f16x2, unsigned Opcode_f32,
816  std::optional<unsigned> Opcode_f64) {
817  switch (VT) {
818  case MVT::i1:
819  case MVT::i8:
820  return Opcode_i8;
821  case MVT::i16:
822  return Opcode_i16;
823  case MVT::i32:
824  return Opcode_i32;
825  case MVT::i64:
826  return Opcode_i64;
827  case MVT::f16:
828  case MVT::bf16:
829  return Opcode_f16;
830  case MVT::v2f16:
831  case MVT::v2bf16:
832  return Opcode_f16x2;
833  case MVT::f32:
834  return Opcode_f32;
835  case MVT::f64:
836  return Opcode_f64;
837  default:
838  return std::nullopt;
839  }
840 }
841 
// Classifies VT for load/store selection. The visible structure separates
// three cases: the floating-point types f16, bf16, v2f16 and v2bf16; every
// other floating-point type; and all non-floating-point types.
// NOTE(review): the return statement of each of the three cases is absent from
// this listing (embedded lines 849, 851 and 854), so the returned values are
// not visible here. Restore them from upstream.
842 static int getLdStRegType(EVT VT) {
843  if (VT.isFloatingPoint())
844  switch (VT.getSimpleVT().SimpleTy) {
845  case MVT::f16:
846  case MVT::bf16:
847  case MVT::v2f16:
848  case MVT::v2bf16:
850  default:
852  }
853  else
855 }
856 
// Tries to select the load or atomic load N into an NVPTX LD_* machine node.
//
// Returns false, leaving N unselected, when the load is indexed, its memory
// type is not simple, its ordering is stronger than monotonic, or
// pickOpcodeForVT() has no opcode for the result type. Loads accepted by
// canLowerToLDG() are forwarded to tryLDGLDU(). On success N is replaced by
// the new node, which receives N's memory operand, and true is returned.
//
// Four addressing forms are tried in order: direct address (avar),
// SelectADDRsi (asi), SelectADDRri (ari) and, as the fallback, the plain
// address operand (areg). The ari and areg forms use separate _64 opcodes when
// the pointer size is 64 bits.
//
// NOTE(review): two lines are absent from this listing — the statement
// executed for SEXTLOAD before the `else` at embedded line 921 (line 920) and
// the declaration of `Base`/`Offset` (line 928). Restore them from upstream.
857 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
858  SDLoc dl(N);
859  MemSDNode *LD = cast<MemSDNode>(N);
860  assert(LD->readMem() && "Expected load");
 // PlainLoad is null when N is not a LoadSDNode.
861  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
862  EVT LoadedVT = LD->getMemoryVT();
863  SDNode *NVPTXLD = nullptr;
864 
865  // do not support pre/post inc/dec
866  if (PlainLoad && PlainLoad->isIndexed())
867  return false;
868 
869  if (!LoadedVT.isSimple())
870  return false;
871 
872  AtomicOrdering Ordering = LD->getSuccessOrdering();
873  // In order to lower atomic loads with stronger guarantees we would need to
874  // use load.acquire or insert fences. However these features were only added
875  // with PTX ISA 6.0 / sm_70.
876  // TODO: Check if we can actually use the new instructions and implement them.
877  if (isStrongerThanMonotonic(Ordering))
878  return false;
879 
880  // Address Space Setting
881  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
882  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
883  return tryLDGLDU(N);
884  }
885 
886  unsigned int PointerSize =
887  CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
888 
889  // Volatile Setting
890  // - .volatile is only available for .global and .shared
891  // - .volatile has the same memory synchronization semantics as .relaxed.sys
 // Monotonic atomic loads are treated as volatile as well; the flag is
 // cleared again outside the global, shared and generic spaces.
892  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
893  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
894  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
895  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
896  isVolatile = false;
897 
898  // Type Setting: fromType + fromTypeWidth
899  //
900  // Sign : ISD::SEXTLOAD
901  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
902  // type is integer
903  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
904  MVT SimpleVT = LoadedVT.getSimpleVT();
905  MVT ScalarVT = SimpleVT.getScalarType();
906  // Read at least 8 bits (predicates are stored as 8-bit values)
907  unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
908  unsigned int fromType;
909 
910  // Vector Setting
911  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
912  if (SimpleVT.isVector()) {
913  assert((LoadedVT == MVT::v2f16 || LoadedVT == MVT::v2bf16) &&
914  "Unexpected vector type");
915  // v2f16/v2bf16 is loaded using ld.b32
916  fromTypeWidth = 32;
917  }
918 
919  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
 // NOTE(review): the statement for this branch is missing (embedded line 920).
921  else
922  fromType = getLdStRegType(ScalarVT);
923 
924  // Create the machine instruction DAG
925  SDValue Chain = N->getOperand(0);
926  SDValue N1 = N->getOperand(1);
927  SDValue Addr;
 // NOTE(review): declaration of Base/Offset missing (embedded line 928).
929  std::optional<unsigned> Opcode;
930  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
931 
 // Every form passes the same five immediates (volatile flag, address space,
 // vector kind, source type, source width), then the address operand(s) and
 // finally the chain.
932  if (SelectDirectAddr(N1, Addr)) {
933  Opcode = pickOpcodeForVT(
934  TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
935  NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
936  NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
937  if (!Opcode)
938  return false;
939  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
940  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
941  getI32Imm(fromTypeWidth, dl), Addr, Chain };
942  NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
943  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
944  : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
945  Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
946  NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
947  NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
948  NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
949  if (!Opcode)
950  return false;
951  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
952  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
953  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
954  NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
955  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
956  : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
957  if (PointerSize == 64)
958  Opcode = pickOpcodeForVT(
959  TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
960  NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
961  NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
962  else
963  Opcode = pickOpcodeForVT(
964  TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
965  NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
966  NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
967  if (!Opcode)
968  return false;
969  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
970  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
971  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
972  NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
973  } else {
 // Fallback: use the address operand N1 itself.
974  if (PointerSize == 64)
975  Opcode = pickOpcodeForVT(
976  TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
977  NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
978  NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
979  NVPTX::LD_f64_areg_64);
980  else
981  Opcode = pickOpcodeForVT(
982  TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
983  NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
984  NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
985  if (!Opcode)
986  return false;
987  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
988  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
989  getI32Imm(fromTypeWidth, dl), N1, Chain };
990  NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
991  }
992 
993  if (!NVPTXLD)
994  return false;
995 
 // Carry the original memory operand over to the machine node.
996  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
997  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});
998 
999  ReplaceNode(N, NVPTXLD);
1000  return true;
1001 }
1002 
// Tries to select the NVPTXISD::LoadV2 / LoadV4 node N into an LDV_* machine
// node.
//
// Returns false when the memory type is not simple, the opcode is neither
// LoadV2 nor LoadV4, or pickOpcodeForVT() has no opcode for the element type
// (the V4 tables pass std::nullopt for the i64 and f64 slots). Loads accepted
// by canLowerToLDG() are forwarded to tryLDGLDU(). On success N is replaced by
// the new node, which receives N's memory operand, and true is returned.
//
// Addressing forms are tried in the order avar, asi, ari, areg; the ari and
// areg forms use separate _64 opcodes when the pointer size is 64 bits.
//
// NOTE(review): five lines are absent from this listing — the initializer of
// PointerSize (embedded line 1024), the FromType assignment for SEXTLOAD
// (1051), the VecType assignments for LoadV2 and LoadV4 (1059, 1062) and one
// statement in the v2f16/v2bf16 special case (1076). Restore them from
// upstream.
1003 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
1004 
1005  SDValue Chain = N->getOperand(0);
1006  SDValue Op1 = N->getOperand(1);
1007  SDValue Addr, Offset, Base;
1008  std::optional<unsigned> Opcode;
1009  SDLoc DL(N);
1010  SDNode *LD;
1011  MemSDNode *MemSD = cast<MemSDNode>(N);
1012  EVT LoadedVT = MemSD->getMemoryVT();
1013 
1014  if (!LoadedVT.isSimple())
1015  return false;
1016 
1017  // Address Space Setting
1018  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
1019  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
1020  return tryLDGLDU(N);
1021  }
1022 
1023  unsigned int PointerSize =
 // NOTE(review): initializer missing (embedded line 1024).
1025 
1026  // Volatile Setting
1027  // - .volatile is only available for .global and .shared
1028  bool IsVolatile = MemSD->isVolatile();
1029  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1030  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1031  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1032  IsVolatile = false;
1033 
1034  // Vector Setting
1035  MVT SimpleVT = LoadedVT.getSimpleVT();
1036 
1037  // Type Setting: fromType + fromTypeWidth
1038  //
1039  // Sign : ISD::SEXTLOAD
1040  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1041  // type is integer
1042  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1043  MVT ScalarVT = SimpleVT.getScalarType();
1044  // Read at least 8 bits (predicates are stored as 8-bit values)
1045  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
1046  unsigned int FromType;
1047  // The last operand holds the original LoadSDNode::getExtensionType() value
1048  unsigned ExtensionType = cast<ConstantSDNode>(
1049  N->getOperand(N->getNumOperands() - 1))->getZExtValue();
1050  if (ExtensionType == ISD::SEXTLOAD)
 // NOTE(review): the statement for this branch is missing (embedded line 1051).
1052  else
1053  FromType = getLdStRegType(ScalarVT);
1054 
1055  unsigned VecType;
1056 
1057  switch (N->getOpcode()) {
1058  case NVPTXISD::LoadV2:
 // NOTE(review): VecType assignment missing (embedded line 1059).
1060  break;
1061  case NVPTXISD::LoadV4:
 // NOTE(review): VecType assignment missing (embedded line 1062).
1063  break;
1064  default:
1065  return false;
1066  }
1067 
1068  EVT EltVT = N->getValueType(0);
1069 
1070  // v8f16 is a special case. PTX doesn't have ld.v8.f16
1071  // instruction. Instead, we split the vector into v2f16 chunks and
1072  // load them with ld.v4.b32.
1073  if (EltVT == MVT::v2f16 || EltVT == MVT::v2bf16) {
1074  assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1075  EltVT = MVT::i32;
 // NOTE(review): one statement missing here (embedded line 1076).
1077  FromTypeWidth = 32;
1078  }
1079 
 // Every form passes the same five immediates (volatile flag, address space,
 // vector kind, source type, source width), then the address operand(s) and
 // finally the chain.
1080  if (SelectDirectAddr(Op1, Addr)) {
1081  switch (N->getOpcode()) {
1082  default:
1083  return false;
1084  case NVPTXISD::LoadV2:
1085  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1086  NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1087  NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1088  NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1089  NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1090  break;
1091  case NVPTXISD::LoadV4:
1092  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1093  NVPTX::LDV_i8_v4_avar, NVPTX::LDV_i16_v4_avar,
1094  NVPTX::LDV_i32_v4_avar, std::nullopt,
1095  NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1096  NVPTX::LDV_f32_v4_avar, std::nullopt);
1097  break;
1098  }
1099  if (!Opcode)
1100  return false;
1101  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1102  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1103  getI32Imm(FromTypeWidth, DL), Addr, Chain };
1104  LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1105  } else if (PointerSize == 64
1106  ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1107  : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1108  switch (N->getOpcode()) {
1109  default:
1110  return false;
1111  case NVPTXISD::LoadV2:
1112  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1113  NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1114  NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1115  NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1116  NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1117  break;
1118  case NVPTXISD::LoadV4:
1119  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1120  NVPTX::LDV_i8_v4_asi, NVPTX::LDV_i16_v4_asi,
1121  NVPTX::LDV_i32_v4_asi, std::nullopt,
1122  NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1123  NVPTX::LDV_f32_v4_asi, std::nullopt);
1124  break;
1125  }
1126  if (!Opcode)
1127  return false;
1128  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1129  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1130  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1131  LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1132  } else if (PointerSize == 64
1133  ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1134  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1135  if (PointerSize == 64) {
1136  switch (N->getOpcode()) {
1137  default:
1138  return false;
1139  case NVPTXISD::LoadV2:
1140  Opcode = pickOpcodeForVT(
1141  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1142  NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1143  NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1144  NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1145  NVPTX::LDV_f64_v2_ari_64);
1146  break;
1147  case NVPTXISD::LoadV4:
1148  Opcode = pickOpcodeForVT(
1149  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1150  NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
1151  NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1152  NVPTX::LDV_f32_v4_ari_64, std::nullopt);
1153  break;
1154  }
1155  } else {
1156  switch (N->getOpcode()) {
1157  default:
1158  return false;
1159  case NVPTXISD::LoadV2:
1160  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1161  NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1162  NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1163  NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1164  NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1165  break;
1166  case NVPTXISD::LoadV4:
1167  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1168  NVPTX::LDV_i8_v4_ari, NVPTX::LDV_i16_v4_ari,
1169  NVPTX::LDV_i32_v4_ari, std::nullopt,
1170  NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1171  NVPTX::LDV_f32_v4_ari, std::nullopt);
1172  break;
1173  }
1174  }
1175  if (!Opcode)
1176  return false;
1177  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1178  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1179  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1180 
1181  LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1182  } else {
 // Fallback: use the address operand Op1 itself.
1183  if (PointerSize == 64) {
1184  switch (N->getOpcode()) {
1185  default:
1186  return false;
1187  case NVPTXISD::LoadV2:
1188  Opcode = pickOpcodeForVT(
1189  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1190  NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1191  NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1192  NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1193  NVPTX::LDV_f64_v2_areg_64);
1194  break;
1195  case NVPTXISD::LoadV4:
1196  Opcode = pickOpcodeForVT(
1197  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1198  NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
1199  NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1200  NVPTX::LDV_f32_v4_areg_64, std::nullopt);
1201  break;
1202  }
1203  } else {
1204  switch (N->getOpcode()) {
1205  default:
1206  return false;
1207  case NVPTXISD::LoadV2:
1208  Opcode =
1209  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1210  NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1211  NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1212  NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1213  NVPTX::LDV_f64_v2_areg);
1214  break;
1215  case NVPTXISD::LoadV4:
1216  Opcode = pickOpcodeForVT(
1217  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1218  NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, std::nullopt,
1219  NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1220  NVPTX::LDV_f32_v4_areg, std::nullopt);
1221  break;
1222  }
1223  }
1224  if (!Opcode)
1225  return false;
1226  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1227  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1228  getI32Imm(FromTypeWidth, DL), Op1, Chain };
1229  LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1230  }
1231 
 // Carry the original memory operand over to the machine node.
1232  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1233  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1234 
1235  ReplaceNode(N, LD);
1236  return true;
1237 }
1238 
1239 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1240 
1241  SDValue Chain = N->getOperand(0);
1242  SDValue Op1;
1243  MemSDNode *Mem;
1244  bool IsLDG = true;
1245 
1246  // If this is an LDG intrinsic, the address is the third operand. If its an
1247  // LDG/LDU SD node (from custom vector handling), then its the second operand
1248  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1249  Op1 = N->getOperand(2);
1250  Mem = cast<MemIntrinsicSDNode>(N);
1251  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1252  switch (IID) {
1253  default:
1254  return false;
1255  case Intrinsic::nvvm_ldg_global_f:
1256  case Intrinsic::nvvm_ldg_global_i:
1257  case Intrinsic::nvvm_ldg_global_p:
1258  IsLDG = true;
1259  break;
1260  case Intrinsic::nvvm_ldu_global_f:
1261  case Intrinsic::nvvm_ldu_global_i:
1262  case Intrinsic::nvvm_ldu_global_p:
1263  IsLDG = false;
1264  break;
1265  }
1266  } else {
1267  Op1 = N->getOperand(1);
1268  Mem = cast<MemSDNode>(N);
1269  }
1270 
1271  std::optional<unsigned> Opcode;
1272  SDLoc DL(N);
1273  SDNode *LD;
1274  SDValue Base, Offset, Addr;
1275 
1276  EVT EltVT = Mem->getMemoryVT();
1277  unsigned NumElts = 1;
1278  if (EltVT.isVector()) {
1279  NumElts = EltVT.getVectorNumElements();
1280  EltVT = EltVT.getVectorElementType();
1281  // vectors of f16 are loaded/stored as multiples of v2f16 elements.
1282  if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) {
1283  assert(NumElts % 2 == 0 && "Vector must have even number of elements");
1284  EltVT = MVT::v2f16;
1285  NumElts /= 2;
1286  }
1287  }
1288 
1289  // Build the "promoted" result VTList for the load. If we are really loading
1290  // i8s, then the return type will be promoted to i16 since we do not expose
1291  // 8-bit registers in NVPTX.
1292  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1293  SmallVector<EVT, 5> InstVTs;
1294  for (unsigned i = 0; i != NumElts; ++i) {
1295  InstVTs.push_back(NodeVT);
1296  }
1297  InstVTs.push_back(MVT::Other);
1298  SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1299 
1300  if (SelectDirectAddr(Op1, Addr)) {
1301  switch (N->getOpcode()) {
1302  default:
1303  return false;
1304  case ISD::LOAD:
1306  if (IsLDG)
1307  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1308  NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1309  NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1310  NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1311  NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1312  NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1313  NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1314  NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1315  NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1316  else
1317  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1318  NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1319  NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1320  NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1321  NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1322  NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1323  NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1324  NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1325  NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1326  break;
1327  case NVPTXISD::LoadV2:
1328  case NVPTXISD::LDGV2:
1329  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1330  NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1331  NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1332  NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1333  NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1334  NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1335  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1336  NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1337  NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1338  break;
1339  case NVPTXISD::LDUV2:
1340  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1341  NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1342  NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1343  NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1344  NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1345  NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1346  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1347  NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1348  NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1349  break;
1350  case NVPTXISD::LoadV4:
1351  case NVPTXISD::LDGV4:
1352  Opcode = pickOpcodeForVT(
1353  EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1354  NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1355  NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
1356  NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1357  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1358  NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
1359  break;
1360  case NVPTXISD::LDUV4:
1361  Opcode = pickOpcodeForVT(
1362  EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1363  NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1364  NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
1365  NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1366  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1367  NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
1368  break;
1369  }
1370  if (!Opcode)
1371  return false;
1372  SDValue Ops[] = { Addr, Chain };
1373  LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
1374  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1375  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1376  if (TM.is64Bit()) {
1377  switch (N->getOpcode()) {
1378  default:
1379  return false;
1380  case ISD::LOAD:
1382  if (IsLDG)
1383  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1384  NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1385  NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1386  NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1387  NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1388  NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1389  NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1390  NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1391  NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1392  else
1393  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1394  NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1395  NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1396  NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1397  NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1398  NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1399  NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1400  NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1401  NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1402  break;
1403  case NVPTXISD::LoadV2:
1404  case NVPTXISD::LDGV2:
1405  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1406  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1407  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1408  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1409  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1410  NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1411  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1412  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1413  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1414  break;
1415  case NVPTXISD::LDUV2:
1416  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1417  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1418  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1419  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1420  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1421  NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1422  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1423  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1424  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1425  break;
1426  case NVPTXISD::LoadV4:
1427  case NVPTXISD::LDGV4:
1428  Opcode = pickOpcodeForVT(
1429  EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1430  NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1431  NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
1432  NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1433  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1434  NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
1435  break;
1436  case NVPTXISD::LDUV4:
1437  Opcode = pickOpcodeForVT(
1438  EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1439  NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1440  NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
1441  NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1442  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1443  NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
1444  break;
1445  }
1446  } else {
1447  switch (N->getOpcode()) {
1448  default:
1449  return false;
1450  case ISD::LOAD:
1452  if (IsLDG)
1453  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1454  NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1455  NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1456  NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1457  NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1458  NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1459  NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1460  NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1461  NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1462  else
1463  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1464  NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1465  NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1466  NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1467  NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1468  NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1469  NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1470  NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1471  NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1472  break;
1473  case NVPTXISD::LoadV2:
1474  case NVPTXISD::LDGV2:
1475  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1476  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1477  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1478  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1479  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1480  NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1481  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1482  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1483  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1484  break;
1485  case NVPTXISD::LDUV2:
1486  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1487  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1488  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1489  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1490  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1491  NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1492  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1493  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1494  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1495  break;
1496  case NVPTXISD::LoadV4:
1497  case NVPTXISD::LDGV4:
1498  Opcode = pickOpcodeForVT(
1499  EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1500  NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1501  NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
1502  NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1503  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1504  NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
1505  break;
1506  case NVPTXISD::LDUV4:
1507  Opcode = pickOpcodeForVT(
1508  EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1509  NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1510  NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
1511  NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1512  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1513  NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
1514  break;
1515  }
1516  }
1517  if (!Opcode)
1518  return false;
1519  SDValue Ops[] = {Base, Offset, Chain};
1520  LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
1521  } else {
1522  if (TM.is64Bit()) {
1523  switch (N->getOpcode()) {
1524  default:
1525  return false;
1526  case ISD::LOAD:
1528  if (IsLDG)
1529  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1530  NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1531  NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1532  NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1533  NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1534  NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1535  NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1536  NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1537  NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1538  else
1539  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1540  NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1541  NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1542  NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1543  NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1544  NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1545  NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1546  NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1547  NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1548  break;
1549  case NVPTXISD::LoadV2:
1550  case NVPTXISD::LDGV2:
1551  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1552  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1553  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1554  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1555  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1556  NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1557  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1558  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1559  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1560  break;
1561  case NVPTXISD::LDUV2:
1562  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1563  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1564  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1565  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1566  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1567  NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1568  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1569  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1570  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1571  break;
1572  case NVPTXISD::LoadV4:
1573  case NVPTXISD::LDGV4:
1574  Opcode = pickOpcodeForVT(
1575  EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1576  NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1577  NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
1578  NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1579  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1580  NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
1581  break;
1582  case NVPTXISD::LDUV4:
1583  Opcode = pickOpcodeForVT(
1584  EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1585  NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1586  NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
1587  NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1588  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1589  NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
1590  break;
1591  }
1592  } else {
1593  switch (N->getOpcode()) {
1594  default:
1595  return false;
1596  case ISD::LOAD:
1598  if (IsLDG)
1599  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1600  NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1601  NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1602  NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1603  NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1604  NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1605  NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1606  NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1607  NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1608  else
1609  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1610  NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1611  NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1612  NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1613  NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1614  NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1615  NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1616  NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1617  NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1618  break;
1619  case NVPTXISD::LoadV2:
1620  case NVPTXISD::LDGV2:
1621  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1622  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1623  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1624  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1625  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1626  NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1627  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1628  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1629  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1630  break;
1631  case NVPTXISD::LDUV2:
1632  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1633  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1634  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1635  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1636  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1637  NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1638  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1639  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1640  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1641  break;
1642  case NVPTXISD::LoadV4:
1643  case NVPTXISD::LDGV4:
1644  Opcode = pickOpcodeForVT(
1645  EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1646  NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1647  NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
1648  NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1649  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1650  NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
1651  break;
1652  case NVPTXISD::LDUV4:
1653  Opcode = pickOpcodeForVT(
1654  EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1655  NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1656  NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
1657  NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1658  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1659  NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
1660  break;
1661  }
1662  }
1663  if (!Opcode)
1664  return false;
1665  SDValue Ops[] = { Op1, Chain };
1666  LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
1667  }
1668 
1670  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1671 
1672  // For automatic generation of LDG (through SelectLoad[Vector], not the
1673  // intrinsics), we may have an extending load like:
1674  //
1675  // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1676  //
1677  // In this case, the matching logic above will select a load for the original
1678  // memory type (in this case, i8) and our types will not match (the node needs
1679  // to return an i32 in this case). Our LDG/LDU nodes do not support the
1680  // concept of sign-/zero-extension, so emulate it here by adding an explicit
1681  // CVT instruction. Ptxas should clean up any redundancies here.
1682 
1683  EVT OrigType = N->getValueType(0);
1684  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
1685 
1686  if (OrigType != EltVT && LdNode) {
1687  // We have an extending-load. The instruction we selected operates on the
1688  // smaller type, but the SDNode we are replacing has the larger type. We
1689  // need to emit a CVT to make the types match.
1690  bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1691  unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1692  EltVT.getSimpleVT(), IsSigned);
1693 
1694  // For each output value, apply the manual sign/zero-extension and make sure
1695  // all users of the load go through that CVT.
1696  for (unsigned i = 0; i != NumElts; ++i) {
1697  SDValue Res(LD, i);
1698  SDValue OrigVal(N, i);
1699 
1700  SDNode *CvtNode =
1701  CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
1703  DL, MVT::i32));
1704  ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1705  }
1706  }
1707 
1708  ReplaceNode(N, LD);
1709  return true;
1710 }
1711 
1712 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1713  SDLoc dl(N);
1714  MemSDNode *ST = cast<MemSDNode>(N);
1715  assert(ST->writeMem() && "Expected store");
1716  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
1717  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
1718  assert((PlainStore || AtomicStore) && "Expected store");
1719  EVT StoreVT = ST->getMemoryVT();
1720  SDNode *NVPTXST = nullptr;
1721 
1722  // do not support pre/post inc/dec
1723  if (PlainStore && PlainStore->isIndexed())
1724  return false;
1725 
1726  if (!StoreVT.isSimple())
1727  return false;
1728 
1729  AtomicOrdering Ordering = ST->getSuccessOrdering();
1730  // In order to lower atomic loads with stronger guarantees we would need to
1731  // use store.release or insert fences. However these features were only added
1732  // with PTX ISA 6.0 / sm_70.
1733  // TODO: Check if we can actually use the new instructions and implement them.
1734  if (isStrongerThanMonotonic(Ordering))
1735  return false;
1736 
1737  // Address Space Setting
1738  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
1739  unsigned int PointerSize =
1740  CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
1741 
1742  // Volatile Setting
1743  // - .volatile is only available for .global and .shared
1744  // - .volatile has the same memory synchronization semantics as .relaxed.sys
1745  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
1746  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1747  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1748  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1749  isVolatile = false;
1750 
1751  // Vector Setting
1752  MVT SimpleVT = StoreVT.getSimpleVT();
1753  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1754 
1755  // Type Setting: toType + toTypeWidth
1756  // - for integer type, always use 'u'
1757  //
1758  MVT ScalarVT = SimpleVT.getScalarType();
1759  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1760  if (SimpleVT.isVector()) {
1761  assert((StoreVT == MVT::v2f16 || StoreVT == MVT::v2bf16) &&
1762  "Unexpected vector type");
1763  // v2f16 is stored using st.b32
1764  toTypeWidth = 32;
1765  }
1766 
1767  unsigned int toType = getLdStRegType(ScalarVT);
1768 
1769  // Create the machine instruction DAG
1770  SDValue Chain = ST->getChain();
1771  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
1772  SDValue BasePtr = ST->getBasePtr();
1773  SDValue Addr;
1774  SDValue Offset, Base;
1775  std::optional<unsigned> Opcode;
1776  MVT::SimpleValueType SourceVT =
1777  Value.getNode()->getSimpleValueType(0).SimpleTy;
1778 
1779  if (SelectDirectAddr(BasePtr, Addr)) {
1780  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1781  NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1782  NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1783  NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1784  if (!Opcode)
1785  return false;
1786  SDValue Ops[] = {Value,
1787  getI32Imm(isVolatile, dl),
1788  getI32Imm(CodeAddrSpace, dl),
1789  getI32Imm(vecType, dl),
1790  getI32Imm(toType, dl),
1791  getI32Imm(toTypeWidth, dl),
1792  Addr,
1793  Chain};
1794  NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
1795  } else if (PointerSize == 64
1796  ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
1797  : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
1798  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1799  NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1800  NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1801  NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1802  if (!Opcode)
1803  return false;
1804  SDValue Ops[] = {Value,
1805  getI32Imm(isVolatile, dl),
1806  getI32Imm(CodeAddrSpace, dl),
1807  getI32Imm(vecType, dl),
1808  getI32Imm(toType, dl),
1809  getI32Imm(toTypeWidth, dl),
1810  Base,
1811  Offset,
1812  Chain};
1813  NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
1814  } else if (PointerSize == 64
1815  ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
1816  : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
1817  if (PointerSize == 64)
1818  Opcode = pickOpcodeForVT(
1819  SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1820  NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1821  NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1822  else
1823  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1824  NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1825  NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1826  NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1827  if (!Opcode)
1828  return false;
1829 
1830  SDValue Ops[] = {Value,
1831  getI32Imm(isVolatile, dl),
1832  getI32Imm(CodeAddrSpace, dl),
1833  getI32Imm(vecType, dl),
1834  getI32Imm(toType, dl),
1835  getI32Imm(toTypeWidth, dl),
1836  Base,
1837  Offset,
1838  Chain};
1839  NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
1840  } else {
1841  if (PointerSize == 64)
1842  Opcode =
1843  pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1844  NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1845  NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1846  NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1847  else
1848  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1849  NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1850  NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1851  NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1852  if (!Opcode)
1853  return false;
1854  SDValue Ops[] = {Value,
1855  getI32Imm(isVolatile, dl),
1856  getI32Imm(CodeAddrSpace, dl),
1857  getI32Imm(vecType, dl),
1858  getI32Imm(toType, dl),
1859  getI32Imm(toTypeWidth, dl),
1860  BasePtr,
1861  Chain};
1862  NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
1863  }
1864 
1865  if (!NVPTXST)
1866  return false;
1867 
1868  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1869  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
1870  ReplaceNode(N, NVPTXST);
1871  return true;
1872 }
1873 
1874 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1875  SDValue Chain = N->getOperand(0);
1876  SDValue Op1 = N->getOperand(1);
1877  SDValue Addr, Offset, Base;
1878  std::optional<unsigned> Opcode;
1879  SDLoc DL(N);
1880  SDNode *ST;
1881  EVT EltVT = Op1.getValueType();
1882  MemSDNode *MemSD = cast<MemSDNode>(N);
1883  EVT StoreVT = MemSD->getMemoryVT();
1884 
1885  // Address Space Setting
1886  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1887  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1888  report_fatal_error("Cannot store to pointer that points to constant "
1889  "memory space");
1890  }
1891  unsigned int PointerSize =
1893 
1894  // Volatile Setting
1895  // - .volatile is only availalble for .global and .shared
1896  bool IsVolatile = MemSD->isVolatile();
1897  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1898  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1899  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1900  IsVolatile = false;
1901 
1902  // Type Setting: toType + toTypeWidth
1903  // - for integer type, always use 'u'
1904  assert(StoreVT.isSimple() && "Store value is not simple");
1905  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1906  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1907  unsigned ToType = getLdStRegType(ScalarVT);
1908 
1910  SDValue N2;
1911  unsigned VecType;
1912 
1913  switch (N->getOpcode()) {
1914  case NVPTXISD::StoreV2:
1916  StOps.push_back(N->getOperand(1));
1917  StOps.push_back(N->getOperand(2));
1918  N2 = N->getOperand(3);
1919  break;
1920  case NVPTXISD::StoreV4:
1922  StOps.push_back(N->getOperand(1));
1923  StOps.push_back(N->getOperand(2));
1924  StOps.push_back(N->getOperand(3));
1925  StOps.push_back(N->getOperand(4));
1926  N2 = N->getOperand(5);
1927  break;
1928  default:
1929  return false;
1930  }
1931 
1932  // v8f16 is a special case. PTX doesn't have st.v8.f16
1933  // instruction. Instead, we split the vector into v2f16 chunks and
1934  // store them with st.v4.b32.
1935  if (EltVT == MVT::v2f16 || EltVT == MVT::v2bf16) {
1936  assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1937  EltVT = MVT::i32;
1939  ToTypeWidth = 32;
1940  }
1941 
1942  StOps.push_back(getI32Imm(IsVolatile, DL));
1943  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1944  StOps.push_back(getI32Imm(VecType, DL));
1945  StOps.push_back(getI32Imm(ToType, DL));
1946  StOps.push_back(getI32Imm(ToTypeWidth, DL));
1947 
1948  if (SelectDirectAddr(N2, Addr)) {
1949  switch (N->getOpcode()) {
1950  default:
1951  return false;
1952  case NVPTXISD::StoreV2:
1953  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1954  NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1955  NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1956  NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1957  NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1958  break;
1959  case NVPTXISD::StoreV4:
1960  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1961  NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
1962  NVPTX::STV_i32_v4_avar, std::nullopt,
1963  NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1964  NVPTX::STV_f32_v4_avar, std::nullopt);
1965  break;
1966  }
1967  StOps.push_back(Addr);
1968  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1969  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1970  switch (N->getOpcode()) {
1971  default:
1972  return false;
1973  case NVPTXISD::StoreV2:
1974  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1975  NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1976  NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1977  NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1978  NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1979  break;
1980  case NVPTXISD::StoreV4:
1981  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1982  NVPTX::STV_i8_v4_asi, NVPTX::STV_i16_v4_asi,
1983  NVPTX::STV_i32_v4_asi, std::nullopt,
1984  NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1985  NVPTX::STV_f32_v4_asi, std::nullopt);
1986  break;
1987  }
1988  StOps.push_back(Base);
1989  StOps.push_back(Offset);
1990  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1991  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1992  if (PointerSize == 64) {
1993  switch (N->getOpcode()) {
1994  default:
1995  return false;
1996  case NVPTXISD::StoreV2:
1997  Opcode = pickOpcodeForVT(
1998  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
1999  NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
2000  NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
2001  NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
2002  NVPTX::STV_f64_v2_ari_64);
2003  break;
2004  case NVPTXISD::StoreV4:
2005  Opcode = pickOpcodeForVT(
2006  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
2007  NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
2008  NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
2009  NVPTX::STV_f32_v4_ari_64, std::nullopt);
2010  break;
2011  }
2012  } else {
2013  switch (N->getOpcode()) {
2014  default:
2015  return false;
2016  case NVPTXISD::StoreV2:
2017  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
2018  NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2019  NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2020  NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
2021  NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
2022  break;
2023  case NVPTXISD::StoreV4:
2024  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
2025  NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
2026  NVPTX::STV_i32_v4_ari, std::nullopt,
2027  NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
2028  NVPTX::STV_f32_v4_ari, std::nullopt);
2029  break;
2030  }
2031  }
2032  StOps.push_back(Base);
2033  StOps.push_back(Offset);
2034  } else {
2035  if (PointerSize == 64) {
2036  switch (N->getOpcode()) {
2037  default:
2038  return false;
2039  case NVPTXISD::StoreV2:
2040  Opcode = pickOpcodeForVT(
2041  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
2042  NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2043  NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2044  NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2045  NVPTX::STV_f64_v2_areg_64);
2046  break;
2047  case NVPTXISD::StoreV4:
2048  Opcode = pickOpcodeForVT(
2049  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
2050  NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
2051  NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2052  NVPTX::STV_f32_v4_areg_64, std::nullopt);
2053  break;
2054  }
2055  } else {
2056  switch (N->getOpcode()) {
2057  default:
2058  return false;
2059  case NVPTXISD::StoreV2:
2060  Opcode =
2061  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
2062  NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2063  NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2064  NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2065  NVPTX::STV_f64_v2_areg);
2066  break;
2067  case NVPTXISD::StoreV4:
2068  Opcode = pickOpcodeForVT(
2069  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2070  NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, std::nullopt,
2071  NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2072  NVPTX::STV_f32_v4_areg, std::nullopt);
2073  break;
2074  }
2075  }
2076  StOps.push_back(N2);
2077  }
2078 
2079  if (!Opcode)
2080  return false;
2081 
2082  StOps.push_back(Chain);
2083 
2084  ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps);
2085 
2086  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2087  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
2088 
2089  ReplaceNode(N, ST);
2090  return true;
2091 }
2092 
2093 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2094  SDValue Chain = Node->getOperand(0);
2095  SDValue Offset = Node->getOperand(2);
2096  SDValue Flag = Node->getOperand(3);
2097  SDLoc DL(Node);
2098  MemSDNode *Mem = cast<MemSDNode>(Node);
2099 
2100  unsigned VecSize;
2101  switch (Node->getOpcode()) {
2102  default:
2103  return false;
2104  case NVPTXISD::LoadParam:
2105  VecSize = 1;
2106  break;
2107  case NVPTXISD::LoadParamV2:
2108  VecSize = 2;
2109  break;
2110  case NVPTXISD::LoadParamV4:
2111  VecSize = 4;
2112  break;
2113  }
2114 
2115  EVT EltVT = Node->getValueType(0);
2116  EVT MemVT = Mem->getMemoryVT();
2117 
2118  std::optional<unsigned> Opcode;
2119 
2120  switch (VecSize) {
2121  default:
2122  return false;
2123  case 1:
2124  Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2125  NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2126  NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2127  NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2128  NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2129  break;
2130  case 2:
2131  Opcode =
2132  pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2133  NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2134  NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2135  NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2136  NVPTX::LoadParamMemV2F64);
2137  break;
2138  case 4:
2139  Opcode = pickOpcodeForVT(
2140  MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2141  NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, std::nullopt,
2142  NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2143  NVPTX::LoadParamMemV4F32, std::nullopt);
2144  break;
2145  }
2146  if (!Opcode)
2147  return false;
2148 
2149  SDVTList VTs;
2150  if (VecSize == 1) {
2151  VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2152  } else if (VecSize == 2) {
2153  VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2154  } else {
2155  EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2156  VTs = CurDAG->getVTList(EVTs);
2157  }
2158 
2159  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2160 
2162  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2163  Ops.push_back(Chain);
2164  Ops.push_back(Flag);
2165 
2166  ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
2167  return true;
2168 }
2169 
/// tryStoreRetval - Select the machine StoreRetval* instruction for a
/// return-value store node.
///
/// Operand layout read from \p N: operand 0 is the chain, operand 1 is a
/// constant offset, and operands 2.. are the values to store (1, 2 or 4 of
/// them depending on N's opcode).
///
/// \returns true if N was replaced by a machine node; false if N's opcode is
/// not handled here or no machine opcode exists for the memory type.
///
/// NOTE(review): this listing omits lines 2185, 2188 and 2194 (presumably the
/// two vector `case` labels and the declaration of `Ops`) - confirm against
/// the real source.
2170 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2171  SDLoc DL(N);
2172  SDValue Chain = N->getOperand(0);
2173  SDValue Offset = N->getOperand(1);
      // NOTE(review): the offset is cast to ConstantSDNode before N's opcode is
      // checked below, so callers must only pass nodes whose operand 1 is a
      // constant.
2174  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2175  MemSDNode *Mem = cast<MemSDNode>(N);
2176 
2177  // How many elements do we have?
2178  unsigned NumElts = 1;
2179  switch (N->getOpcode()) {
2180  default:
2181  return false;
2182  case NVPTXISD::StoreRetval:
2183  NumElts = 1;
2184  break;
      // (case label not shown in this listing)
2186  NumElts = 2;
2187  break;
      // (case label not shown in this listing)
2189  NumElts = 4;
2190  break;
2191  }
2192 
2193  // Build vector of operands
      // Order is: the NumElts stored values, the offset as an i32 target
      // constant, then the chain.
2195  for (unsigned i = 0; i < NumElts; ++i)
2196  Ops.push_back(N->getOperand(i + 2));
2197  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2198  Ops.push_back(Chain);
2199 
2200  // Determine target opcode
2201  // If we have an i1, use an 8-bit store. The lowering code in
2202  // NVPTXISelLowering will have already emitted an upcast.
      // The opcode is chosen from the node's memory VT. The 4-element form
      // passes std::nullopt in the I64 and F64 positions, so those types fall
      // through to the `!Opcode` bail-out below.
2203  std::optional<unsigned> Opcode = 0;
2204  switch (NumElts) {
2205  default:
2206  return false;
2207  case 1:
2208  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2209  NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2210  NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2211  NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2212  NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2213  break;
2214  case 2:
2215  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2216  NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2217  NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2218  NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2219  NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2220  break;
2221  case 4:
2222  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2223  NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2224  NVPTX::StoreRetvalV4I32, std::nullopt,
2225  NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2226  NVPTX::StoreRetvalV4F32, std::nullopt);
2227  break;
2228  }
2229  if (!Opcode)
2230  return false;
2231 
      // The machine node produces only a chain (MVT::Other); carry N's memory
      // operand over to it before replacing N.
2232  SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
2233  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2234  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2235 
2236  ReplaceNode(N, Ret);
2237  return true;
2238 }
2239 
/// tryStoreParam - Select the machine StoreParam* instruction for a
/// parameter store node.
///
/// Operand layout read from \p N: operand 0 is the chain, operand 1 is a
/// constant parameter number, operand 2 is a constant offset, operands 3..
/// are the values to store (1, 2 or 4 of them), and the last operand is
/// forwarded as `Flag`.
///
/// \returns true if N was replaced by a machine node; false if N's opcode is
/// not handled here or no machine opcode exists for the memory type.
///
/// NOTE(review): this listing omits lines 2255-2256, 2260, 2263, 2269, 2316,
/// 2325 and 2334 (presumably extra `case` labels and the declarations of
/// `Ops`, `CvtNone` and `RetVTs`) - confirm against the real source.
2240 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2241  SDLoc DL(N);
2242  SDValue Chain = N->getOperand(0);
2243  SDValue Param = N->getOperand(1);
2244  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2245  SDValue Offset = N->getOperand(2);
2246  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2247  MemSDNode *Mem = cast<MemSDNode>(N);
2248  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2249 
2250  // How many elements do we have?
2251  unsigned NumElts = 1;
2252  switch (N->getOpcode()) {
2253  default:
2254  return false;
      // (two listing lines are missing here; presumably further case labels
      // that share the single-element path - TODO confirm)
2257  case NVPTXISD::StoreParam:
2258  NumElts = 1;
2259  break;
      // (case label not shown in this listing)
2261  NumElts = 2;
2262  break;
      // (case label not shown in this listing)
2264  NumElts = 4;
2265  break;
2266  }
2267 
2268  // Build vector of operands
      // Order is: the NumElts stored values, the parameter number and offset
      // as i32 target constants, the chain, then Flag.
2270  for (unsigned i = 0; i < NumElts; ++i)
2271  Ops.push_back(N->getOperand(i + 3));
2272  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2273  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2274  Ops.push_back(Chain);
2275  Ops.push_back(Flag);
2276 
2277  // Determine target opcode
2278  // If we have an i1, use an 8-bit store. The lowering code in
2279  // NVPTXISelLowering will have already emitted an upcast.
      // The outer switch is on N's opcode: the default arm picks an opcode from
      // the memory VT and element count, while the U32/S32 arms always use
      // StoreParamI32 and first convert the stored value.
2280  std::optional<unsigned> Opcode = 0;
2281  switch (N->getOpcode()) {
2282  default:
2283  switch (NumElts) {
2284  default:
2285  return false;
2286  case 1:
2287  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2288  NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2289  NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2290  NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2291  NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2292  break;
2293  case 2:
2294  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2295  NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2296  NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2297  NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2298  NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2299  break;
2300  case 4:
      // No 64-bit element variants exist for the 4-element form
      // (std::nullopt in the I64 and F64 positions).
2301  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2302  NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2303  NVPTX::StoreParamV4I32, std::nullopt,
2304  NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2305  NVPTX::StoreParamV4F32, std::nullopt);
2306  break;
2307  }
2308  if (!Opcode)
2309  return false;
2310  break;
2311  // Special case: if we have a sign-extend/zero-extend node, insert the
2312  // conversion instruction first, and use that as the value operand to
2313  // the selected StoreParam node.
2314  case NVPTXISD::StoreParamU32: {
2315  Opcode = NVPTX::StoreParamI32;
      // (start of the `CvtNone` definition is not shown in this listing)
2317  MVT::i32);
      // Zero-extending path: CVT_u32_u16 produces the i32 value that replaces
      // the original stored value in Ops[0].
2318  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2319  MVT::i32, Ops[0], CvtNone);
2320  Ops[0] = SDValue(Cvt, 0);
2321  break;
2322  }
2323  case NVPTXISD::StoreParamS32: {
2324  Opcode = NVPTX::StoreParamI32;
      // (start of the `CvtNone` definition is not shown in this listing)
2326  MVT::i32);
      // Sign-extending path: same as above but with CVT_s32_s16.
2327  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2328  MVT::i32, Ops[0], CvtNone);
2329  Ops[0] = SDValue(Cvt, 0);
2330  break;
2331  }
2332  }
2333 
      // (definition of `RetVTs` is not shown in this listing)
      // Carry N's memory operand over to the new machine node before
      // replacing N.
2335  SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
2336  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2337  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2338 
2339  ReplaceNode(N, Ret);
2340  return true;
2341 }
2342 
/// tryTextureIntrinsic - Replace a texture node with the NVPTX machine opcode
/// selected by the switch below.
///
/// The switch is a pure opcode-to-opcode table. The new machine node keeps
/// N's value-type list and receives N's operands with operand 0 moved to the
/// end.
///
/// \returns false, leaving N untouched, when N's opcode is not in the table;
/// true after N has been replaced.
///
/// NOTE(review): this listing shows only a few of the `case` labels (e.g.
/// NVPTXISD::Tex1DS32S32). The label line that precedes most `Opc = ...`
/// assignments is absent from this rendering and must be taken from the real
/// source.
2343 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2344  unsigned Opc = 0;
2345 
2346  switch (N->getOpcode()) {
2347  default: return false;
      // --- TEX_* opcodes with the _RR suffix ---
2349  Opc = NVPTX::TEX_1D_F32_S32_RR;
2350  break;
2352  Opc = NVPTX::TEX_1D_F32_F32_RR;
2353  break;
2355  Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2356  break;
2358  Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2359  break;
2360  case NVPTXISD::Tex1DS32S32:
2361  Opc = NVPTX::TEX_1D_S32_S32_RR;
2362  break;
2364  Opc = NVPTX::TEX_1D_S32_F32_RR;
2365  break;
2367  Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2368  break;
2370  Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2371  break;
2372  case NVPTXISD::Tex1DU32S32:
2373  Opc = NVPTX::TEX_1D_U32_S32_RR;
2374  break;
2376  Opc = NVPTX::TEX_1D_U32_F32_RR;
2377  break;
2379  Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2380  break;
2382  Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2383  break;
2385  Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2386  break;
2388  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2389  break;
2391  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2392  break;
2394  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2395  break;
2397  Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2398  break;
2400  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2401  break;
2403  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2404  break;
2406  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2407  break;
2409  Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2410  break;
2412  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2413  break;
2415  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2416  break;
2418  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2419  break;
2421  Opc = NVPTX::TEX_2D_F32_S32_RR;
2422  break;
2424  Opc = NVPTX::TEX_2D_F32_F32_RR;
2425  break;
2427  Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2428  break;
2430  Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2431  break;
2432  case NVPTXISD::Tex2DS32S32:
2433  Opc = NVPTX::TEX_2D_S32_S32_RR;
2434  break;
2436  Opc = NVPTX::TEX_2D_S32_F32_RR;
2437  break;
2439  Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2440  break;
2442  Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2443  break;
2444  case NVPTXISD::Tex2DU32S32:
2445  Opc = NVPTX::TEX_2D_U32_S32_RR;
2446  break;
2448  Opc = NVPTX::TEX_2D_U32_F32_RR;
2449  break;
2451  Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2452  break;
2454  Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2455  break;
2457  Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2458  break;
2460  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2461  break;
2463  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2464  break;
2466  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2467  break;
2469  Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2470  break;
2472  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2473  break;
2475  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2476  break;
2478  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2479  break;
2481  Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2482  break;
2484  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2485  break;
2487  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2488  break;
2490  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2491  break;
2493  Opc = NVPTX::TEX_3D_F32_S32_RR;
2494  break;
2496  Opc = NVPTX::TEX_3D_F32_F32_RR;
2497  break;
2499  Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2500  break;
2502  Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2503  break;
2504  case NVPTXISD::Tex3DS32S32:
2505  Opc = NVPTX::TEX_3D_S32_S32_RR;
2506  break;
2508  Opc = NVPTX::TEX_3D_S32_F32_RR;
2509  break;
2511  Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2512  break;
2514  Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2515  break;
2516  case NVPTXISD::Tex3DU32S32:
2517  Opc = NVPTX::TEX_3D_U32_S32_RR;
2518  break;
2520  Opc = NVPTX::TEX_3D_U32_F32_RR;
2521  break;
2523  Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2524  break;
2526  Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2527  break;
2529  Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2530  break;
2532  Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2533  break;
2535  Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2536  break;
2538  Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2539  break;
2541  Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2542  break;
2544  Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2545  break;
2547  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2548  break;
2550  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2551  break;
2553  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2554  break;
2556  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2557  break;
2559  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2560  break;
2562  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2563  break;
      // --- TLD4_* opcodes with the _RR suffix (R/G/B/A variants) ---
2565  Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2566  break;
2568  Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2569  break;
2571  Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2572  break;
2574  Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2575  break;
2577  Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2578  break;
2580  Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2581  break;
2583  Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2584  break;
2586  Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2587  break;
2589  Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2590  break;
2592  Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2593  break;
2595  Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2596  break;
2598  Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2599  break;
      // --- TEX_UNIFIED_* opcodes with the _R suffix ---
2601  Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2602  break;
2604  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2605  break;
2607  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2608  break;
2610  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2611  break;
2613  Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2614  break;
2616  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2617  break;
2619  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2620  break;
2622  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2623  break;
2625  Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2626  break;
2628  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2629  break;
2631  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2632  break;
2634  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2635  break;
2637  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2638  break;
2640  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2641  break;
2643  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2644  break;
2646  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2647  break;
2649  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2650  break;
2652  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2653  break;
2655  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2656  break;
2658  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2659  break;
2661  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2662  break;
2664  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2665  break;
2667  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2668  break;
2670  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2671  break;
2673  Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2674  break;
2676  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2677  break;
2679  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2680  break;
2682  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2683  break;
2685  Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2686  break;
2688  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2689  break;
2691  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2692  break;
2694  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2695  break;
2697  Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2698  break;
2700  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2701  break;
2703  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2704  break;
2706  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2707  break;
2709  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2710  break;
2712  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2713  break;
2715  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2716  break;
2718  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2719  break;
2721  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2722  break;
2724  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2725  break;
2727  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2728  break;
2730  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2731  break;
2733  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2734  break;
2736  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2737  break;
2739  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2740  break;
2742  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2743  break;
2745  Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2746  break;
2748  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2749  break;
2751  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2752  break;
2754  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2755  break;
2757  Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2758  break;
2760  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2761  break;
2763  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2764  break;
2766  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2767  break;
2769  Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2770  break;
2772  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2773  break;
2775  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2776  break;
2778  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2779  break;
2781  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2782  break;
2784  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2785  break;
2787  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2788  break;
2790  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2791  break;
2793  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2794  break;
2796  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2797  break;
2799  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2800  break;
2802  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2803  break;
2805  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2806  break;
2808  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2809  break;
2811  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2812  break;
2814  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2815  break;
      // --- TLD4_UNIFIED_* opcodes with the _R suffix (R/G/B/A variants) ---
2817  Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2818  break;
2820  Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2821  break;
2823  Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2824  break;
2826  Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2827  break;
2829  Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2830  break;
2832  Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2833  break;
2835  Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2836  break;
2838  Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2839  break;
2841  Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2842  break;
2844  Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2845  break;
2847  Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2848  break;
2850  Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2851  break;
2852  }
2853 
2854  // Copy over operands
      // Ops starts as N's operand list with its leading element dropped; the
      // dropped operand 0 (the chain) is then appended at the end.
2855  SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
2856  Ops.push_back(N->getOperand(0)); // Move chain to the back.
2857 
2858  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2859  return true;
2860 }
2861 
/// trySurfaceIntrinsic - Replace a surface-load node with the NVPTX SULD_*
/// machine opcode selected by the switch below.
///
/// The table is laid out in three runs that differ only in the opcode-name
/// suffix: *_CLAMP_R, then *_TRAP_R, then *_ZERO_R. Each run covers the 1D,
/// 1D_ARRAY, 2D, 2D_ARRAY and 3D forms for I8/I16/I32/I64, V2I8..V2I64 and
/// V4I8..V4I32.
///
/// \returns false, leaving N untouched, when N's opcode is not in the table;
/// true after N has been replaced.
///
/// NOTE(review): none of the `case` labels survive in this listing; the label
/// line that precedes each `Opc = ...` assignment must be taken from the real
/// source.
2862 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2863  unsigned Opc = 0;
2864  switch (N->getOpcode()) {
2865  default: return false;
      // --- *_CLAMP_R opcodes ---
2867  Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2868  break;
2870  Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2871  break;
2873  Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2874  break;
2876  Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2877  break;
2879  Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2880  break;
2882  Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2883  break;
2885  Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2886  break;
2888  Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2889  break;
2891  Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2892  break;
2894  Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2895  break;
2897  Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2898  break;
2900  Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2901  break;
2903  Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2904  break;
2906  Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2907  break;
2909  Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2910  break;
2912  Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2913  break;
2915  Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2916  break;
2918  Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2919  break;
2921  Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2922  break;
2924  Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2925  break;
2927  Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2928  break;
2930  Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2931  break;
2933  Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2934  break;
2936  Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2937  break;
2939  Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2940  break;
2942  Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2943  break;
2945  Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2946  break;
2948  Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2949  break;
2951  Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2952  break;
2954  Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2955  break;
2957  Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2958  break;
2960  Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2961  break;
2963  Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2964  break;
2966  Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2967  break;
2969  Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2970  break;
2972  Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2973  break;
2975  Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2976  break;
2978  Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2979  break;
2981  Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2982  break;
2984  Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2985  break;
2987  Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2988  break;
2990  Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2991  break;
2993  Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2994  break;
2996  Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
2997  break;
2999  Opc = NVPTX::SULD_3D_I8_CLAMP_R;
3000  break;
3002  Opc = NVPTX::SULD_3D_I16_CLAMP_R;
3003  break;
3005  Opc = NVPTX::SULD_3D_I32_CLAMP_R;
3006  break;
3008  Opc = NVPTX::SULD_3D_I64_CLAMP_R;
3009  break;
3011  Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
3012  break;
3014  Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
3015  break;
3017  Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
3018  break;
3020  Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
3021  break;
3023  Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
3024  break;
3026  Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
3027  break;
3029  Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
3030  break;
      // --- *_TRAP_R opcodes ---
3032  Opc = NVPTX::SULD_1D_I8_TRAP_R;
3033  break;
3035  Opc = NVPTX::SULD_1D_I16_TRAP_R;
3036  break;
3038  Opc = NVPTX::SULD_1D_I32_TRAP_R;
3039  break;
3041  Opc = NVPTX::SULD_1D_I64_TRAP_R;
3042  break;
3044  Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3045  break;
3047  Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3048  break;
3050  Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3051  break;
3053  Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3054  break;
3056  Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3057  break;
3059  Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3060  break;
3062  Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3063  break;
3065  Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3066  break;
3068  Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3069  break;
3071  Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3072  break;
3074  Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3075  break;
3077  Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3078  break;
3080  Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3081  break;
3083  Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3084  break;
3086  Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3087  break;
3089  Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3090  break;
3092  Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3093  break;
3095  Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3096  break;
3098  Opc = NVPTX::SULD_2D_I8_TRAP_R;
3099  break;
3101  Opc = NVPTX::SULD_2D_I16_TRAP_R;
3102  break;
3104  Opc = NVPTX::SULD_2D_I32_TRAP_R;
3105  break;
3107  Opc = NVPTX::SULD_2D_I64_TRAP_R;
3108  break;
3110  Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3111  break;
3113  Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3114  break;
3116  Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3117  break;
3119  Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3120  break;
3122  Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3123  break;
3125  Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3126  break;
3128  Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3129  break;
3131  Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3132  break;
3134  Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3135  break;
3137  Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3138  break;
3140  Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3141  break;
3143  Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3144  break;
3146  Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3147  break;
3149  Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3150  break;
3152  Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3153  break;
3155  Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3156  break;
3158  Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3159  break;
3161  Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3162  break;
3164  Opc = NVPTX::SULD_3D_I8_TRAP_R;
3165  break;
3167  Opc = NVPTX::SULD_3D_I16_TRAP_R;
3168  break;
3170  Opc = NVPTX::SULD_3D_I32_TRAP_R;
3171  break;
3173  Opc = NVPTX::SULD_3D_I64_TRAP_R;
3174  break;
3176  Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3177  break;
3179  Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3180  break;
3182  Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3183  break;
3185  Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3186  break;
3188  Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3189  break;
3191  Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3192  break;
3194  Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3195  break;
      // --- *_ZERO_R opcodes ---
3197  Opc = NVPTX::SULD_1D_I8_ZERO_R;
3198  break;
3200  Opc = NVPTX::SULD_1D_I16_ZERO_R;
3201  break;
3203  Opc = NVPTX::SULD_1D_I32_ZERO_R;
3204  break;
3206  Opc = NVPTX::SULD_1D_I64_ZERO_R;
3207  break;
3209  Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3210  break;
3212  Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3213  break;
3215  Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3216  break;
3218  Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3219  break;
3221  Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3222  break;
3224  Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3225  break;
3227  Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3228  break;
3230  Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3231  break;
3233  Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3234  break;
3236  Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3237  break;
3239  Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3240  break;
3242  Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3243  break;
3245  Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3246  break;
3248  Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3249  break;
3251  Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3252  break;
3254  Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3255  break;
3257  Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3258  break;
3260  Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3261  break;
3263  Opc = NVPTX::SULD_2D_I8_ZERO_R;
3264  break;
3266  Opc = NVPTX::SULD_2D_I16_ZERO_R;
3267  break;
3269  Opc = NVPTX::SULD_2D_I32_ZERO_R;
3270  break;
3272  Opc = NVPTX::SULD_2D_I64_ZERO_R;
3273  break;
3275  Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3276  break;
3278  Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3279  break;
3281  Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3282  break;
3284  Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3285  break;
3287  Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3288  break;
3290  Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3291  break;
3293  Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3294  break;
3296  Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3297  break;
3299  Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3300  break;
3302  Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3303  break;
3305  Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3306  break;
3308  Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3309  break;
3311  Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3312  break;
3314  Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3315  break;
3317  Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3318  break;
3320  Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3321  break;
3323  Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3324  break;
3326  Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3327  break;
3329  Opc = NVPTX::SULD_3D_I8_ZERO_R;
3330  break;
3332  Opc = NVPTX::SULD_3D_I16_ZERO_R;
3333  break;
3335  Opc = NVPTX::SULD_3D_I32_ZERO_R;
3336  break;
3338  Opc = NVPTX::SULD_3D_I64_ZERO_R;
3339  break;
3341  Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3342  break;
3344  Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3345  break;
3347  Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3348  break;
3350  Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3351  break;
3353  Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3354  break;
3356  Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3357  break;
3359  Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3360  break;
3361  }
3362 
3363  // Copy over operands
      // Ops starts as N's operand list with its leading element dropped; the
      // dropped operand 0 (the chain) is then appended at the end.
3364  SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
3365  Ops.push_back(N->getOperand(0)); // Move chain to the back.
3366 
3367  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3368  return true;
3369 }
3370 
3371 
3372 /// tryBFE - Look for instruction sequences that can be made more efficient
3373 /// by using the 'bfe' (bit-field extract) PTX instruction
///
/// \p N is expected to be an AND, SRL or SRA node (anything else returns
/// false). Three shapes are matched, all requiring constant mask/shift
/// operands:
///   1. (and (srl|sra val, start), mask)  with a low-bit mask
///   2. (srl|sra (and val, mask), shift)  with a low-bit or shifted mask
///   3. (srl|sra (shl val, inner), outer) with outer >= inner
/// On a match N is replaced with a BFE_{S,U}{32,64}rii machine node taking
/// (Val, Start, Len), where Start and Len are i32 target constants.
///
/// \returns true if N was replaced, false if the pattern is not matched, is
/// judged unprofitable, or Val is neither i32 nor i64.
3374 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3375  SDLoc DL(N);
3376  SDValue LHS = N->getOperand(0);
3377  SDValue RHS = N->getOperand(1);
3378  SDValue Len;
3379  SDValue Start;
3380  SDValue Val;
      // Only shape 3 with an SRA outer shift sets this; every other path
      // selects the unsigned opcode.
3381  bool IsSigned = false;
3382 
3383  if (N->getOpcode() == ISD::AND) {
3384  // Canonicalize the operands
3385  // We want 'and %val, %mask'
3386  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3387  std::swap(LHS, RHS);
3388  }
3389 
3390  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3391  if (!Mask) {
3392  // We need a constant mask on the RHS of the AND
3393  return false;
3394  }
3395 
3396  // Extract the mask bits
3397  uint64_t MaskVal = Mask->getZExtValue();
3398  if (!isMask_64(MaskVal)) {
3399  // We *could* handle shifted masks here, but doing so would require an
3400  // 'and' operation to fix up the low-order bits so we would trade
3401  // shr+and for bfe+and, which has the same throughput
3402  return false;
3403  }
3404 
3405  // How many bits are in our mask?
3406  uint64_t NumBits = countTrailingOnes(MaskVal);
3407  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3408 
3409  if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3410  // We have a 'srl/and' pair, extract the effective start bit and length
3411  Val = LHS.getNode()->getOperand(0);
3412  Start = LHS.getNode()->getOperand(1);
3413  ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3414  if (StartConst) {
3415  uint64_t StartVal = StartConst->getZExtValue();
3416  // How many "good" bits do we have left? "good" is defined here as bits
3417  // that exist in the original value, not shifted in.
      // NOTE(review): the width used here is that of `Start` (the shift-amount
      // operand), not of `Val` (the value being shifted). If those widths
      // differ (e.g. an i64 value with a narrower shift amount), GoodBits does
      // not describe Val. The subtraction is also unsigned, so a StartVal
      // larger than that width wraps to a huge GoodBits and the rejection
      // below never fires. Confirm whether Val.getValueSizeInBits() was
      // intended.
3418  uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3419  if (NumBits > GoodBits) {
3420  // Do not handle the case where bits have been shifted in. In theory
3421  // we could handle this, but the cost is likely higher than just
3422  // emitting the srl/and pair.
3423  return false;
3424  }
3425  Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3426  } else {
3427  // Do not handle the case where the shift amount (can be zero if no srl
3428  // was found) is not constant. We could handle this case, but it would
3429  // require run-time logic that would be more expensive than just
3430  // emitting the srl/and pair.
3431  return false;
3432  }
3433  } else {
3434  // Do not handle the case where the LHS of the and is not a shift. While
3435  // it would be trivial to handle this case, it would just transform
3436  // 'and' -> 'bfe', but 'and' has higher-throughput.
3437  return false;
3438  }
3439  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3440  if (LHS->getOpcode() == ISD::AND) {
3441  ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3442  if (!ShiftCnst) {
3443  // Shift amount must be constant
3444  return false;
3445  }
3446 
3447  uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3448 
3449  SDValue AndLHS = LHS->getOperand(0);
3450  SDValue AndRHS = LHS->getOperand(1);
3451 
3452  // Canonicalize the AND to have the mask on the RHS
3453  if (isa<ConstantSDNode>(AndLHS)) {
3454  std::swap(AndLHS, AndRHS);
3455  }
3456 
3457  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3458  if (!MaskCnst) {
3459  // Mask must be constant
3460  return false;
3461  }
3462 
3463  uint64_t MaskVal = MaskCnst->getZExtValue();
3464  uint64_t NumZeros;
3465  uint64_t NumBits;
      // NOTE(review): both NumBits computations below are unsigned
      // subtractions. If ShiftAmt exceeds the position of the mask's highest
      // set bit they wrap, and the only later check (ShiftAmt < NumZeros) does
      // not reject that case. Presumably such nodes are folded away before
      // selection - confirm.
3466  if (isMask_64(MaskVal)) {
3467  NumZeros = 0;
3468  // The number of bits in the result bitfield will be the number of
3469  // trailing ones (the AND) minus the number of bits we shift off
3470  NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
3471  } else if (isShiftedMask_64(MaskVal)) {
3472  NumZeros = countTrailingZeros(MaskVal);
3473  unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
3474  // The number of bits in the result bitfield will be the number of
3475  // trailing zeros plus the number of set bits in the mask minus the
3476  // number of bits we shift off
3477  NumBits = NumZeros + NumOnes - ShiftAmt;
3478  } else {
3479  // This is not a mask we can handle
3480  return false;
3481  }
3482 
3483  if (ShiftAmt < NumZeros) {
3484  // Handling this case would require extra logic that would make this
3485  // transformation non-profitable
3486  return false;
3487  }
3488 
      // NOTE(review): IsSigned stays false on this path even when N is an SRA.
      // That is only equivalent if the mask clears Val's sign bit - confirm.
3489  Val = AndLHS;
3490  Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3491  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3492  } else if (LHS->getOpcode() == ISD::SHL) {
3493  // Here, we have a pattern like:
3494  //
3495  // (sra (shl val, NN), MM)
3496  // or
3497  // (srl (shl val, NN), MM)
3498  //
3499  // If MM >= NN, we can efficiently optimize this with bfe
3500  Val = LHS->getOperand(0);
3501 
3502  SDValue ShlRHS = LHS->getOperand(1);
3503  ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3504  if (!ShlCnst) {
3505  // Shift amount must be constant
3506  return false;
3507  }
3508  uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3509 
3510  SDValue ShrRHS = RHS;
3511  ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3512  if (!ShrCnst) {
3513  // Shift amount must be constant
3514  return false;
3515  }
3516  uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3517 
3518  // To avoid extra codegen and be profitable, we need Outer >= Inner
3519  if (OuterShiftAmt < InnerShiftAmt) {
3520  return false;
3521  }
3522 
3523  // If the outer shift is more than the type size, we have no bitfield to
3524  // extract (since we also check that the inner shift is <= the outer shift
3525  // then this also implies that the inner shift is < the type size)
3526  if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3527  return false;
3528  }
3529 
      // The field starts at (outer - inner) and is (width - outer) bits long.
      // Both subtractions are safe given the two checks above.
3530  Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3531  MVT::i32);
3532  Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3533  DL, MVT::i32);
3534 
3535  if (N->getOpcode() == ISD::SRA) {
3536  // If we have a arithmetic right shift, we need to use the signed bfe
3537  // variant
3538  IsSigned = true;
3539  }
3540  } else {
3541  // No can do...
3542  return false;
3543  }
3544  } else {
3545  // No can do...
3546  return false;
3547  }
3548 
3549 
3550  unsigned Opc;
3551  // Pick the opcode from Val's type (i32 or i64) and IsSigned. The patterns
3552  // built from "and" never set IsSigned, so they get the unsigned variants.
3553  if (Val.getValueType() == MVT::i32) {
3554  if (IsSigned) {
3555  Opc = NVPTX::BFE_S32rii;
3556  } else {
3557  Opc = NVPTX::BFE_U32rii;
3558  }
3559  } else if (Val.getValueType() == MVT::i64) {
3560  if (IsSigned) {
3561  Opc = NVPTX::BFE_S64rii;
3562  } else {
3563  Opc = NVPTX::BFE_U64rii;
3564  }
3565  } else {
3566  // We cannot handle this type
3567  return false;
3568  }
3569 
3570  SDValue Ops[] = {
3571  Val, Start, Len
3572  };
3573 
      // The replacement node reuses N's value-type list.
3574  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3575  return true;
3576 }
3577 
3578 // SelectDirectAddr - Match a direct address for DAG.
3579 // A direct address could be a globaladdress or externalsymbol.
3580 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3581  // Return true if TGA or ES.
3582  if (N.getOpcode() == ISD::TargetGlobalAddress ||
3583  N.getOpcode() == ISD::TargetExternalSymbol) {
3584  Address = N;
3585  return true;
3586  }
3587  if (N.getOpcode() == NVPTXISD::Wrapper) {
3588  Address = N.getOperand(0);
3589  return true;
3590  }
3591  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3592  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3593  if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3595  CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3596  return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3597  }
3598  return false;
3599 }
3600 
3601 // symbol+offset
3602 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3603  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3604  if (Addr.getOpcode() == ISD::ADD) {
3605  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3606  SDValue base = Addr.getOperand(0);
3607  if (SelectDirectAddr(base, Base)) {
3608  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3609  mvt);
3610  return true;
3611  }
3612  }
3613  }
3614  return false;
3615 }
3616 
3617 // symbol+offset
3618 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3619  SDValue &Base, SDValue &Offset) {
3620  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3621 }
3622 
3623 // symbol+offset
3624 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3625  SDValue &Base, SDValue &Offset) {
3626  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3627 }
3628 
3629 // register+offset
3630 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3631  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3632  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3633  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3634  Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3635  return true;
3636  }
3637  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3638  Addr.getOpcode() == ISD::TargetGlobalAddress)
3639  return false; // direct calls.
3640 
3641  if (Addr.getOpcode() == ISD::ADD) {
3642  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3643  return false;
3644  }
3645  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3646  if (FrameIndexSDNode *FIN =
3647  dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3648  // Constant offset from frame ref.
3649  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3650  else
3651  Base = Addr.getOperand(0);
3652  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3653  mvt);
3654  return true;
3655  }
3656  }
3657  return false;
3658 }
3659 
3660 // register+offset
3661 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3662  SDValue &Base, SDValue &Offset) {
3663  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3664 }
3665 
3666 // register+offset
3667 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3668  SDValue &Base, SDValue &Offset) {
3669  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3670 }
3671 
3672 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3673  unsigned int spN) const {
3674  const Value *Src = nullptr;
3675  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3676  if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3677  return true;
3678  Src = mN->getMemOperand()->getValue();
3679  }
3680  if (!Src)
3681  return false;
3682  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3683  return (PT->getAddressSpace() == spN);
3684  return false;
3685 }
3686 
3687 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3688 /// inline asm expressions.
3690  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3691  SDValue Op0, Op1;
3692  switch (ConstraintID) {
3693  default:
3694  return true;
3695  case InlineAsm::Constraint_m: // memory
3696  if (SelectDirectAddr(Op, Op0)) {
3697  OutOps.push_back(Op0);
3698  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3699  return false;
3700  }
3701  if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3702  OutOps.push_back(Op0);
3703  OutOps.push_back(Op1);
3704  return false;
3705  }
3706  break;
3707  }
3708  return true;
3709 }
3710 
3711 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3712 /// conversion from \p SrcTy to \p DestTy.
3713 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3714  bool IsSigned) {
3715  switch (SrcTy.SimpleTy) {
3716  default:
3717  llvm_unreachable("Unhandled source type");
3718  case MVT::i8:
3719  switch (DestTy.SimpleTy) {
3720  default:
3721  llvm_unreachable("Unhandled dest type");
3722  case MVT::i16:
3723  return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3724  case MVT::i32:
3725  return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3726  case MVT::i64:
3727  return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3728  }
3729  case MVT::i16:
3730  switch (DestTy.SimpleTy) {
3731  default:
3732  llvm_unreachable("Unhandled dest type");
3733  case MVT::i8:
3734  return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3735  case MVT::i32:
3736  return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3737  case MVT::i64:
3738  return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3739  }
3740  case MVT::i32:
3741  switch (DestTy.SimpleTy) {
3742  default:
3743  llvm_unreachable("Unhandled dest type");
3744  case MVT::i8:
3745  return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3746  case MVT::i16:
3747  return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3748  case MVT::i64:
3749  return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3750  }
3751  case MVT::i64:
3752  switch (DestTy.SimpleTy) {
3753  default:
3754  llvm_unreachable("Unhandled dest type");
3755  case MVT::i8:
3756  return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3757  case MVT::i16:
3758  return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3759  case MVT::i32:
3760  return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3761  }
3762  }
3763 }
llvm::NVPTXISD::Suld1DI16Clamp
@ Suld1DI16Clamp
Definition: NVPTXISelLowering.h:254
i
i
Definition: README.txt:29
llvm::NVPTXISD::TexUnified2DU32FloatLevel
@ TexUnified2DU32FloatLevel
Definition: NVPTXISelLowering.h:201
llvm::NVPTXISD::Suld3DV2I64Trap
@ Suld3DV2I64Trap
Definition: NVPTXISelLowering.h:368
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1437
llvm::NVPTXISD::Suld2DV2I32Clamp
@ Suld2DV2I32Clamp
Definition: NVPTXISelLowering.h:283
llvm::NVPTXISD::Tex1DFloatS32
@ Tex1DFloatS32
Definition: NVPTXISelLowering.h:83
llvm::NVPTXISD::Tld4UnifiedR2DU64Float
@ Tld4UnifiedR2DU64Float
Definition: NVPTXISelLowering.h:247
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1582
llvm::NVPTXISD::Suld1DArrayI16Zero
@ Suld1DArrayI16Zero
Definition: NVPTXISelLowering.h:386
llvm::NVPTXISD::Suld1DV4I16Trap
@ Suld1DV4I16Trap
Definition: NVPTXISelLowering.h:322
llvm::NVPTXISD::TexUnifiedCubeArrayS32FloatLevel
@ TexUnifiedCubeArrayS32FloatLevel
Definition: NVPTXISelLowering.h:236
llvm::NVPTXISD::TexUnified1DU32S32
@ TexUnified1DU32S32
Definition: NVPTXISelLowering.h:175
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1448
llvm::NVPTXISD::TexCubeArrayU32Float
@ TexCubeArrayU32Float
Definition: NVPTXISelLowering.h:153
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1433
llvm::NVPTXISD::Suld1DArrayV4I8Trap
@ Suld1DArrayV4I8Trap
Definition: NVPTXISelLowering.h:333
llvm::AddrSpaceCastSDNode::getSrcAddressSpace
unsigned getSrcAddressSpace() const
Definition: SelectionDAGNodes.h:1266
llvm::NVPTXISD::Suld1DArrayI32Clamp
@ Suld1DArrayI32Clamp
Definition: NVPTXISelLowering.h:267
llvm::NVPTXISD::TexUnified3DFloatFloat
@ TexUnified3DFloatFloat
Definition: NVPTXISelLowering.h:216
llvm::NVPTXISD::Suld2DArrayI32Zero
@ Suld2DArrayI32Zero
Definition: NVPTXISelLowering.h:411
llvm::NVPTXISD::Suld2DArrayV4I32Trap
@ Suld2DArrayV4I32Trap
Definition: NVPTXISelLowering.h:359
llvm::NVPTXISD::Suld1DArrayI64Trap
@ Suld1DArrayI64Trap
Definition: NVPTXISelLowering.h:328
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::NVPTXISD::Suld2DArrayV4I16Zero
@ Suld2DArrayV4I16Zero
Definition: NVPTXISelLowering.h:418
llvm::NVPTXDAGToDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: NVPTXISelDAGToDAG.cpp:44
llvm::NVPTX::PTXLdStInstCode::SHARED
@ SHARED
Definition: NVPTX.h:111
llvm::NVPTXISD::Tld4UnifiedR2DFloatFloat
@ Tld4UnifiedR2DFloatFloat
Definition: NVPTXISelLowering.h:239
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:387
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256
llvm::NVPTXISD::TexUnifiedCubeU32FloatLevel
@ TexUnifiedCubeU32FloatLevel
Definition: NVPTXISelLowering.h:232
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1106
llvm::NVPTXISD::LoadParamV2
@ LoadParamV2
Definition: NVPTXISelLowering.h:71
llvm::ISD::SETGT
@ SETGT
Definition: ISDOpcodes.h:1445
llvm::NVPTXISD::TexUnifiedCubeFloatFloatLevel
@ TexUnifiedCubeFloatFloatLevel
Definition: NVPTXISelLowering.h:228
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::NVPTXISD::LoadV4
@ LoadV4
Definition: NVPTXISelLowering.h:63
llvm::NVPTXISD::Suld2DArrayV2I8Clamp
@ Suld2DArrayV2I8Clamp
Definition: NVPTXISelLowering.h:293
llvm::NVPTXISD::Suld1DArrayV4I16Zero
@ Suld1DArrayV4I16Zero
Definition: NVPTXISelLowering.h:394
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1449
llvm::NVPTXISD::MoveParam
@ MoveParam
Definition: NVPTXISelLowering.h:47
llvm::NVPTXISD::TexCubeS32Float
@ TexCubeS32Float
Definition: NVPTXISelLowering.h:145
llvm::NVPTXISD::TexUnified1DArrayU32Float
@ TexUnified1DArrayU32Float
Definition: NVPTXISelLowering.h:188
llvm::NVPTXISD::TexUnified1DFloatFloatLevel
@ TexUnified1DFloatFloatLevel
Definition: NVPTXISelLowering.h:169
llvm::NVPTXISD::Tex1DU32S32
@ Tex1DU32S32
Definition: NVPTXISelLowering.h:91
llvm::NVPTXSubtarget::hasLDG
bool hasLDG() const
Definition: NVPTXSubtarget.h:75
llvm::NVPTX::PTXCmpMode::EQU
@ EQU
Definition: NVPTX.h:162
llvm::NVPTXISD::Suld1DI16Zero
@ Suld1DI16Zero
Definition: NVPTXISelLowering.h:374
AtomicOrdering.h
llvm::NVPTXISD::Suld3DI16Zero
@ Suld3DI16Zero
Definition: NVPTXISelLowering.h:422
llvm::NVPTXISD::Suld2DV2I32Trap
@ Suld2DV2I32Trap
Definition: NVPTXISelLowering.h:343
llvm::NVPTXISD::Tex1DArrayS32Float
@ Tex1DArrayS32Float
Definition: NVPTXISelLowering.h:100
llvm::NVPTXISD::Suld3DI8Zero
@ Suld3DI8Zero
Definition: NVPTXISelLowering.h:421
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:159
llvm::NVPTXISD::Suld1DArrayV4I32Trap
@ Suld1DArrayV4I32Trap
Definition: NVPTXISelLowering.h:335
llvm::NVPTXISD::TexUnifiedCubeS32Float
@ TexUnifiedCubeS32Float
Definition: NVPTXISelLowering.h:229
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:77
getCodeAddrSpace
static unsigned int getCodeAddrSpace(MemSDNode *N)
Definition: NVPTXISelDAGToDAG.cpp:656
llvm::NVPTX::PTXCmpMode::NotANumber
@ NotANumber
Definition: NVPTX.h:170
llvm::NVPTXISD::Tex2DS32Float
@ Tex2DS32Float
Definition: NVPTXISelLowering.h:112
llvm::NVPTXISD::Tld4G2DS64Float
@ Tld4G2DS64Float
Definition: NVPTXISelLowering.h:160
llvm::NVPTXISD::Tex1DArrayFloatFloatLevel
@ Tex1DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:97
llvm::NVPTXSubtarget::getTargetLowering
const NVPTXTargetLowering * getTargetLowering() const override
Definition: NVPTXSubtarget.h:64
llvm::NVPTX::PTXCmpMode::NUM
@ NUM
Definition: NVPTX.h:168
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::NVPTXTargetMachine::is64Bit
bool is64Bit() const
Definition: NVPTXTargetMachine.h:48
llvm::NVPTXISD::Suld3DV2I32Trap
@ Suld3DV2I32Trap
Definition: NVPTXISelLowering.h:367
llvm::NVPTXISD::Suld3DV2I8Clamp
@ Suld3DV2I8Clamp
Definition: NVPTXISelLowering.h:305
getLdStRegType
static int getLdStRegType(EVT VT)
Definition: NVPTXISelDAGToDAG.cpp:842
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::NVPTX::PTXLdStInstCode::LOCAL
@ LOCAL
Definition: NVPTX.h:113
Wrapper
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Definition: AMDGPUAliasAnalysis.cpp:31
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1444
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:386
llvm::ADDRESS_SPACE_LOCAL
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:9439
llvm::NVPTXISD::Tld4A2DS64Float
@ Tld4A2DS64Float
Definition: NVPTXISelLowering.h:162
llvm::NVPTXISD::Suld1DV2I64Trap
@ Suld1DV2I64Trap
Definition: NVPTXISelLowering.h:320
llvm::NVPTXISD::TexUnified1DArrayS32S32
@ TexUnified1DArrayS32S32
Definition: NVPTXISelLowering.h:183
llvm::NVPTXISD::TexUnified2DArrayFloatS32
@ TexUnified2DArrayFloatS32
Definition: NVPTXISelLowering.h:203
llvm::NVPTXISD::Suld1DArrayV2I32Clamp
@ Suld1DArrayV2I32Clamp
Definition: NVPTXISelLowering.h:271
llvm::NVPTXISD::Tex3DFloatFloatLevel
@ Tex3DFloatFloatLevel
Definition: NVPTXISelLowering.h:133
llvm::NVPTXISD::Suld1DV2I32Clamp
@ Suld1DV2I32Clamp
Definition: NVPTXISelLowering.h:259
ErrorHandling.h
llvm::NVPTXISD::Tld4UnifiedA2DU64Float
@ Tld4UnifiedA2DU64Float
Definition: NVPTXISelLowering.h:250
llvm::ADDRESS_SPACE_PARAM
@ ADDRESS_SPACE_PARAM
Definition: NVPTXBaseInfo.h:29
llvm::NVPTXISD::Tld4UnifiedR2DS64Float
@ Tld4UnifiedR2DS64Float
Definition: NVPTXISelLowering.h:243
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1355
ValueTracking.h
llvm::NVPTXISD::Tld4B2DS64Float
@ Tld4B2DS64Float
Definition: NVPTXISelLowering.h:161
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:463
llvm::NVPTXISD::TexUnified2DU32FloatGrad
@ TexUnified2DU32FloatGrad
Definition: NVPTXISelLowering.h:202
llvm::NVPTXISD::TexUnified3DS32FloatGrad
@ TexUnified3DS32FloatGrad
Definition: NVPTXISelLowering.h:222
llvm::NVPTXISD::Tex2DArrayFloatS32
@ Tex2DArrayFloatS32
Definition: NVPTXISelLowering.h:119
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2344
llvm::MVT::Glue
@ Glue
Definition: MachineValueType.h:282
llvm::NVPTX::PTXCmpMode::GEU
@ GEU
Definition: NVPTX.h:167
llvm::NVPTXISD::Suld2DI16Trap
@ Suld2DI16Trap
Definition: NVPTXISelLowering.h:338
llvm::NVPTXISD::TexCubeFloatFloatLevel
@ TexCubeFloatFloatLevel
Definition: NVPTXISelLowering.h:144
llvm::NVPTXISD::TexUnifiedCubeArrayU32Float
@ TexUnifiedCubeArrayU32Float
Definition: NVPTXISelLowering.h:237
llvm::NVPTXISD::Tld4B2DU64Float
@ Tld4B2DU64Float
Definition: NVPTXISelLowering.h:165
llvm::NVPTXISD::Tex1DArrayS32FloatLevel
@ Tex1DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:101
llvm::NVPTXISD::Tld4UnifiedB2DS64Float
@ Tld4UnifiedB2DS64Float
Definition: NVPTXISelLowering.h:245
llvm::NVPTXISD::TexUnified1DArrayS32Float
@ TexUnified1DArrayS32Float
Definition: NVPTXISelLowering.h:184
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1439
llvm::NVPTXISD::Suld3DI64Trap
@ Suld3DI64Trap
Definition: NVPTXISelLowering.h:364
llvm::NVPTXISD::Suld1DV2I64Zero
@ Suld1DV2I64Zero
Definition: NVPTXISelLowering.h:380
llvm::NVPTXISD::Tex3DFloatS32
@ Tex3DFloatS32
Definition: NVPTXISelLowering.h:131
llvm::NVPTXISD::TexUnified2DArrayU32S32
@ TexUnified2DArrayU32S32
Definition: NVPTXISelLowering.h:211
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::NVPTXISD::Suld1DV4I8Clamp
@ Suld1DV4I8Clamp
Definition: NVPTXISelLowering.h:261
llvm::NVPTXISD::Suld3DV4I32Trap
@ Suld3DV4I32Trap
Definition: NVPTXISelLowering.h:371
llvm::NVPTXISD::Suld2DV2I16Clamp
@ Suld2DV2I16Clamp
Definition: NVPTXISelLowering.h:282
Vector
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::NVPTXISD::Tex3DS32S32
@ Tex3DS32S32
Definition: NVPTXISelLowering.h:135
llvm::NVPTXISD::Suld3DV2I32Clamp
@ Suld3DV2I32Clamp
Definition: NVPTXISelLowering.h:307
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1275
llvm::NVPTXISD::TexUnified3DS32S32
@ TexUnified3DS32S32
Definition: NVPTXISelLowering.h:219
llvm::NVPTXISD::Suld2DArrayI8Trap
@ Suld2DArrayI8Trap
Definition: NVPTXISelLowering.h:349
getPTXCmpMode
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
Definition: NVPTXISelDAGToDAG.cpp:537
llvm::NVPTXISD::Suld1DArrayV4I16Trap
@ Suld1DArrayV4I16Trap
Definition: NVPTXISelLowering.h:334
llvm::NVPTXISD::Suld1DV4I32Trap
@ Suld1DV4I32Trap
Definition: NVPTXISelLowering.h:323
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::NVPTXISD::Suld3DI32Trap
@ Suld3DI32Trap
Definition: NVPTXISelLowering.h:363
llvm::NVPTXISD::Suld2DI8Zero
@ Suld2DI8Zero
Definition: NVPTXISelLowering.h:397
llvm::NVPTXISD::Suld2DV4I32Clamp
@ Suld2DV4I32Clamp
Definition: NVPTXISelLowering.h:287
llvm::NVPTXISD::Tld4R2DS64Float
@ Tld4R2DS64Float
Definition: NVPTXISelLowering.h:159
llvm::NVPTXISD::Suld2DArrayV2I64Trap
@ Suld2DArrayV2I64Trap
Definition: NVPTXISelLowering.h:356
llvm::NVPTXISD::TexUnified1DArrayU32S32
@ TexUnified1DArrayU32S32
Definition: NVPTXISelLowering.h:187
llvm::NVPTXISD::Tex2DArrayU32FloatLevel
@ Tex2DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:129
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2332
llvm::ISD::SETOEQ
@ SETOEQ
Definition: ISDOpcodes.h:1427
llvm::NVPTXISD::TexUnified2DS32S32
@ TexUnified2DS32S32
Definition: NVPTXISelLowering.h:195
llvm::NVPTXISD::TexUnifiedCubeArrayS32Float
@ TexUnifiedCubeArrayS32Float
Definition: NVPTXISelLowering.h:235
llvm::NVPTXISD::Suld3DV2I64Zero
@ Suld3DV2I64Zero
Definition: NVPTXISelLowering.h:428
llvm::NVPTXISD::Suld2DV2I8Trap
@ Suld2DV2I8Trap
Definition: NVPTXISelLowering.h:341
llvm::NVPTXISD::Suld1DV2I8Trap
@ Suld1DV2I8Trap
Definition: NVPTXISelLowering.h:317
llvm::NVPTXISD::TexUnified2DS32FloatLevel
@ TexUnified2DS32FloatLevel
Definition: NVPTXISelLowering.h:197
llvm::NVPTXISD::LDGV4
@ LDGV4
Definition: NVPTXISelLowering.h:65
llvm::NVPTXISD::Tex1DArrayS32FloatGrad
@ Tex1DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:102
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1435
llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:26
llvm::NVPTXISD::Tex2DFloatFloatLevel
@ Tex2DFloatFloatLevel
Definition: NVPTXISelLowering.h:109
llvm::NVPTX::PTXLdStInstCode::VecType
VecType
Definition: NVPTX.h:121
llvm::NVPTXISD::TexUnified3DU32Float
@ TexUnified3DU32Float
Definition: NVPTXISelLowering.h:224
llvm::NVPTXISD::Tex1DU32FloatLevel
@ Tex1DU32FloatLevel
Definition: NVPTXISelLowering.h:93
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::NVPTXISD::Suld3DV4I16Clamp
@ Suld3DV4I16Clamp
Definition: NVPTXISelLowering.h:310
llvm::NVPTXISD::Suld1DArrayV2I32Trap
@ Suld1DArrayV2I32Trap
Definition: NVPTXISelLowering.h:331
llvm::NVPTXISD::Suld1DI32Trap
@ Suld1DI32Trap
Definition: NVPTXISelLowering.h:315
NVPTXUtilities.h
llvm::NVPTXISD::Suld2DArrayV4I8Clamp
@ Suld2DArrayV4I8Clamp
Definition: NVPTXISelLowering.h:297
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
llvm::NVPTXISD::Suld1DArrayV4I8Zero
@ Suld1DArrayV4I8Zero
Definition: NVPTXISelLowering.h:393
llvm::NVPTXISD::Suld3DV2I8Zero
@ Suld3DV2I8Zero
Definition: NVPTXISelLowering.h:425
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
llvm::NVPTXISD::Suld2DArrayI64Clamp
@ Suld2DArrayI64Clamp
Definition: NVPTXISelLowering.h:292
llvm::NVPTXISD::Suld1DV4I16Clamp
@ Suld1DV4I16Clamp
Definition: NVPTXISelLowering.h:262
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
NVPTXISelDAGToDAG.h
llvm::NVPTXISD::TexCubeU32Float
@ TexCubeU32Float
Definition: NVPTXISelLowering.h:147
llvm::NVPTXISD::Suld2DArrayV2I64Zero
@ Suld2DArrayV2I64Zero
Definition: NVPTXISelLowering.h:416
llvm::NVPTXISD::Suld3DI16Clamp
@ Suld3DI16Clamp
Definition: NVPTXISelLowering.h:302
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
llvm::NVPTXISD::TexUnified2DArrayS32FloatGrad
@ TexUnified2DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:210
llvm::NVPTXISD::Tex1DS32S32
@ Tex1DS32S32
Definition: NVPTXISelLowering.h:87
llvm::NVPTXISD::LoadParamV4
@ LoadParamV4
Definition: NVPTXISelLowering.h:72
llvm::NVPTXISD::Suld3DI32Zero
@ Suld3DI32Zero
Definition: NVPTXISelLowering.h:423
GlobalValue.h
llvm::NVPTXISD::Suld2DV4I8Zero
@ Suld2DV4I8Zero
Definition: NVPTXISelLowering.h:405
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:720
llvm::NVPTXISD::Suld1DV4I8Trap
@ Suld1DV4I8Trap
Definition: NVPTXISelLowering.h:321
llvm::NVPTXISD::Suld2DV4I16Zero
@ Suld2DV4I16Zero
Definition: NVPTXISelLowering.h:406
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1141
llvm::NVPTXISD::Tld4UnifiedB2DFloatFloat
@ Tld4UnifiedB2DFloatFloat
Definition: NVPTXISelLowering.h:241
llvm::NVPTXISD::Suld3DV4I32Zero
@ Suld3DV4I32Zero
Definition: NVPTXISelLowering.h:431
llvm::NVPTXISD::Suld2DArrayV4I32Zero
@ Suld2DArrayV4I32Zero
Definition: NVPTXISelLowering.h:419
llvm::NVPTXISD::Suld3DV2I8Trap
@ Suld3DV2I8Trap
Definition: NVPTXISelLowering.h:365
llvm::ISD::SETGE
@ SETGE
Definition: ISDOpcodes.h:1446
llvm::NVPTXISD::Tex3DFloatFloatGrad
@ Tex3DFloatFloatGrad
Definition: NVPTXISelLowering.h:134
llvm::NVPTXISD::Tex2DArrayFloatFloatLevel
@ Tex2DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:121
llvm::NVPTXISD::TexUnified1DFloatS32
@ TexUnified1DFloatS32
Definition: NVPTXISelLowering.h:167
llvm::ADDRESS_SPACE_GLOBAL
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
llvm::NVPTXISD::TexCubeArrayS32FloatLevel
@ TexCubeArrayS32FloatLevel
Definition: NVPTXISelLowering.h:152
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
llvm::NVPTXISD::LoadV2
@ LoadV2
Definition: NVPTXISelLowering.h:62
llvm::NVPTXISD::Suld1DArrayI32Zero
@ Suld1DArrayI32Zero
Definition: NVPTXISelLowering.h:387
llvm::NVPTXISD::Tex2DArrayS32S32
@ Tex2DArrayS32S32
Definition: NVPTXISelLowering.h:123
llvm::NVPTXISD::Suld2DV2I64Zero
@ Suld2DV2I64Zero
Definition: NVPTXISelLowering.h:404
llvm::SelectionDAGISel::OptLevel
CodeGenOpt::Level OptLevel
Definition: SelectionDAGISel.h:54
llvm::NVPTXISD::Tex1DU32FloatGrad
@ Tex1DU32FloatGrad
Definition: NVPTXISelLowering.h:94
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:58
llvm::NVPTXISD::Suld2DI32Clamp
@ Suld2DI32Clamp
Definition: NVPTXISelLowering.h:279
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:452
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:308
EQ
#define EQ(a, b)
Definition: regexec.c:112
llvm::NVPTXISD::Tex3DS32Float
@ Tex3DS32Float
Definition: NVPTXISelLowering.h:136
llvm::NVPTXISD::Suld2DV2I64Clamp
@ Suld2DV2I64Clamp
Definition: NVPTXISelLowering.h:284
llvm::NVPTXISD::TexUnified2DFloatFloat
@ TexUnified2DFloatFloat
Definition: NVPTXISelLowering.h:192
llvm::NVPTXISD::Tex2DFloatFloat
@ Tex2DFloatFloat
Definition: NVPTXISelLowering.h:108
llvm::NVPTXISD::Suld1DV2I8Clamp
@ Suld1DV2I8Clamp
Definition: NVPTXISelLowering.h:257
llvm::NVPTXISD::Suld1DArrayV2I8Zero
@ Suld1DArrayV2I8Zero
Definition: NVPTXISelLowering.h:389
llvm::NVPTXISD::TexUnifiedCubeFloatFloat
@ TexUnifiedCubeFloatFloat
Definition: NVPTXISelLowering.h:227
llvm::NVPTXISD::Tld4G2DFloatFloat
@ Tld4G2DFloatFloat
Definition: NVPTXISelLowering.h:156
Param
Value * Param
Definition: NVPTXLowerArgs.cpp:165
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:341
llvm::NVPTXISD::StoreRetval
@ StoreRetval
Definition: NVPTXISelLowering.h:78
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::NVPTXISD::Tld4UnifiedB2DU64Float
@ Tld4UnifiedB2DU64Float
Definition: NVPTXISelLowering.h:249
llvm::NVPTXISD::Suld2DArrayI16Clamp
@ Suld2DArrayI16Clamp
Definition: NVPTXISelLowering.h:290
llvm::NVPTXISD::Tex2DArrayU32FloatGrad
@ Tex2DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:130
llvm::NVPTXISD::TexUnifiedCubeS32FloatLevel
@ TexUnifiedCubeS32FloatLevel
Definition: NVPTXISelLowering.h:230
llvm::NVPTXISD::TexUnified1DFloatFloatGrad
@ TexUnified1DFloatFloatGrad
Definition: NVPTXISelLowering.h:170
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:231
llvm::NVPTXISD::TexUnified1DArrayFloatS32
@ TexUnified1DArrayFloatS32
Definition: NVPTXISelLowering.h:179
llvm::NVPTXISD::Suld1DArrayV2I16Trap
@ Suld1DArrayV2I16Trap
Definition: NVPTXISelLowering.h:330
llvm::NVPTXISD::Suld1DArrayV2I32Zero
@ Suld1DArrayV2I32Zero
Definition: NVPTXISelLowering.h:391
llvm::ISD::ADDRSPACECAST
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:890
llvm::NVPTXISD::Suld1DV4I16Zero
@ Suld1DV4I16Zero
Definition: NVPTXISelLowering.h:382
llvm::NVPTXISD::Tex2DU32FloatLevel
@ Tex2DU32FloatLevel
Definition: NVPTXISelLowering.h:117
llvm::NVPTXISD::TexUnified1DU32Float
@ TexUnified1DU32Float
Definition: NVPTXISelLowering.h:176
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
llvm::NVPTXISD::Suld2DArrayV4I8Trap
@ Suld2DArrayV4I8Trap
Definition: NVPTXISelLowering.h:357
llvm::NVPTXISD::Suld2DArrayI8Clamp
@ Suld2DArrayI8Clamp
Definition: NVPTXISelLowering.h:289
llvm::NVPTXISD::TexUnifiedCubeArrayU32FloatLevel
@ TexUnifiedCubeArrayU32FloatLevel
Definition: NVPTXISelLowering.h:238
llvm::NVPTXISD::Tex1DArrayU32S32
@ Tex1DArrayU32S32
Definition: NVPTXISelLowering.h:103
llvm::NVPTXISD::Suld1DArrayV4I16Clamp
@ Suld1DArrayV4I16Clamp
Definition: NVPTXISelLowering.h:274
llvm::ADDRESS_SPACE_CONST
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
llvm::NVPTXTargetLowering::usePrecSqrtF32
bool usePrecSqrtF32() const
Definition: NVPTXISelLowering.cpp:105
llvm::NVPTXISD::Tex1DS32Float
@ Tex1DS32Float
Definition: NVPTXISelLowering.h:88
llvm::NVPTXISD::Suld2DArrayV4I32Clamp
@ Suld2DArrayV4I32Clamp
Definition: NVPTXISelLowering.h:299
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::NVPTXISD::Suld1DV2I8Zero
@ Suld1DV2I8Zero
Definition: NVPTXISelLowering.h:377
llvm::NVPTXISD::Suld2DV4I8Clamp
@ Suld2DV4I8Clamp
Definition: NVPTXISelLowering.h:285
llvm::NVPTXISD::Tex2DArrayU32S32
@ Tex2DArrayU32S32
Definition: NVPTXISelLowering.h:127
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1163
llvm::AArch64CC::LE
@ LE
Definition: AArch64BaseInfo.h:268
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1785
llvm::NVPTXISD::StoreV4
@ StoreV4
Definition: NVPTXISelLowering.h:69
llvm::NVPTXISD::TexCubeArrayFloatFloat
@ TexCubeArrayFloatFloat
Definition: NVPTXISelLowering.h:149
llvm::NVPTXISD::LDGV2
@ LDGV2
Definition: NVPTXISelLowering.h:64
llvm::NVPTXISD::TexUnified3DS32FloatLevel
@ TexUnified3DS32FloatLevel
Definition: NVPTXISelLowering.h:221
llvm::NVPTXTargetMachine::useShortPointers
bool useShortPointers() const
Definition: NVPTXTargetMachine.h:49
llvm::NVPTXISD::Suld2DV4I32Zero
@ Suld2DV4I32Zero
Definition: NVPTXISelLowering.h:407
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1430
llvm::ISD::TargetGlobalAddress
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
llvm::NVPTXISD::Suld1DI64Clamp
@ Suld1DI64Clamp
Definition: NVPTXISelLowering.h:256
llvm::NVPTXISD::LDUV4
@ LDUV4
Definition: NVPTXISelLowering.h:67
llvm::NVPTXISD::Suld1DV4I32Clamp
@ Suld1DV4I32Clamp
Definition: NVPTXISelLowering.h:263
llvm::NVPTXISD::Suld2DArrayV2I32Trap
@ Suld2DArrayV2I32Trap
Definition: NVPTXISelLowering.h:355
llvm::NVPTXISD::Tex2DS32FloatGrad
@ Tex2DS32FloatGrad
Definition: NVPTXISelLowering.h:114
llvm::NVPTXISD::TexUnified2DFloatFloatGrad
@ TexUnified2DFloatFloatGrad
Definition: NVPTXISelLowering.h:194
llvm::NVPTX::PTXLdStInstCode::Scalar
@ Scalar
Definition: NVPTX.h:122
llvm::NVPTXISD::Suld3DV4I16Trap
@ Suld3DV4I16Trap
Definition: NVPTXISelLowering.h:370
llvm::SDValue::getValueSizeInBits
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
Definition: SelectionDAGNodes.h:199
llvm::NVPTXISD::TexUnified1DArrayS32FloatLevel
@ TexUnified1DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:185
llvm::NVPTXISD::Suld2DV2I16Zero
@ Suld2DV2I16Zero
Definition: NVPTXISelLowering.h:402
llvm::MemSDNode::isVolatile
bool isVolatile() const
Definition: SelectionDAGNodes.h:1314
llvm::NVPTXISD::Suld1DI8Clamp
@ Suld1DI8Clamp
Definition: NVPTXISelLowering.h:253
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1431
llvm::omp::RTLDependInfoFields::Len
@ Len
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1436
llvm::MVT::getScalarType
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Definition: MachineValueType.h:538
llvm::NVPTXISD::StoreRetvalV4
@ StoreRetvalV4
Definition: NVPTXISelLowering.h:80
llvm::NVPTXISD::TexCubeU32FloatLevel
@ TexCubeU32FloatLevel
Definition: NVPTXISelLowering.h:148
llvm::NVPTXISD::Tex2DArrayFloatFloat
@ Tex2DArrayFloatFloat
Definition: NVPTXISelLowering.h:120
llvm::NVPTXISD::TexUnified3DS32Float
@ TexUnified3DS32Float
Definition: NVPTXISelLowering.h:220
llvm::NVPTXISD::Tex2DArrayS32Float
@ Tex2DArrayS32Float
Definition: NVPTXISelLowering.h:124
llvm::NVPTXISD::Tld4G2DU64Float
@ Tld4G2DU64Float
Definition: NVPTXISelLowering.h:164
llvm::NVPTXISD::Suld3DV4I8Zero
@ Suld3DV4I8Zero
Definition: NVPTXISelLowering.h:429
llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:31
llvm::NVPTXISD::TexCubeArrayU32FloatLevel
@ TexCubeArrayU32FloatLevel
Definition: NVPTXISelLowering.h:154
llvm::NVPTXISD::TexCubeS32FloatLevel
@ TexCubeS32FloatLevel
Definition: NVPTXISelLowering.h:146
llvm::NVPTXISD::TexUnified1DArrayS32FloatGrad
@ TexUnified1DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:186
llvm::NVPTXISD::TexUnified3DU32FloatLevel
@ TexUnified3DU32FloatLevel
Definition: NVPTXISelLowering.h:225
llvm::NVPTXISD::Tld4A2DU64Float
@ Tld4A2DU64Float
Definition: NVPTXISelLowering.h:166
llvm::isKernelFunction
bool isKernelFunction(const Function &F)
Definition: NVPTXUtilities.cpp:284
llvm::InlineAsm::Constraint_m
@ Constraint_m
Definition: InlineAsm.h:259
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1440
llvm::NVPTXISD::Suld3DV2I16Zero
@ Suld3DV2I16Zero
Definition: NVPTXISelLowering.h:426
llvm::NVPTXISD::Suld2DArrayI64Trap
@ Suld2DArrayI64Trap
Definition: NVPTXISelLowering.h:352
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:660
llvm::NVPTXISD::Tld4UnifiedG2DFloatFloat
@ Tld4UnifiedG2DFloatFloat
Definition: NVPTXISelLowering.h:240
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::NVPTX::PTXLdStInstCode::V4
@ V4
Definition: NVPTX.h:124
llvm::NVPTXISD::Suld3DI64Zero
@ Suld3DI64Zero
Definition: NVPTXISelLowering.h:424
llvm::NVPTXISD::TexUnified1DArrayFloatFloatGrad
@ TexUnified1DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:182
llvm::pdb::PDB_ColorItem::Address
@ Address
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4551
llvm::NVPTXISD::Tex3DFloatFloat
@ Tex3DFloatFloat
Definition: NVPTXISelLowering.h:132
llvm::NVPTXISD::Suld1DArrayV2I8Clamp
@ Suld1DArrayV2I8Clamp
Definition: NVPTXISelLowering.h:269
llvm::NVPTXISD::Tex1DArrayFloatS32
@ Tex1DArrayFloatS32
Definition: NVPTXISelLowering.h:95
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:419
llvm::NVPTXISD::Tex3DU32FloatLevel
@ Tex3DU32FloatLevel
Definition: NVPTXISelLowering.h:141
llvm::NVPTXISD::Tex2DArrayS32FloatLevel
@ Tex2DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:125
llvm::MVT::v2f16
@ v2f16
Definition: MachineValueType.h:147
uint64_t
llvm::NVPTXISD::Suld3DI8Trap
@ Suld3DI8Trap
Definition: NVPTXISelLowering.h:361
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1359
llvm::NVPTXISD::LoadParam
@ LoadParam
Definition: NVPTXISelLowering.h:70
llvm::NVPTXISD::Tex1DU32Float
@ Tex1DU32Float
Definition: NVPTXISelLowering.h:92
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:966
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79
llvm::NVPTXISD::TexUnified2DU32S32
@ TexUnified2DU32S32
Definition: NVPTXISelLowering.h:199
llvm::NVPTXDAGToDAGISel::NVPTXDAGToDAGISel
NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel)
Definition: NVPTXISelDAGToDAG.cpp:38
llvm::NVPTXISD::Tex1DArrayFloatFloat
@ Tex1DArrayFloatFloat
Definition: NVPTXISelLowering.h:96
llvm::NVPTXISD::Tex2DU32Float
@ Tex2DU32Float
Definition: NVPTXISelLowering.h:116
llvm::NVPTXISD::Suld1DArrayI16Trap
@ Suld1DArrayI16Trap
Definition: NVPTXISelLowering.h:326
llvm::NVPTXISD::TexUnified2DU32Float
@ TexUnified2DU32Float
Definition: NVPTXISelLowering.h:200
llvm::NVPTXISD::Suld1DV2I32Trap
@ Suld1DV2I32Trap
Definition: NVPTXISelLowering.h:319
llvm::NVPTXISD::Suld3DI64Clamp
@ Suld3DI64Clamp
Definition: NVPTXISelLowering.h:304
llvm::NVPTXISD::Suld2DV2I8Clamp
@ Suld2DV2I8Clamp
Definition: NVPTXISelLowering.h:281
llvm::NVPTXISD::Suld1DArrayI8Clamp
@ Suld1DArrayI8Clamp
Definition: NVPTXISelLowering.h:265
llvm::NVPTXISD::Tex3DS32FloatLevel
@ Tex3DS32FloatLevel
Definition: NVPTXISelLowering.h:137
llvm::NVPTXISD::Tex1DArrayU32FloatGrad
@ Tex1DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:106
llvm::NVPTXISD::Tex2DArrayS32FloatGrad
@ Tex2DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:126
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::NVPTXISD::TexUnified2DArrayU32FloatLevel
@ TexUnified2DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:213
llvm::NVPTX::PTXLdStInstCode::Float
@ Float
Definition: NVPTX.h:118
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:921
llvm::NVPTXISD::Suld2DArrayI16Trap
@ Suld2DArrayI16Trap
Definition: NVPTXISelLowering.h:350
NVPTXBaseInfo.h
llvm::NVPTXISD::Suld3DI32Clamp
@ Suld3DI32Clamp
Definition: NVPTXISelLowering.h:303
llvm::NVPTXISD::Suld1DArrayV4I32Clamp
@ Suld1DArrayV4I32Clamp
Definition: NVPTXISelLowering.h:275
llvm::countTrailingOnes
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:491
llvm::CondCodeSDNode
Definition: SelectionDAGNodes.h:2274
llvm::ADDRESS_SPACE_SHARED
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2359
llvm::MVT::v2bf16
@ v2bf16
Definition: MachineValueType.h:158
llvm::NVPTXISD::Suld2DArrayV2I32Zero
@ Suld2DArrayV2I32Zero
Definition: NVPTXISelLowering.h:415
llvm::NVPTXISD::TexUnified2DArrayFloatFloatGrad
@ TexUnified2DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:206
canLowerToLDG
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
Definition: NVPTXISelDAGToDAG.cpp:676
pickOpcodeForVT
static std::optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, std::optional< unsigned > Opcode_i64, unsigned Opcode_f16, unsigned Opcode_f16x2, unsigned Opcode_f32, std::optional< unsigned > Opcode_f64)
Definition: NVPTXISelDAGToDAG.cpp:812
llvm::NVPTXISD::TexUnified2DArrayS32FloatLevel
@ TexUnified2DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:209
llvm::NVPTXISD::TexUnified2DArrayS32Float
@ TexUnified2DArrayS32Float
Definition: NVPTXISelLowering.h:208
llvm::NVPTXISD::Suld1DArrayV2I64Trap
@ Suld1DArrayV2I64Trap
Definition: NVPTXISelLowering.h:332
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2372
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
llvm::ISD::SETOGT
@ SETOGT
Definition: ISDOpcodes.h:1428
llvm::NVPTXISD::Tex1DFloatFloatLevel
@ Tex1DFloatFloatLevel
Definition: NVPTXISelLowering.h:85
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::NVPTXISD::TexUnified3DFloatS32
@ TexUnified3DFloatS32
Definition: NVPTXISelLowering.h:215
llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloat
@ TexUnifiedCubeArrayFloatFloat
Definition: NVPTXISelLowering.h:233
llvm::NVPTXISD::TexUnified3DFloatFloatLevel
@ TexUnified3DFloatFloatLevel
Definition: NVPTXISelLowering.h:217
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::NVPTXISD::Suld2DI32Zero
@ Suld2DI32Zero
Definition: NVPTXISelLowering.h:399
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:919
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::NVPTXISD::Suld3DV2I32Zero
@ Suld3DV2I32Zero
Definition: NVPTXISelLowering.h:427
llvm::NVPTXISD::TexUnified1DArrayU32FloatLevel
@ TexUnified1DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:189
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1438
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1597
base
therefore end up llgh r3 lr r0 br r14 but truncating the load would lh r3 br r14 Functions ret i64 and ought to be implemented ngr r0 br r14 but two address optimizations reverse the order of the AND and ngr r2 lgr r0 br r14 CodeGen SystemZ and ll has several examples of this Out of range displacements are usually handled by loading the full address into a register In many cases it would be better to create an anchor point instead E g i64 base
Definition: README.txt:125
llvm::NVPTXISD::Suld1DArrayI16Clamp
@ Suld1DArrayI16Clamp
Definition: NVPTXISelLowering.h:266
llvm::NVPTXISD::Suld2DV2I32Zero
@ Suld2DV2I32Zero
Definition: NVPTXISelLowering.h:403
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1424
llvm::NVPTXISD::Tex2DFloatFloatGrad
@ Tex2DFloatFloatGrad
Definition: NVPTXISelLowering.h:110
llvm::SelectionDAGISel::CurDAG
SelectionDAG * CurDAG
Definition: SelectionDAGISel.h:49
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9877
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:9645
llvm::NVPTXISD::TexUnified2DFloatFloatLevel
@ TexUnified2DFloatFloatLevel
Definition: NVPTXISelLowering.h:193
llvm::NVPTXISD::TexUnified1DU32FloatLevel
@ TexUnified1DU32FloatLevel
Definition: NVPTXISelLowering.h:177
llvm::NVPTXISD::Tld4R2DFloatFloat
@ Tld4R2DFloatFloat
Definition: NVPTXISelLowering.h:155
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2393
llvm::NVPTXISD::Suld2DArrayV2I32Clamp
@ Suld2DArrayV2I32Clamp
Definition: NVPTXISelLowering.h:295
llvm::NVPTXISD::Tld4B2DFloatFloat
@ Tld4B2DFloatFloat
Definition: NVPTXISelLowering.h:157
llvm::NVPTXTargetLowering::getDivF32Level
int getDivF32Level() const
Definition: NVPTXISelLowering.cpp:92
llvm::NVPTXISD::Tex1DArrayU32FloatLevel
@ Tex1DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:105
llvm::NVPTXISD::Tex1DS32FloatGrad
@ Tex1DS32FloatGrad
Definition: NVPTXISelLowering.h:90
llvm::NVPTXISD::Tex3DS32FloatGrad
@ Tex3DS32FloatGrad
Definition: NVPTXISelLowering.h:138
llvm::NVPTXISD::Suld2DArrayV2I16Clamp
@ Suld2DArrayV2I16Clamp
Definition: NVPTXISelLowering.h:294
llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel
@ TexUnifiedCubeArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:234
llvm::MVT::bf16
@ bf16
Definition: MachineValueType.h:55
llvm::NVPTXISD::LDUV2
@ LDUV2
Definition: NVPTXISelLowering.h:66
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::NVPTX::PTXLdStInstCode::FromType
FromType
Definition: NVPTX.h:115
llvm::NVPTXISD::TexUnified3DU32FloatGrad
@ TexUnified3DU32FloatGrad
Definition: NVPTXISelLowering.h:226
llvm::NVPTXISD::Suld2DI8Clamp
@ Suld2DI8Clamp
Definition: NVPTXISelLowering.h:277
llvm::NVPTXTargetLowering::useF32FTZ
bool useF32FTZ(const MachineFunction &MF) const
Definition: NVPTXISelLowering.cpp:115
llvm::NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
Definition: NVPTXISelDAGToDAG.cpp:3689
llvm::createNVPTXISelDag
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG,...
Definition: NVPTXISelDAGToDAG.cpp:33
llvm::NVPTX::PTXLdStInstCode::CONSTANT
@ CONSTANT
Definition: NVPTX.h:110
llvm::NVPTXISD::TexCubeArrayS32Float
@ TexCubeArrayS32Float
Definition: NVPTXISelLowering.h:151
llvm::NVPTXISD::Tex1DArrayU32Float
@ Tex1DArrayU32Float
Definition: NVPTXISelLowering.h:104
llvm::NVPTX::PTXCmpMode::GTU
@ GTU
Definition: NVPTX.h:166
llvm::NVPTXISD::Suld2DI64Zero
@ Suld2DI64Zero
Definition: NVPTXISelLowering.h:400
llvm::NVPTX::PTXCmpMode::LEU
@ LEU
Definition: NVPTX.h:165
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::AArch64CC::GE
@ GE
Definition: AArch64BaseInfo.h:265
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:154
llvm::NVPTXISD::Suld2DArrayV4I16Clamp
@ Suld2DArrayV4I16Clamp
Definition: NVPTXISelLowering.h:298
llvm::NVPTXISD::Suld2DArrayV2I8Trap
@ Suld2DArrayV2I8Trap
Definition: NVPTXISelLowering.h:353
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:152
llvm::NVPTX::PTXLdStInstCode::Signed
@ Signed
Definition: NVPTX.h:117
llvm::NVPTXISD::Suld3DV4I16Zero
@ Suld3DV4I16Zero
Definition: NVPTXISelLowering.h:430
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::NVPTXISD::Suld2DV4I32Trap
@ Suld2DV4I32Trap
Definition: NVPTXISelLowering.h:347
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::NVPTXISD::Tex2DS32S32
@ Tex2DS32S32
Definition: NVPTXISelLowering.h:111
llvm::NVPTXISD::TexUnified2DArrayFloatFloat
@ TexUnified2DArrayFloatFloat
Definition: NVPTXISelLowering.h:204
llvm::NVPTXISD::Suld2DArrayI16Zero
@ Suld2DArrayI16Zero
Definition: NVPTXISelLowering.h:410
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1149
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::NVPTXISD::TexUnified1DArrayFloatFloatLevel
@ TexUnified1DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:181
llvm::NVPTXISD::Tex3DU32S32
@ Tex3DU32S32
Definition: NVPTXISelLowering.h:139
llvm::NVPTXISD::Suld1DArrayV2I64Zero
@ Suld1DArrayV2I64Zero
Definition: NVPTXISelLowering.h:392
llvm::NVPTXISD::Tex1DFloatFloat
@ Tex1DFloatFloat
Definition: NVPTXISelLowering.h:84
llvm::NVPTXISD::TexUnified1DS32FloatLevel
@ TexUnified1DS32FloatLevel
Definition: NVPTXISelLowering.h:173
llvm::NVPTXISD::Suld2DV4I16Trap
@ Suld2DV4I16Trap
Definition: NVPTXISelLowering.h:346
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::ADDRESS_SPACE_GENERIC
@ ADDRESS_SPACE_GENERIC
Definition: NVPTXBaseInfo.h:22
llvm::NVPTXISD::Suld3DV2I16Trap
@ Suld3DV2I16Trap
Definition: NVPTXISelLowering.h:366
llvm::NVPTXISD::StoreParam
@ StoreParam
Definition: NVPTXISelLowering.h:73
llvm::NVPTXISD::Tex1DArrayFloatFloatGrad
@ Tex1DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:98
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::IsVolatile
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
Definition: AMDGPUMetadata.h:199
llvm::NVPTXISD::Tld4A2DFloatFloat
@ Tld4A2DFloatFloat
Definition: NVPTXISelLowering.h:158
llvm::NVPTX::PTXLdStInstCode::Untyped
@ Untyped
Definition: NVPTX.h:119
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::NVPTXISD::Tld4UnifiedA2DS64Float
@ Tld4UnifiedA2DS64Float
Definition: NVPTXISelLowering.h:246
llvm::NVPTXISD::TexUnified1DArrayFloatFloat
@ TexUnified1DArrayFloatFloat
Definition: NVPTXISelLowering.h:180
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::NVPTXISD::Suld1DI16Trap
@ Suld1DI16Trap
Definition: NVPTXISelLowering.h:314
llvm::NVPTXISD::Suld1DV4I32Zero
@ Suld1DV4I32Zero
Definition: NVPTXISelLowering.h:383
llvm::NVPTXISD::TexUnified1DS32Float
@ TexUnified1DS32Float
Definition: NVPTXISelLowering.h:172
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1404
llvm::NVPTXISD::Suld1DArrayI64Clamp
@ Suld1DArrayI64Clamp
Definition: NVPTXISelLowering.h:268
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::NVPTXISD::Suld1DArrayI64Zero
@ Suld1DArrayI64Zero
Definition: NVPTXISelLowering.h:388
llvm::AddrSpaceCastSDNode::getDestAddressSpace
unsigned getDestAddressSpace() const
Definition: SelectionDAGNodes.h:1267
llvm::NVPTXISD::Tex2DArrayU32Float
@ Tex2DArrayU32Float
Definition: NVPTXISelLowering.h:128
llvm::NVPTXDAGToDAGISel
Definition: NVPTXISelDAGToDAG.h:27
llvm::AtomicSDNode
This is an SDNode representing atomic operations.
Definition: SelectionDAGNodes.h:1442
llvm::NVPTXISD::Suld2DArrayI64Zero
@ Suld2DArrayI64Zero
Definition: NVPTXISelLowering.h:412
llvm::NVPTXTargetLowering
Definition: NVPTXISelLowering.h:440
llvm::NVPTXISD::Suld1DArrayI32Trap
@ Suld1DArrayI32Trap
Definition: NVPTXISelLowering.h:327
llvm::SelectionDAGISel::MF
MachineFunction * MF
Definition: SelectionDAGISel.h:47
llvm::NVPTXISD::Suld1DArrayV2I16Zero
@ Suld1DArrayV2I16Zero
Definition: NVPTXISelLowering.h:390
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1447
llvm::NVPTX::PTXCmpMode::FTZ_FLAG
@ FTZ_FLAG
Definition: NVPTX.h:173
llvm::NVPTXISD::Suld1DV2I16Trap
@ Suld1DV2I16Trap
Definition: NVPTXISelLowering.h:318
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::NVPTXISD::TexUnified2DArrayS32S32
@ TexUnified2DArrayS32S32
Definition: NVPTXISelLowering.h:207
llvm::NVPTXISD::TexUnified3DU32S32
@ TexUnified3DU32S32
Definition: NVPTXISelLowering.h:223
llvm::NVPTXISD::Suld2DV2I16Trap
@ Suld2DV2I16Trap
Definition: NVPTXISelLowering.h:342
llvm::NVPTXISD::Suld2DI64Clamp
@ Suld2DI64Clamp
Definition: NVPTXISelLowering.h:280
llvm::NVPTXISD::Suld1DI32Zero
@ Suld1DI32Zero
Definition: NVPTXISelLowering.h:375
llvm::NVPTXISD::TexUnified2DArrayFloatFloatLevel
@ TexUnified2DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:205
llvm::NVPTXISD::Suld3DI16Trap
@ Suld3DI16Trap
Definition: NVPTXISelLowering.h:362
llvm::NVPTXISD::StoreParamS32
@ StoreParamS32
Definition: NVPTXISelLowering.h:76
llvm::NVPTXISD::Tex3DU32Float
@ Tex3DU32Float
Definition: NVPTXISelLowering.h:140
llvm::NVPTX::PTXLdStInstCode::PARAM
@ PARAM
Definition: NVPTX.h:112
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:351
llvm::NVPTXISD::Tld4UnifiedG2DS64Float
@ Tld4UnifiedG2DS64Float
Definition: NVPTXISelLowering.h:244
llvm::ISD::TargetExternalSymbol
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1366
llvm::NVPTXISD::Suld2DArrayI8Zero
@ Suld2DArrayI8Zero
Definition: NVPTXISelLowering.h:409
llvm::NVPTXISD::Suld1DArrayI8Zero
@ Suld1DArrayI8Zero
Definition: NVPTXISelLowering.h:385
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:469
llvm::NVPTXISD::Tld4R2DU64Float
@ Tld4R2DU64Float
Definition: NVPTXISelLowering.h:163
llvm::SelectionDAGISel::ReplaceUses
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
Definition: SelectionDAGISel.h:210
llvm::NVPTXISD::TexUnified3DFloatFloatGrad
@ TexUnified3DFloatFloatGrad
Definition: NVPTXISelLowering.h:218
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::NVPTXISD::TexUnified2DS32FloatGrad
@ TexUnified2DS32FloatGrad
Definition: NVPTXISelLowering.h:198
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1434
llvm::NVPTXISD::TexCubeFloatFloat
@ TexCubeFloatFloat
Definition: NVPTXISelLowering.h:143
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:145
llvm::AddrSpaceCastSDNode
Definition: SelectionDAGNodes.h:1257
llvm::NVPTXISD::Suld1DI8Trap
@ Suld1DI8Trap
Definition: NVPTXISelLowering.h:313
llvm::NVPTXISD::Suld3DV2I64Clamp
@ Suld3DV2I64Clamp
Definition: NVPTXISelLowering.h:308
llvm::NVPTX::PTXCmpMode::NEU
@ NEU
Definition: NVPTX.h:163
llvm::NVPTXISD::TexUnified1DArrayU32FloatGrad
@ TexUnified1DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:190
llvm::NVPTXISD::StoreV2
@ StoreV2
Definition: NVPTXISelLowering.h:68
MemRef
Definition: Lint.cpp:81
llvm::AArch64CC::GT
@ GT
Definition: AArch64BaseInfo.h:267
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:967
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::NVPTXISD::Suld1DI8Zero
@ Suld1DI8Zero
Definition: NVPTXISelLowering.h:373
llvm::NVPTXISD::StoreParamV4
@ StoreParamV4
Definition: NVPTXISelLowering.h:75
llvm::AtomicSDNode::getVal
const SDValue & getVal() const
Definition: SelectionDAGNodes.h:1452
llvm::NVPTXISD::Suld3DV4I8Clamp
@ Suld3DV4I8Clamp
Definition: NVPTXISelLowering.h:309
llvm::NVPTX::PTXLdStInstCode::GLOBAL
@ GLOBAL
Definition: NVPTX.h:109
llvm::NVPTX::PTXLdStInstCode::GENERIC
@ GENERIC
Definition: NVPTX.h:108
llvm::NVPTXISD::Tex2DU32S32
@ Tex2DU32S32
Definition: NVPTXISelLowering.h:115
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:300
llvm::ISD::SETOGE
@ SETOGE
Definition: ISDOpcodes.h:1429
llvm::NVPTXISD::Suld2DArrayV4I8Zero
@ Suld2DArrayV4I8Zero
Definition: NVPTXISelLowering.h:417
Instructions.h
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
llvm::NVPTXISD::Tld4UnifiedA2DFloatFloat
@ Tld4UnifiedA2DFloatFloat
Definition: NVPTXISelLowering.h:242
llvm::NVPTXISD::Tex3DU32FloatGrad
@ Tex3DU32FloatGrad
Definition: NVPTXISelLowering.h:142
llvm::SelectionDAGISel
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
Definition: SelectionDAGISel.h:41
llvm::NVPTXISD::Suld2DI16Clamp
@ Suld2DI16Clamp
Definition: NVPTXISelLowering.h:278
llvm::NVPTXISD::TexUnified1DU32FloatGrad
@ TexUnified1DU32FloatGrad
Definition: NVPTXISelLowering.h:178
llvm::NVPTXISD::Suld1DV4I8Zero
@ Suld1DV4I8Zero
Definition: NVPTXISelLowering.h:381
TargetIntrinsicInfo.h
llvm::NVPTX::PTXCvtMode::NONE
@ NONE
Definition: NVPTX.h:131
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:56
llvm::NVPTXISD::TexUnified1DS32FloatGrad
@ TexUnified1DS32FloatGrad
Definition: NVPTXISelLowering.h:174
llvm::NVPTXISD::TexUnified1DS32S32
@ TexUnified1DS32S32
Definition: NVPTXISelLowering.h:171
N
#define N
llvm::NVPTXISD::Suld1DArrayV4I8Clamp
@ Suld1DArrayV4I8Clamp
Definition: NVPTXISelLowering.h:273
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::SelectionDAG::getTargetConstantFP
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
Definition: SelectionDAG.h:701
llvm::NVPTXISD::Suld2DI32Trap
@ Suld2DI32Trap
Definition: NVPTXISelLowering.h:339
llvm::isStrongerThanMonotonic
bool isStrongerThanMonotonic(AtomicOrdering AO)
Definition: AtomicOrdering.h:124
llvm::NVPTXISD::Suld2DV4I16Clamp
@ Suld2DV4I16Clamp
Definition: NVPTXISelLowering.h:286
llvm::NVPTXISD::Suld1DI32Clamp
@ Suld1DI32Clamp
Definition: NVPTXISelLowering.h:255
llvm::NVPTXISD::Suld1DV2I64Clamp
@ Suld1DV2I64Clamp
Definition: NVPTXISelLowering.h:260
llvm::NVPTXTargetLowering::allowUnsafeFPMath
bool allowUnsafeFPMath(MachineFunction &MF) const
Definition: NVPTXISelLowering.cpp:4466
llvm::NVPTXISD::Suld3DI8Clamp
@ Suld3DI8Clamp
Definition: NVPTXISelLowering.h:301
llvm::NVPTXISD::Suld2DV4I8Trap
@ Suld2DV4I8Trap
Definition: NVPTXISelLowering.h:345
llvm::NVPTXISD::Tex2DFloatS32
@ Tex2DFloatS32
Definition: NVPTXISelLowering.h:107
llvm::NVPTXISD::Suld2DI8Trap
@ Suld2DI8Trap
Definition: NVPTXISelLowering.h:337
llvm::NVPTXISD::Suld2DI64Trap
@ Suld2DI64Trap
Definition: NVPTXISelLowering.h:340
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1137
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:669
llvm::NVPTXDAGToDAGISel::Subtarget
const NVPTXSubtarget * Subtarget
Definition: NVPTXISelDAGToDAG.h:49
llvm::NVPTXISD::Suld3DV4I8Trap
@ Suld3DV4I8Trap
Definition: NVPTXISelLowering.h:369
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1432
llvm::DataLayout::getPointerSizeInBits
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:412
llvm::NVPTXISD::Suld2DV2I8Zero
@ Suld2DV2I8Zero
Definition: NVPTXISelLowering.h:401
llvm::NVPTXISD::Tex2DS32FloatLevel
@ Tex2DS32FloatLevel
Definition: NVPTXISelLowering.h:113
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::NVPTXISD::Suld1DV2I16Zero
@ Suld1DV2I16Zero
Definition: NVPTXISelLowering.h:378
llvm::NVPTXISD::Tld4UnifiedG2DU64Float
@ Tld4UnifiedG2DU64Float
Definition: NVPTXISelLowering.h:248
llvm::NVPTXISD::TexUnified2DFloatS32
@ TexUnified2DFloatS32
Definition: NVPTXISelLowering.h:191
llvm::NVPTX::PTXCmpMode::CmpMode
CmpMode
Definition: NVPTX.h:151
llvm::NVPTXISD::Suld1DArrayV2I16Clamp
@ Suld1DArrayV2I16Clamp
Definition: NVPTXISelLowering.h:270
llvm::NVPTXISD::Wrapper
@ Wrapper
Definition: NVPTXISelLowering.h:26
llvm::NVPTXISD::StoreParamV2
@ StoreParamV2
Definition: NVPTXISelLowering.h:74
llvm::NVPTXISD::StoreRetvalV2
@ StoreRetvalV2
Definition: NVPTXISelLowering.h:79
llvm::NVPTXISD::Suld2DArrayV2I16Trap
@ Suld2DArrayV2I16Trap
Definition: NVPTXISelLowering.h:354
llvm::isMask_64
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:440
llvm::NVPTX::PTXLdStInstCode::Unsigned
@ Unsigned
Definition: NVPTX.h:116
llvm::NVPTXISD::Suld1DArrayV2I64Clamp
@ Suld1DArrayV2I64Clamp
Definition: NVPTXISelLowering.h:272
llvm::NVPTXISD::Suld1DArrayV2I8Trap
@ Suld1DArrayV2I8Trap
Definition: NVPTXISelLowering.h:329
llvm::NVPTXISD::Suld1DI64Trap
@ Suld1DI64Trap
Definition: NVPTXISelLowering.h:316
llvm::NVPTXISD::TexUnified2DArrayU32FloatGrad
@ TexUnified2DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:214
llvm::NVPTXISD::StoreParamU32
@ StoreParamU32
Definition: NVPTXISelLowering.h:77
llvm::NVPTXISD::TexUnified2DArrayU32Float
@ TexUnified2DArrayU32Float
Definition: NVPTXISelLowering.h:212
llvm::NVPTXISD::Tex2DU32FloatGrad
@ Tex2DU32FloatGrad
Definition: NVPTXISelLowering.h:118
llvm::NVPTXTargetLowering::allowFMA
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
Definition: NVPTXISelLowering.cpp:4449
llvm::NVPTXISD::SETP_F16X2
@ SETP_F16X2
Definition: NVPTXISelLowering.h:59
raw_ostream.h
llvm::NVPTXISD::Tex1DFloatFloatGrad
@ Tex1DFloatFloatGrad
Definition: NVPTXISelLowering.h:86
llvm::NVPTXISD::TexUnifiedCubeU32Float
@ TexUnifiedCubeU32Float
Definition: NVPTXISelLowering.h:231
llvm::SelectionDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: SelectionDAGISel.cpp:374
llvm::NVPTXISD::TexCubeArrayFloatFloatLevel
@ TexCubeArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:150
llvm::NVPTXISD::Suld1DArrayI8Trap
@ Suld1DArrayI8Trap
Definition: NVPTXISelLowering.h:325
llvm::NVPTXISD::TexUnified2DS32Float
@ TexUnified2DS32Float
Definition: NVPTXISelLowering.h:196
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:57
llvm::NVPTXISD::Tex2DArrayFloatFloatGrad
@ Tex2DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:122
llvm::NVPTXISD::Suld2DArrayV2I8Zero
@ Suld2DArrayV2I8Zero
Definition: NVPTXISelLowering.h:413
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::NVPTXISD::TexUnified1DFloatFloat
@ TexUnified1DFloatFloat
Definition: NVPTXISelLowering.h:168
llvm::NVPTXISD::Suld2DArrayV2I64Clamp
@ Suld2DArrayV2I64Clamp
Definition: NVPTXISelLowering.h:296
Debug.h
llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:139
llvm::NVPTXISD::Suld2DArrayI32Trap
@ Suld2DArrayI32Trap
Definition: NVPTXISelLowering.h:351
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1159
llvm::NVPTXISD::Suld2DI16Zero
@ Suld2DI16Zero
Definition: NVPTXISelLowering.h:398
llvm::NVPTXISD::Suld1DArrayV4I32Zero
@ Suld1DArrayV4I32Zero
Definition: NVPTXISelLowering.h:395
llvm::NVPTXISD::Suld1DV2I16Clamp
@ Suld1DV2I16Clamp
Definition: NVPTXISelLowering.h:258
llvm::NVPTXISD::Tex1DArrayS32S32
@ Tex1DArrayS32S32
Definition: NVPTXISelLowering.h:99
llvm::NVPTXISD::Suld1DI64Zero
@ Suld1DI64Zero
Definition: NVPTXISelLowering.h:376
llvm::NVPTXISD::Suld2DV2I64Trap
@ Suld2DV2I64Trap
Definition: NVPTXISelLowering.h:344
llvm::NVPTXISD::Suld2DArrayI32Clamp
@ Suld2DArrayI32Clamp
Definition: NVPTXISelLowering.h:291
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::NVPTXISD::Suld3DV4I32Clamp
@ Suld3DV4I32Clamp
Definition: NVPTXISelLowering.h:311
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::NVPTXISD::Suld2DArrayV2I16Zero
@ Suld2DArrayV2I16Zero
Definition: NVPTXISelLowering.h:414
llvm::NVPTXISD::Suld3DV2I16Clamp
@ Suld3DV2I16Clamp
Definition: NVPTXISelLowering.h:306
llvm::NVPTXISD::Suld1DV2I32Zero
@ Suld1DV2I32Zero
Definition: NVPTXISelLowering.h:379
llvm::NVPTXISD::Tex1DS32FloatLevel
@ Tex1DS32FloatLevel
Definition: NVPTXISelLowering.h:89
llvm::NVPTXISD::Suld2DArrayV4I16Trap
@ Suld2DArrayV4I16Trap
Definition: NVPTXISelLowering.h:358
llvm::NVPTX::PTXCmpMode::LTU
@ LTU
Definition: NVPTX.h:164