19#include "llvm/IR/IntrinsicsNVPTX.h"
29#define DEBUG_TYPE "nvptx-isel"
30#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
44 CodeGenOpt::Level OptLevel)
46 doMulWide = (OptLevel > 0);
// Returns the requested precision level for div.f32 lowering.
// NOTE(review): this extraction is missing the body (original lines 55-57
// elided) — presumably forwards a TargetMachine/Subtarget query; confirm
// against the full file.
54int NVPTXDAGToDAGISel::getDivF32Level()
const {
// Whether sqrt.f32 should be lowered to the precise (IEEE-rounded) variant.
// NOTE(review): body elided in this extraction (original lines 59-60) —
// likely a TargetMachine option query; confirm against the full file.
58bool NVPTXDAGToDAGISel::usePrecSqrtF32()
const {
// Whether f32 operations should flush denormals to zero (PTX .ftz modifier).
// NOTE(review): body elided in this extraction (original lines 63-64) —
// confirm against the full file.
62bool NVPTXDAGToDAGISel::useF32FTZ()
const {
// Whether FP contraction into fused multiply-add is permitted.
// NOTE(review): body elided in this extraction (original lines 67-70) —
// confirm against the full file.
66bool NVPTXDAGToDAGISel::allowFMA()
const {
// Whether unsafe (non-IEEE) FP math transformations are permitted.
// NOTE(review): body elided in this extraction (original lines 72-75) —
// confirm against the full file.
71bool NVPTXDAGToDAGISel::allowUnsafeFPMath()
const {
// Whether 32-bit "short" pointers are used for non-generic address spaces
// on a 64-bit target (affects the cvta_*_6432 opcode choices below).
// NOTE(review): body elided in this extraction (original lines 77-78) —
// confirm against the full file.
76bool NVPTXDAGToDAGISel::useShortPointers()
const {
// Top-level instruction-selection dispatch: routes node N to the matching
// try*() helper by opcode; helpers return true when they emitted a machine
// node (the surrounding switch/case labels and fallthrough/return logic are
// elided in this extraction — original lines jump 84->89->101->...).
82void NVPTXDAGToDAGISel::Select(
SDNode *
N) {
// Already-selected machine nodes need no further work.
84 if (
N->isMachineOpcode()) {
89 switch (
N->getOpcode()) {
101 if (tryEXTRACT_VECTOR_ELEMENT(
N))
110 if (tryLoadVector(
N))
122 if (tryStoreVector(
N))
134 if (tryStoreRetval(
N))
142 if (tryStoreParam(
N))
146 if (tryIntrinsicNoChain(
N))
150 if (tryIntrinsicChain(
N))
321 if (tryTextureIntrinsic(
N))
489 if (trySurfaceIntrinsic(
N))
// Addrspacecast selection never fails, so it is called unconditionally.
500 SelectAddrSpaceCast(
N);
503 if (tryConstantFP16(
N))
// Selects chained intrinsics. Operand 1 of an INTRINSIC_W_CHAIN node holds
// the intrinsic ID; the visible cases route the ldg/ldu global-load
// intrinsics (handled by tryLDGLDU below — the switch header, default case
// and return statements are elided in this extraction).
512bool NVPTXDAGToDAGISel::tryIntrinsicChain(
SDNode *
N) {
513 unsigned IID = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
517 case Intrinsic::nvvm_ldg_global_f:
518 case Intrinsic::nvvm_ldg_global_i:
519 case Intrinsic::nvvm_ldg_global_p:
520 case Intrinsic::nvvm_ldu_global_f:
521 case Intrinsic::nvvm_ldu_global_i:
522 case Intrinsic::nvvm_ldu_global_p:
// Lowers an f16 ConstantFP node directly (presumably to a LOAD_CONST_F16-style
// machine node — the node-creation call around lines 531-532 is elided in
// this extraction). Bails out early for any non-f16 result type.
529bool NVPTXDAGToDAGISel::tryConstantFP16(
SDNode *
N) {
530 if (
N->getValueType(0) != MVT::f16)
533 cast<ConstantFPSDNode>(
N)->getValueAPF(),
SDLoc(
N), MVT::f16);
563 return CmpMode::NotANumber;
// Selects a packed f16x2 comparison into SETP_f16x2rr, translating the
// ISD condition code (operand 2) into a PTX comparison mode with the
// current FTZ setting. The machine-node creation is partially elided in
// this extraction (producing two i1 results, per the MVT::i1, MVT::i1 list).
597bool NVPTXDAGToDAGISel::SelectSETP_F16X2(
SDNode *
N) {
598 unsigned PTXCmpMode =
599 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
602 NVPTX::SETP_f16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
// Custom-selects EXTRACT_VECTOR_ELEMENT from a v2f16: scans all users of the
// source vector, buckets extracts of lane 0 and lane 1 (E0/E1 — their
// declarations are elided in this extraction), then emits a single split
// instruction feeding both lanes. Only handles v2f16 sources.
610bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(
SDNode *
N) {
615 if (
Vector.getSimpleValueType() != MVT::v2f16)
// Collect constant-index extracts among the vector's users.
620 for (
auto *U :
Vector.getNode()->uses()) {
623 if (
U->getOperand(0) !=
Vector)
626 dyn_cast<ConstantSDNode>(
U->getOperand(1))) {
627 if (IdxConst->getZExtValue() == 0)
629 else if (IdxConst->getZExtValue() == 1)
// Default split of the f16x2 register; an i32-typed source switches to the
// i32->f16x2 variant (the surrounding condition is elided here).
641 unsigned Op = NVPTX::SplitF16x2;
646 Op = NVPTX::SplitI32toF16x2;
// Rewire each collected extract to the corresponding split result.
653 for (
auto *
Node : E0)
655 for (
auto *
Node : E1)
662 const Value *Src =
N->getMemOperand()->getValue();
667 if (
auto *PT = dyn_cast<PointerType>(Src->getType())) {
668 switch (PT->getAddressSpace()) {
703 if (
N->isInvariant())
715 if (
auto *
A = dyn_cast<const Argument>(V))
716 return IsKernelFn &&
A->onlyReadsMemory() &&
A->hasNoAliasAttr();
717 if (
auto *GV = dyn_cast<const GlobalVariable>(V))
718 return GV->isConstant();
// Selects chainless intrinsics. Operand 0 of an INTRINSIC_WO_CHAIN node
// holds the intrinsic ID; the only visible case routes the internal
// texture/surface-handle intrinsic (switch header and returns elided in
// this extraction).
723bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(
SDNode *
N) {
724 unsigned IID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
728 case Intrinsic::nvvm_texsurf_handle_internal:
729 SelectTexSurfHandle(
N);
// Replaces the tex/surf-handle intrinsic with an i64 machine node wrapping
// the global texture/surface symbol (GlobalVal's derivation and the
// replacement call are elided in this extraction).
734void NVPTXDAGToDAGISel::SelectTexSurfHandle(
SDNode *
N) {
739 MVT::i64, GlobalVal));
// Selects an AddrSpaceCastSDNode into the matching PTX cvta instruction.
// Two directions: specific->generic uses cvta_<space>, generic->specific
// uses cvta_to_<space>. Opcode choice further depends on 64-bit target and
// on short (32-bit) pointers for shared/const/local (the *_6432 / *_3264
// mixed-width variants). Case labels, defaults and the final node creation
// are elided in this extraction.
742void NVPTXDAGToDAGISel::SelectAddrSpaceCast(
SDNode *
N) {
747 assert(SrcAddrSpace != DstAddrSpace &&
748 "addrspacecast must be between different address spaces");
// Direction 1: specific address space -> generic.
753 switch (SrcAddrSpace) {
756 Opc = TM.
is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
759 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
760 : NVPTX::cvta_shared_yes_64)
761 : NVPTX::cvta_shared_yes;
764 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
765 : NVPTX::cvta_const_yes_64)
766 : NVPTX::cvta_const_yes;
769 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
770 : NVPTX::cvta_local_yes_64)
771 : NVPTX::cvta_local_yes;
// Direction 2: generic -> specific. Only a generic (0) source is legal here.
779 if (SrcAddrSpace != 0)
782 switch (DstAddrSpace) {
785 Opc = TM.
is64Bit() ? NVPTX::cvta_to_global_yes_64
786 : NVPTX::cvta_to_global_yes;
789 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
790 : NVPTX::cvta_to_shared_yes_64)
791 : NVPTX::cvta_to_shared_yes;
794 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
795 : NVPTX::cvta_to_const_yes_64)
796 : NVPTX::cvta_to_const_yes;
799 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
800 : NVPTX::cvta_to_local_yes_64)
801 : NVPTX::cvta_to_local_yes;
804 Opc = TM.
is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
805 : NVPTX::nvvm_ptr_gen_to_param;
// Helper that maps a simple value type to one of eight per-type opcodes
// (i8/i16/i32/i64/f16/f16x2/f32/f64). i64 and f64 slots are optional since
// v4 variants of some instructions do not exist for 64-bit elements.
// NOTE(review): the function name and first parameter (original line 817,
// presumably "pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,")
// and the body are elided in this extraction — confirm against the full file.
816static std::optional<unsigned>
818 unsigned Opcode_i16,
unsigned Opcode_i32,
819 std::optional<unsigned> Opcode_i64,
unsigned Opcode_f16,
820 unsigned Opcode_f16x2,
unsigned Opcode_f32,
821 std::optional<unsigned> Opcode_f64) {
// Selects a scalar LoadSDNode into an NVPTX LD_* machine instruction.
// Four addressing modes are tried in order, each with its own opcode table
// and operand list: direct address (_avar), symbol+imm (_asi), reg+imm
// (_ari / _ari_64), and plain register (_areg / _areg_64); the 64-bit
// variants are chosen when PointerSize == 64. Large parts (address-space
// mapping, extension handling, node creation) are elided in this extraction.
862bool NVPTXDAGToDAGISel::tryLoad(
SDNode *
N) {
865 assert(
LD->readMem() &&
"Expected load");
867 EVT LoadedVT =
LD->getMemoryVT();
868 SDNode *NVPTXLD =
nullptr;
// Sub-byte scalars are widened to a full 8-bit memory access width.
912 unsigned fromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
913 unsigned int fromType;
918 assert((LoadedVT == MVT::v2f16 || LoadedVT == MVT::v2bf16) &&
919 "Unexpected vector type");
934 std::optional<unsigned> Opcode;
// Mode 1: direct address (_avar).
937 if (SelectDirectAddr(N1,
Addr)) {
939 TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
940 NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
941 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
944 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
945 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
946 getI32Imm(fromTypeWidth, dl),
Addr, Chain };
// Mode 2: symbol + immediate offset (_asi).
950 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
951 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
952 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
953 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
956 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
957 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
958 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
// Mode 3: register + immediate offset (_ari), 64-bit pointer variant first.
962 if (PointerSize == 64)
964 TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
965 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
966 NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
969 TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
970 NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
971 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
974 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
975 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
976 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
// Mode 4: plain register (_areg) fallback.
979 if (PointerSize == 64)
981 TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
982 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
983 NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
984 NVPTX::LD_f64_areg_64);
987 TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
988 NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
989 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
992 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
993 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
994 getI32Imm(fromTypeWidth, dl), N1, Chain };
// Selects a vector load (LoadV2/LoadV4) into LDV_*_v2/_v4 machine
// instructions. Mirrors tryLoad's four addressing modes (avar/asi/ari/areg),
// with an inner switch on the node opcode to pick the v2 vs v4 opcode table;
// v4 tables pass std::nullopt for 64-bit element slots because those
// instructions do not exist. Global loads are redirected to tryLDGLDU.
// Substantial portions are elided in this extraction.
1008bool NVPTXDAGToDAGISel::tryLoadVector(
SDNode *
N) {
1013 std::optional<unsigned> Opcode;
1025 return tryLDGLDU(
N);
1050 unsigned FromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
// The last operand of the load node encodes the extension type.
1053 unsigned ExtensionType = cast<ConstantSDNode>(
1054 N->getOperand(
N->getNumOperands() - 1))->getZExtValue();
1062 switch (
N->getOpcode()) {
1073 EVT EltVT =
N->getValueType(0);
1078 if (EltVT == MVT::v2f16 || EltVT == MVT::v2bf16) {
// Mode 1: direct address (_avar).
1085 if (SelectDirectAddr(Op1,
Addr)) {
1086 switch (
N->getOpcode()) {
1091 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1092 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1093 NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1094 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1098 NVPTX::LDV_i8_v4_avar, NVPTX::LDV_i16_v4_avar,
1099 NVPTX::LDV_i32_v4_avar, std::nullopt,
1100 NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1101 NVPTX::LDV_f32_v4_avar, std::nullopt);
1106 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1107 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1108 getI32Imm(FromTypeWidth,
DL),
Addr, Chain };
1110 }
else if (PointerSize == 64
// Mode 2: symbol + immediate offset (_asi).
1113 switch (
N->getOpcode()) {
1118 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1119 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1120 NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1121 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1125 NVPTX::LDV_i8_v4_asi, NVPTX::LDV_i16_v4_asi,
1126 NVPTX::LDV_i32_v4_asi, std::nullopt,
1127 NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1128 NVPTX::LDV_f32_v4_asi, std::nullopt);
1133 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1134 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1137 }
else if (PointerSize == 64
// Mode 3: register + immediate offset (_ari), split by pointer width.
1140 if (PointerSize == 64) {
1141 switch (
N->getOpcode()) {
1147 NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1148 NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1149 NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1150 NVPTX::LDV_f64_v2_ari_64);
1155 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
1156 NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1157 NVPTX::LDV_f32_v4_ari_64, std::nullopt);
1161 switch (
N->getOpcode()) {
1166 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1167 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1168 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1169 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1173 NVPTX::LDV_i8_v4_ari, NVPTX::LDV_i16_v4_ari,
1174 NVPTX::LDV_i32_v4_ari, std::nullopt,
1175 NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1176 NVPTX::LDV_f32_v4_ari, std::nullopt);
1182 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1183 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
// Mode 4: plain register (_areg) fallback, split by pointer width.
1188 if (PointerSize == 64) {
1189 switch (
N->getOpcode()) {
1195 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1196 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1197 NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1198 NVPTX::LDV_f64_v2_areg_64);
1203 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
1204 NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1205 NVPTX::LDV_f32_v4_areg_64, std::nullopt);
1209 switch (
N->getOpcode()) {
1215 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1216 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1217 NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1218 NVPTX::LDV_f64_v2_areg);
1223 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, std::nullopt,
1224 NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1225 NVPTX::LDV_f32_v4_areg, std::nullopt);
1231 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1232 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1233 getI32Imm(FromTypeWidth,
DL), Op1, Chain };
// Selects ld.global.nc (LDG) and ldu.global (LDU) accesses — either from the
// nvvm_ldg/ldu intrinsics (address is operand 2) or from plain/vector loads
// routed here (address is operand 1). The large opcode tables mirror
// tryLoad/tryLoadVector: scalar, v2-element (_ELE_), and v4-element forms,
// each in avar / ari64 / ari32 / areg64 / areg32 addressing variants,
// dispatched on the node opcode. v4 tables use std::nullopt for 64-bit
// element slots. Result-type legalization at the end converts the loaded
// elements back to the original type when they differ (e.g. i8 loaded as
// i16). Much of the control flow is elided in this extraction.
1244bool NVPTXDAGToDAGISel::tryLDGLDU(
SDNode *
N) {
// Intrinsic form: address is operand 2, and the intrinsic ID (operand 1)
// distinguishes LDG from LDU.
1254 Op1 =
N->getOperand(2);
1255 Mem = cast<MemIntrinsicSDNode>(
N);
1256 unsigned IID = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
1260 case Intrinsic::nvvm_ldg_global_f:
1261 case Intrinsic::nvvm_ldg_global_i:
1262 case Intrinsic::nvvm_ldg_global_p:
1265 case Intrinsic::nvvm_ldu_global_f:
1266 case Intrinsic::nvvm_ldu_global_i:
1267 case Intrinsic::nvvm_ldu_global_p:
// Plain load form: address is operand 1.
1272 Op1 =
N->getOperand(1);
1273 Mem = cast<MemSDNode>(
N);
1276 std::optional<unsigned> Opcode;
1282 unsigned NumElts = 1;
1287 if (EltVT == MVT::f16 &&
N->getValueType(0) == MVT::v2f16) {
1288 assert(NumElts % 2 == 0 &&
"Vector must have even number of elements");
// i8 elements are produced in i16 registers.
1297 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1299 for (
unsigned i = 0; i != NumElts; ++i) {
// Addressing mode 1: direct address (avar).
1305 if (SelectDirectAddr(Op1,
Addr)) {
1306 switch (
N->getOpcode()) {
1313 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1314 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1315 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1316 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1317 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1318 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1319 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1320 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1323 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1324 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1325 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1326 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1327 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1328 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1329 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1330 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1335 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1336 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1337 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1338 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1339 NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1340 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1341 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1342 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1346 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1347 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1348 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1349 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1350 NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1351 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1352 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1353 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1359 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1360 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
1361 NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1362 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1363 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
1368 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1369 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
1370 NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1371 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1372 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
// Addressing mode 2: register + immediate, 64-bit pointers (ari64).
1382 switch (
N->getOpcode()) {
1389 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1390 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1391 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1392 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1393 NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1394 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1395 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1396 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1399 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1400 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1401 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1402 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1403 NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1404 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1405 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1406 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1411 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1412 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1413 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1414 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1415 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1416 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1417 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1418 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1422 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1423 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1424 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1425 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1426 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1427 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1428 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1429 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1435 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1436 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
1437 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1438 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1439 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
1444 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1445 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
1446 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1447 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1448 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
// Same mode with 32-bit pointers (ari / ari32).
1452 switch (
N->getOpcode()) {
1459 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1460 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1461 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1462 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1463 NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1464 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1465 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1466 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1469 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1470 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1471 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1472 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1473 NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1474 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1475 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1476 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1481 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1482 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1483 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1484 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1485 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1486 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1487 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1488 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1492 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1493 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1494 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1495 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1496 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1497 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1498 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1499 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1505 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1506 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
1507 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1508 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1509 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
1514 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1515 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
1516 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1517 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1518 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
// Addressing mode 3: plain register, 64-bit pointers (areg64).
1528 switch (
N->getOpcode()) {
1535 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1536 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1537 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1538 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1539 NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1540 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1541 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1542 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1545 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1546 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1547 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1548 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1549 NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1550 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1551 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1552 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1557 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1558 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1559 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1560 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1561 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1562 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1563 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1564 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1568 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1569 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1570 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1571 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1572 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1573 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1574 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1575 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1581 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1582 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
1583 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1584 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1585 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
1590 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1591 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
1592 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1593 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1594 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
// Same mode with 32-bit pointers (areg / areg32).
1598 switch (
N->getOpcode()) {
1605 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1606 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1607 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1608 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1609 NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1610 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1611 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1612 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1615 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1616 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1617 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1618 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1619 NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1620 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1621 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1622 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1627 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1628 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1629 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1630 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1631 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1632 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1633 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1634 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1638 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1639 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1640 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1641 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1642 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1643 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1644 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1645 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1651 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1652 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
1653 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1654 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1655 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
1660 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1661 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
1662 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1663 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1664 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
1670 SDValue Ops[] = { Op1, Chain };
// Post-processing: if the instruction produced a wider element type than
// the node's original result type, insert conversions (CvtOpc) per element.
1685 EVT OrigType =
N->getValueType(0);
1688 if (OrigType != EltVT && LdNode) {
1693 unsigned CvtOpc = GetConvertOpcode(OrigType.
getSimpleVT(),
1698 for (
unsigned i = 0; i != NumElts; ++i) {
// Selects a scalar store (plain StoreSDNode or atomic store) into an NVPTX
// ST_* machine instruction. Mirrors tryLoad: four addressing modes
// (avar / asi / ari / areg) with 64-bit pointer variants, one
// pickOpcodeForVT table per mode. Indexed stores are rejected. Large parts
// (address-space mapping, operand 'Value', node creation) are elided in
// this extraction.
1714bool NVPTXDAGToDAGISel::tryStore(
SDNode *
N) {
1717 assert(
ST->writeMem() &&
"Expected store");
1720 assert((PlainStore || AtomicStore) &&
"Expected store");
1721 EVT StoreVT =
ST->getMemoryVT();
1722 SDNode *NVPTXST =
nullptr;
// Pre/post-indexed stores are not supported by this path.
1725 if (PlainStore && PlainStore->
isIndexed())
1763 assert((StoreVT == MVT::v2f16 || StoreVT == MVT::v2bf16) &&
1764 "Unexpected vector type");
1777 std::optional<unsigned> Opcode;
1779 Value.getNode()->getSimpleValueType(0).SimpleTy;
// Mode 1: direct address (_avar).
1781 if (SelectDirectAddr(BasePtr,
Addr)) {
1782 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1783 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1784 NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1785 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1789 getI32Imm(isVolatile, dl),
1790 getI32Imm(CodeAddrSpace, dl),
1791 getI32Imm(vecType, dl),
1792 getI32Imm(toType, dl),
1793 getI32Imm(toTypeWidth, dl),
1797 }
else if (PointerSize == 64
// Mode 2: symbol + immediate offset (_asi).
1800 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1801 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1802 NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1803 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1807 getI32Imm(isVolatile, dl),
1808 getI32Imm(CodeAddrSpace, dl),
1809 getI32Imm(vecType, dl),
1810 getI32Imm(toType, dl),
1811 getI32Imm(toTypeWidth, dl),
1816 }
else if (PointerSize == 64
// Mode 3: register + immediate offset (_ari), split by pointer width.
1819 if (PointerSize == 64)
1821 SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1822 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1823 NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1825 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1826 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1827 NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1828 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1833 getI32Imm(isVolatile, dl),
1834 getI32Imm(CodeAddrSpace, dl),
1835 getI32Imm(vecType, dl),
1836 getI32Imm(toType, dl),
1837 getI32Imm(toTypeWidth, dl),
// Mode 4: plain register (_areg) fallback, split by pointer width.
1843 if (PointerSize == 64)
1845 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1846 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1847 NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1848 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1850 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1851 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1852 NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1853 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1857 getI32Imm(isVolatile, dl),
1858 getI32Imm(CodeAddrSpace, dl),
1859 getI32Imm(vecType, dl),
1860 getI32Imm(toType, dl),
1861 getI32Imm(toTypeWidth, dl),
// Selects a vector store (StoreV2/StoreV4) into STV_*_v2/_v4 machine
// instructions. The address operand position depends on arity (operand 3
// for v2, operand 5 for v4). Mirrors tryLoadVector's addressing-mode and
// opcode-table structure; v4 tables use std::nullopt for 64-bit element
// slots. Substantial portions are elided in this extraction.
1876bool NVPTXDAGToDAGISel::tryStoreVector(
SDNode *
N) {
1880 std::optional<unsigned> Opcode;
// N2 is the base-pointer operand; its index depends on v2 vs v4 arity.
1915 switch (
N->getOpcode()) {
1920 N2 =
N->getOperand(3);
1928 N2 =
N->getOperand(5);
1937 if (EltVT == MVT::v2f16 || EltVT == MVT::v2bf16) {
// Mode 1: direct address (_avar).
1950 if (SelectDirectAddr(N2,
Addr)) {
1951 switch (
N->getOpcode()) {
1956 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1957 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1958 NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1959 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1963 NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
1964 NVPTX::STV_i32_v4_avar, std::nullopt,
1965 NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1966 NVPTX::STV_f32_v4_avar, std::nullopt);
// Mode 2: symbol + immediate offset (_asi).
1972 switch (
N->getOpcode()) {
1977 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1978 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1979 NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1980 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1984 NVPTX::STV_i8_v4_asi, NVPTX::STV_i16_v4_asi,
1985 NVPTX::STV_i32_v4_asi, std::nullopt,
1986 NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1987 NVPTX::STV_f32_v4_asi, std::nullopt);
// Mode 3: register + immediate offset (_ari), split by pointer width.
1994 if (PointerSize == 64) {
1995 switch (
N->getOpcode()) {
2001 NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
2002 NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
2003 NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
2004 NVPTX::STV_f64_v2_ari_64);
2009 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
2010 NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
2011 NVPTX::STV_f32_v4_ari_64, std::nullopt);
2015 switch (
N->getOpcode()) {
2020 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2021 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2022 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
2023 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
2027 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
2028 NVPTX::STV_i32_v4_ari, std::nullopt,
2029 NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
2030 NVPTX::STV_f32_v4_ari, std::nullopt);
// Mode 4: plain register (_areg), split by pointer width.
2037 if (PointerSize == 64) {
2038 switch (
N->getOpcode()) {
2044 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2045 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2046 NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2047 NVPTX::STV_f64_v2_areg_64);
2052 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
2053 NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2054 NVPTX::STV_f32_v4_areg_64, std::nullopt);
2058 switch (
N->getOpcode()) {
2064 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2065 NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2066 NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2067 NVPTX::STV_f64_v2_areg);
2072 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, std::nullopt,
2073 NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2074 NVPTX::STV_f32_v4_areg, std::nullopt);
// Selects a LoadParam node (reading a formal parameter / call return slot)
// into LoadParamMem{I,F}* instructions, with V2/V4 opcode tables chosen by
// vector arity (VecSize). The v2 result node uses a 4-element value list
// plus chain and glue. Offset is a constant operand. Case labels and the
// node creation are elided in this extraction.
2095bool NVPTXDAGToDAGISel::tryLoadParam(
SDNode *
Node) {
2103 switch (
Node->getOpcode()) {
2117 EVT EltVT =
Node->getValueType(0);
2120 std::optional<unsigned> Opcode;
2127 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2128 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2129 NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2130 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2135 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2136 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2137 NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2138 NVPTX::LoadParamMemV2F64);
2143 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, std::nullopt,
2144 NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2145 NVPTX::LoadParamMemV4F32, std::nullopt);
2154 }
else if (VecSize == 2) {
2157 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2161 unsigned OffsetVal = cast<ConstantSDNode>(
Offset)->getZExtValue();
// Selects a StoreRetval{,V2,V4} node (writing a function's return value
// slot) into StoreRetval* instructions; NumElts and the operand gathering
// loop depend on the node's arity. Case labels and node creation are elided
// in this extraction.
2172bool NVPTXDAGToDAGISel::tryStoreRetval(
SDNode *
N) {
2176 unsigned OffsetVal = cast<ConstantSDNode>(
Offset)->getZExtValue();
2180 unsigned NumElts = 1;
2181 switch (
N->getOpcode()) {
// Gather the NumElts value operands to store.
2197 for (
unsigned i = 0; i < NumElts; ++i)
// NOTE(review): "= 0" engages the optional with value 0 rather than leaving
// it empty — std::nullopt would express "no opcode chosen yet" more
// accurately; verify against the elided has_value() checks before changing.
2205 std::optional<unsigned> Opcode = 0;
2211 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2212 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2213 NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2214 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2218 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2219 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2220 NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2221 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2225 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2226 NVPTX::StoreRetvalV4I32, std::nullopt,
2227 NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2228 NVPTX::StoreRetvalV4F32, std::nullopt);
// Selects a StoreParam{,V2,V4} node (writing an outgoing call argument slot)
// into StoreParam* instructions. Param index and offset are constant
// operands; the node carries trailing glue. Special cases (visible near the
// end) widen the stored value to i32 via a CvtNone conversion before using
// StoreParamI32. Case labels and node creation are elided in this extraction.
2242bool NVPTXDAGToDAGISel::tryStoreParam(
SDNode *
N) {
2246 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2248 unsigned OffsetVal = cast<ConstantSDNode>(
Offset)->getZExtValue();
// Glue is always the last operand.
2250 SDValue Glue =
N->getOperand(
N->getNumOperands() - 1);
2253 unsigned NumElts = 1;
2254 switch (
N->getOpcode()) {
2272 for (
unsigned i = 0; i < NumElts; ++i)
// NOTE(review): same as tryStoreRetval — "= 0" engages the optional with
// value 0; std::nullopt would be the clearer "unset" state. Verify against
// the elided checks before changing.
2282 std::optional<unsigned> Opcode = 0;
2283 switch (
N->getOpcode()) {
2290 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2291 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2292 NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2293 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2297 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2298 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2299 NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2300 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2304 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2305 NVPTX::StoreParamV4I32, std::nullopt,
2306 NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2307 NVPTX::StoreParamV4F32, std::nullopt);
// Narrow integer params are widened to i32 with a no-op conversion.
2317 Opcode = NVPTX::StoreParamI32;
2321 MVT::i32, Ops[0], CvtNone);
2326 Opcode = NVPTX::StoreParamI32;
2330 MVT::i32, Ops[0], CvtNone);
2345bool NVPTXDAGToDAGISel::tryTextureIntrinsic(
SDNode *
N) {
2348 switch (
N->getOpcode()) {
2349 default:
return false;
2351 Opc = NVPTX::TEX_1D_F32_S32_RR;
2354 Opc = NVPTX::TEX_1D_F32_F32_RR;
2357 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2360 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2363 Opc = NVPTX::TEX_1D_S32_S32_RR;
2366 Opc = NVPTX::TEX_1D_S32_F32_RR;
2369 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2372 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2375 Opc = NVPTX::TEX_1D_U32_S32_RR;
2378 Opc = NVPTX::TEX_1D_U32_F32_RR;
2381 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2384 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2387 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2390 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2393 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2396 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2399 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2402 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2405 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2408 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2411 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2414 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2417 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2420 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2423 Opc = NVPTX::TEX_2D_F32_S32_RR;
2426 Opc = NVPTX::TEX_2D_F32_F32_RR;
2429 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2432 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2435 Opc = NVPTX::TEX_2D_S32_S32_RR;
2438 Opc = NVPTX::TEX_2D_S32_F32_RR;
2441 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2444 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2447 Opc = NVPTX::TEX_2D_U32_S32_RR;
2450 Opc = NVPTX::TEX_2D_U32_F32_RR;
2453 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2456 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2459 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2462 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2465 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2468 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2471 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2474 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2477 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2480 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2483 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2486 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2489 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2492 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2495 Opc = NVPTX::TEX_3D_F32_S32_RR;
2498 Opc = NVPTX::TEX_3D_F32_F32_RR;
2501 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2504 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2507 Opc = NVPTX::TEX_3D_S32_S32_RR;
2510 Opc = NVPTX::TEX_3D_S32_F32_RR;
2513 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2516 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2519 Opc = NVPTX::TEX_3D_U32_S32_RR;
2522 Opc = NVPTX::TEX_3D_U32_F32_RR;
2525 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2528 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2531 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2534 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2537 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2540 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2543 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2546 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2549 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2552 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2555 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2558 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2561 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2564 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2567 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2570 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2573 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2576 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2579 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2582 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2585 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2588 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2591 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2594 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2597 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2600 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2603 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2606 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2609 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2612 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2615 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2618 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2621 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2624 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2627 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2630 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2633 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2636 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2639 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2642 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2645 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2648 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2651 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2654 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2657 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2660 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2663 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2666 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2669 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2672 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2675 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2678 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2681 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2684 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2687 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2690 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2693 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2696 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2699 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2702 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2705 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2708 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2711 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2714 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2717 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2720 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2723 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2726 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2729 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2732 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2735 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2738 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2741 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2744 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2747 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2750 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2753 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2756 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2759 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2762 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2765 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2768 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2771 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2774 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2777 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2780 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2783 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2786 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2789 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2792 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2795 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2798 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2801 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2804 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2807 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2810 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2813 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2816 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2819 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2822 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2825 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2828 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2831 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2834 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2837 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2840 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2843 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2846 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2849 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2852 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2864bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(
SDNode *
N) {
2866 switch (
N->getOpcode()) {
2867 default:
return false;
2869 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2872 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2875 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2878 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2881 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2884 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2887 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2890 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2893 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2896 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2899 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2902 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2905 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2908 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2911 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2914 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2917 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2920 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2923 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2926 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2929 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2932 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2935 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2938 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2941 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2944 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2947 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2950 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2953 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2956 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2959 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2962 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2965 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2968 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2971 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2974 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2977 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2980 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2983 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2986 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2989 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2992 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2995 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2998 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
3001 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
3004 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
3007 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
3010 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
3013 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
3016 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
3019 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
3022 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
3025 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
3028 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
3031 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
3034 Opc = NVPTX::SULD_1D_I8_TRAP_R;
3037 Opc = NVPTX::SULD_1D_I16_TRAP_R;
3040 Opc = NVPTX::SULD_1D_I32_TRAP_R;
3043 Opc = NVPTX::SULD_1D_I64_TRAP_R;
3046 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3049 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3052 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3055 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3058 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3061 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3064 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3067 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3070 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3073 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3076 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3079 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3082 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3085 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3088 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3091 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3094 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3097 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3100 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3103 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3106 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3109 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3112 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3115 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3118 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3121 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3124 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3127 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3130 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3133 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3136 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3139 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3142 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3145 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3148 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3151 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3154 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3157 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3160 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3163 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3166 Opc = NVPTX::SULD_3D_I8_TRAP_R;
3169 Opc = NVPTX::SULD_3D_I16_TRAP_R;
3172 Opc = NVPTX::SULD_3D_I32_TRAP_R;
3175 Opc = NVPTX::SULD_3D_I64_TRAP_R;
3178 Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3181 Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3184 Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3187 Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3190 Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3193 Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3196 Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3199 Opc = NVPTX::SULD_1D_I8_ZERO_R;
3202 Opc = NVPTX::SULD_1D_I16_ZERO_R;
3205 Opc = NVPTX::SULD_1D_I32_ZERO_R;
3208 Opc = NVPTX::SULD_1D_I64_ZERO_R;
3211 Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3214 Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3217 Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3220 Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3223 Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3226 Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3229 Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3232 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3235 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3238 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3241 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3244 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3247 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3250 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3253 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3256 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3259 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3262 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3265 Opc = NVPTX::SULD_2D_I8_ZERO_R;
3268 Opc = NVPTX::SULD_2D_I16_ZERO_R;
3271 Opc = NVPTX::SULD_2D_I32_ZERO_R;
3274 Opc = NVPTX::SULD_2D_I64_ZERO_R;
3277 Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3280 Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3283 Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3286 Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3289 Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3292 Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3295 Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3298 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3301 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3304 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3307 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3310 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3313 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3316 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3319 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3322 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3325 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3328 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3331 Opc = NVPTX::SULD_3D_I8_ZERO_R;
3334 Opc = NVPTX::SULD_3D_I16_ZERO_R;
3337 Opc = NVPTX::SULD_3D_I32_ZERO_R;
3340 Opc = NVPTX::SULD_3D_I64_ZERO_R;
3343 Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3346 Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3349 Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3352 Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3355 Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3358 Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3361 Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3376bool NVPTXDAGToDAGISel::tryBFE(
SDNode *
N) {
3383 bool IsSigned =
false;
3388 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3413 Val =
LHS.getNode()->getOperand(0);
3414 Start =
LHS.getNode()->getOperand(1);
3420 uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3421 if (NumBits > GoodBits) {
3455 if (isa<ConstantSDNode>(AndLHS)) {
3479 NumBits = NumZeros + NumOnes - ShiftAmt;
3485 if (ShiftAmt < NumZeros) {
3502 Val =
LHS->getOperand(0);
3521 if (OuterShiftAmt < InnerShiftAmt) {
3557 Opc = NVPTX::BFE_S32rii;
3559 Opc = NVPTX::BFE_U32rii;
3563 Opc = NVPTX::BFE_S64rii;
3565 Opc = NVPTX::BFE_U64rii;
3604bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3609 if (SelectDirectAddr(base,
Base)) {
3632bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3644 if (SelectDirectAddr(
Addr.getOperand(0),
Addr)) {
3649 dyn_cast<FrameIndexSDNode>(
Addr.getOperand(0)))
3674bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(
SDNode *
N,
3675 unsigned int spN)
const {
3676 const Value *Src =
nullptr;
3677 if (
MemSDNode *mN = dyn_cast<MemSDNode>(
N)) {
3678 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3680 Src = mN->getMemOperand()->getValue();
3684 if (
auto *PT = dyn_cast<PointerType>(Src->getType()))
3685 return (PT->getAddressSpace() == spN);
3692 const SDValue &Op,
unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3694 switch (ConstraintID) {
3698 if (SelectDirectAddr(Op, Op0)) {
3699 OutOps.push_back(Op0);
3703 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3704 OutOps.push_back(Op0);
3705 OutOps.push_back(Op1);
3715unsigned NVPTXDAGToDAGISel::GetConvertOpcode(
MVT DestTy,
MVT SrcTy,
3725 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3727 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3729 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3736 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3738 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3740 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3747 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3749 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3751 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3758 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3760 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3762 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static unsigned int getCodeAddrSpace(MemSDNode *N)
static std::optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, std::optional< unsigned > Opcode_i64, unsigned Opcode_f16, unsigned Opcode_f16x2, unsigned Opcode_f32, std::optional< unsigned > Opcode_f64)
static int getLdStRegType(EVT VT)
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
This is an SDNode representing atomic operations.
const SDValue & getVal() const
uint64_t getZExtValue() const
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
FunctionPass class - This class is used to implement most global optimizations.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
EVT getMemoryVT() const
Return the type of the in-memory value.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
const NVPTXSubtarget * Subtarget
const NVPTXTargetLowering * getTargetLowering() const override
bool useF32FTZ(const MachineFunction &MF) const
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
bool usePrecSqrtF32() const
bool allowUnsafeFPMath(MachineFunction &MF) const
int getDivF32Level() const
bool useShortPointers() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const SDValue & getOperand(unsigned Num) const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
CodeGenOpt::Level OptLevel
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getValue() const
LLVM Value Representation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Level
Code generation optimization level.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ TexUnified1DS32FloatLevel
@ Tex1DArrayFloatFloatLevel
@ TexUnified2DU32FloatGrad
@ Tld4UnifiedG2DFloatFloat
@ TexUnifiedCubeArrayFloatFloatLevel
@ Tld4UnifiedR2DFloatFloat
@ Tex2DArrayS32FloatLevel
@ TexUnified1DArrayFloatFloatLevel
@ TexUnified2DFloatFloatLevel
@ TexUnified3DFloatFloatLevel
@ TexUnified1DFloatFloatLevel
@ TexUnified2DArrayU32Float
@ TexUnified1DArrayFloatFloat
@ Tex1DArrayFloatFloatGrad
@ TexUnified1DFloatFloatGrad
@ TexUnified2DArrayFloatFloat
@ TexUnified3DU32FloatLevel
@ TexUnified1DArrayU32Float
@ TexUnified2DArrayFloatFloatLevel
@ TexUnified2DFloatFloatGrad
@ TexUnified2DArrayU32S32
@ TexUnifiedCubeArrayS32FloatLevel
@ TexUnified1DArrayS32Float
@ TexUnified1DArrayS32FloatLevel
@ TexUnified2DS32FloatLevel
@ TexUnified3DU32FloatGrad
@ TexUnifiedCubeU32FloatLevel
@ TexUnified2DArrayU32FloatGrad
@ TexUnifiedCubeFloatFloatLevel
@ TexUnified1DArrayFloatS32
@ TexUnifiedCubeS32FloatLevel
@ TexUnified1DS32FloatGrad
@ Tex2DArrayFloatFloatLevel
@ TexUnifiedCubeArrayFloatFloat
@ TexUnifiedCubeFloatFloat
@ TexUnified1DArrayU32S32
@ TexUnified3DFloatFloatGrad
@ Tld4UnifiedA2DFloatFloat
@ TexUnified3DS32FloatGrad
@ TexUnified2DU32FloatLevel
@ TexUnified1DArrayS32S32
@ TexCubeArrayFloatFloatLevel
@ TexUnified1DU32FloatGrad
@ TexCubeArrayS32FloatLevel
@ Tex2DArrayU32FloatLevel
@ Tex1DArrayU32FloatLevel
@ TexUnified2DArrayU32FloatLevel
@ TexUnified1DArrayFloatFloatGrad
@ TexCubeArrayU32FloatLevel
@ TexUnified3DS32FloatLevel
@ TexUnified2DArrayS32Float
@ Tex2DArrayFloatFloatGrad
@ TexUnifiedCubeArrayS32Float
@ TexUnified2DArrayS32FloatLevel
@ Tex1DArrayS32FloatLevel
@ TexUnifiedCubeArrayU32FloatLevel
@ TexUnified2DArrayS32S32
@ TexUnified2DArrayFloatFloatGrad
@ Tld4UnifiedB2DFloatFloat
@ TexUnified1DArrayU32FloatLevel
@ TexUnified1DArrayS32FloatGrad
@ TexUnified2DS32FloatGrad
@ TexUnified2DArrayS32FloatGrad
@ TexUnified1DU32FloatLevel
@ TexUnifiedCubeArrayU32Float
@ TexUnified2DArrayFloatS32
@ TexUnified1DArrayU32FloatGrad