20#include "llvm/IR/IntrinsicsNVPTX.h"
30#define DEBUG_TYPE "nvptx-isel"
31#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
47 doMulWide = (OptLevel > CodeGenOptLevel::None);
55int NVPTXDAGToDAGISel::getDivF32Level()
const {
59bool NVPTXDAGToDAGISel::usePrecSqrtF32()
const {
63bool NVPTXDAGToDAGISel::useF32FTZ()
const {
67bool NVPTXDAGToDAGISel::allowFMA()
const {
72bool NVPTXDAGToDAGISel::allowUnsafeFPMath()
const {
77bool NVPTXDAGToDAGISel::useShortPointers()
const {
83void NVPTXDAGToDAGISel::Select(
SDNode *
N) {
85 if (
N->isMachineOpcode()) {
90 switch (
N->getOpcode()) {
102 if (tryEXTRACT_VECTOR_ELEMENT(
N))
109 SelectSETP_BF16X2(
N);
113 if (tryLoadVector(
N))
125 if (tryStoreVector(
N))
137 if (tryStoreRetval(
N))
145 if (tryStoreParam(
N))
149 if (tryIntrinsicNoChain(
N))
153 if (tryIntrinsicChain(
N))
324 if (tryTextureIntrinsic(
N))
492 if (trySurfaceIntrinsic(
N))
503 SelectAddrSpaceCast(
N);
506 if (tryConstantFP(
N))
515bool NVPTXDAGToDAGISel::tryIntrinsicChain(
SDNode *
N) {
516 unsigned IID = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
520 case Intrinsic::nvvm_ldg_global_f:
521 case Intrinsic::nvvm_ldg_global_i:
522 case Intrinsic::nvvm_ldg_global_p:
523 case Intrinsic::nvvm_ldu_global_f:
524 case Intrinsic::nvvm_ldu_global_i:
525 case Intrinsic::nvvm_ldu_global_p:
532bool NVPTXDAGToDAGISel::tryConstantFP(
SDNode *
N) {
533 if (
N->getValueType(0) != MVT::f16 &&
N->getValueType(0) != MVT::bf16)
536 cast<ConstantFPSDNode>(
N)->getValueAPF(),
SDLoc(
N),
N->getValueType(0));
538 (
N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
539 : NVPTX::LOAD_CONST_BF16),
540 SDLoc(
N),
N->getValueType(0), Val);
568 return CmpMode::NotANumber;
602bool NVPTXDAGToDAGISel::SelectSETP_F16X2(
SDNode *
N) {
603 unsigned PTXCmpMode =
604 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
607 NVPTX::SETP_f16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
613bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(
SDNode *
N) {
614 unsigned PTXCmpMode =
615 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
618 NVPTX::SETP_bf16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
626bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(
SDNode *
N) {
636 for (
auto *U :
Vector.getNode()->uses()) {
639 if (
U->getOperand(0) !=
Vector)
642 dyn_cast<ConstantSDNode>(
U->getOperand(1))) {
643 if (IdxConst->getZExtValue() == 0)
645 else if (IdxConst->getZExtValue() == 1)
662 for (
auto *
Node : E0)
664 for (
auto *
Node : E1)
671 const Value *Src =
N->getMemOperand()->getValue();
676 if (
auto *PT = dyn_cast<PointerType>(Src->getType())) {
677 switch (PT->getAddressSpace()) {
712 if (
N->isInvariant())
724 if (
auto *
A = dyn_cast<const Argument>(V))
725 return IsKernelFn &&
A->onlyReadsMemory() &&
A->hasNoAliasAttr();
726 if (
auto *GV = dyn_cast<const GlobalVariable>(V))
727 return GV->isConstant();
732bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(
SDNode *
N) {
733 unsigned IID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
737 case Intrinsic::nvvm_texsurf_handle_internal:
738 SelectTexSurfHandle(
N);
743void NVPTXDAGToDAGISel::SelectTexSurfHandle(
SDNode *
N) {
748 MVT::i64, GlobalVal));
751void NVPTXDAGToDAGISel::SelectAddrSpaceCast(
SDNode *
N) {
756 assert(SrcAddrSpace != DstAddrSpace &&
757 "addrspacecast must be between different address spaces");
762 switch (SrcAddrSpace) {
765 Opc = TM.
is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
768 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
769 : NVPTX::cvta_shared_yes_64)
770 : NVPTX::cvta_shared_yes;
773 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
774 : NVPTX::cvta_const_yes_64)
775 : NVPTX::cvta_const_yes;
778 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
779 : NVPTX::cvta_local_yes_64)
780 : NVPTX::cvta_local_yes;
788 if (SrcAddrSpace != 0)
791 switch (DstAddrSpace) {
794 Opc = TM.
is64Bit() ? NVPTX::cvta_to_global_yes_64
795 : NVPTX::cvta_to_global_yes;
798 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
799 : NVPTX::cvta_to_shared_yes_64)
800 : NVPTX::cvta_to_shared_yes;
803 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
804 : NVPTX::cvta_to_const_yes_64)
805 : NVPTX::cvta_to_const_yes;
808 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
809 : NVPTX::cvta_to_local_yes_64)
810 : NVPTX::cvta_to_local_yes;
813 Opc = TM.
is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
814 : NVPTX::nvvm_ptr_gen_to_param;
825static std::optional<unsigned>
827 unsigned Opcode_i16,
unsigned Opcode_i32,
828 std::optional<unsigned> Opcode_i64,
unsigned Opcode_f32,
829 std::optional<unsigned> Opcode_f64) {
872bool NVPTXDAGToDAGISel::tryLoad(
SDNode *
N) {
875 assert(
LD->readMem() &&
"Expected load");
877 EVT LoadedVT =
LD->getMemoryVT();
878 SDNode *NVPTXLD =
nullptr;
922 unsigned fromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
923 unsigned int fromType;
929 "Unexpected vector type");
944 std::optional<unsigned>
Opcode;
947 if (SelectDirectAddr(N1,
Addr)) {
949 NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
950 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
953 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
954 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
955 getI32Imm(fromTypeWidth, dl),
Addr, Chain };
960 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
961 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
964 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
965 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
966 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
970 if (PointerSize == 64)
973 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
974 NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
977 NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
978 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
981 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
982 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
983 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
986 if (PointerSize == 64)
989 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
990 NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
993 NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
994 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
997 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
998 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
999 getI32Imm(fromTypeWidth, dl), N1, Chain };
1013bool NVPTXDAGToDAGISel::tryLoadVector(
SDNode *
N) {
1018 std::optional<unsigned>
Opcode;
1030 return tryLDGLDU(
N);
1055 unsigned FromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
1058 unsigned ExtensionType = cast<ConstantSDNode>(
1059 N->getOperand(
N->getNumOperands() - 1))->getZExtValue();
1067 switch (
N->getOpcode()) {
1078 EVT EltVT =
N->getValueType(0);
1090 if (SelectDirectAddr(Op1,
Addr)) {
1091 switch (
N->getOpcode()) {
1096 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1097 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1098 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1103 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
1104 std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
1109 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1110 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1111 getI32Imm(FromTypeWidth,
DL),
Addr, Chain };
1113 }
else if (PointerSize == 64
1116 switch (
N->getOpcode()) {
1121 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1122 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1123 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1128 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
1129 std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
1134 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1135 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1138 }
else if (PointerSize == 64
1141 if (PointerSize == 64) {
1142 switch (
N->getOpcode()) {
1148 NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
1149 NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
1150 NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
1155 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
1156 NVPTX::LDV_f32_v4_ari_64, std::nullopt);
1160 switch (
N->getOpcode()) {
1165 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1166 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1167 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1172 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
1173 std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
1179 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1180 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1185 if (PointerSize == 64) {
1186 switch (
N->getOpcode()) {
1192 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1193 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1194 NVPTX::LDV_f64_v2_areg_64);
1199 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
1200 NVPTX::LDV_f32_v4_areg_64, std::nullopt);
1204 switch (
N->getOpcode()) {
1210 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1211 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
1212 NVPTX::LDV_f64_v2_areg);
1217 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
1218 std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
1224 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1225 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1226 getI32Imm(FromTypeWidth,
DL), Op1, Chain };
1237bool NVPTXDAGToDAGISel::tryLDGLDU(
SDNode *
N) {
1247 Op1 =
N->getOperand(2);
1248 Mem = cast<MemIntrinsicSDNode>(
N);
1249 unsigned IID = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
1253 case Intrinsic::nvvm_ldg_global_f:
1254 case Intrinsic::nvvm_ldg_global_i:
1255 case Intrinsic::nvvm_ldg_global_p:
1258 case Intrinsic::nvvm_ldu_global_f:
1259 case Intrinsic::nvvm_ldu_global_i:
1260 case Intrinsic::nvvm_ldu_global_p:
1265 Op1 =
N->getOperand(1);
1266 Mem = cast<MemSDNode>(
N);
1269 std::optional<unsigned>
Opcode;
1273 EVT OrigType =
N->getValueType(0);
1276 unsigned NumElts = 1;
1281 if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
1282 (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
1283 (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
1284 assert(NumElts % 2 == 0 &&
"Vector must have even number of elements");
1287 }
else if (OrigType == MVT::v4i8) {
1296 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1298 for (
unsigned i = 0; i != NumElts; ++i) {
1304 if (SelectDirectAddr(Op1,
Addr)) {
1305 switch (
N->getOpcode()) {
1312 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1313 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1314 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1315 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1316 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1317 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1320 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1321 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1322 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1323 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1324 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1325 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1330 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1331 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1332 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1333 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1334 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1335 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1339 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1340 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1341 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1342 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1343 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1344 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1350 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1351 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
1352 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
1357 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1358 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
1359 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
1369 switch (
N->getOpcode()) {
1376 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1377 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1378 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1379 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1380 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1381 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1384 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1385 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1386 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1387 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1388 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1389 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1394 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1395 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1396 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1397 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1398 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1399 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1403 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1404 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1405 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1406 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1407 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1408 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1414 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1415 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
1416 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
1421 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1422 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
1423 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
1427 switch (
N->getOpcode()) {
1434 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1435 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1436 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1437 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1438 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1439 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1442 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1443 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1444 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1445 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1446 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1447 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1452 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1453 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1454 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1455 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1456 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1457 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1461 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1462 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1463 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1464 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1465 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1466 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1472 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1473 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
1474 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
1479 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1480 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
1481 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
1491 switch (
N->getOpcode()) {
1498 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1499 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1500 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1501 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1502 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1503 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1506 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1507 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1508 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1509 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1510 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1511 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1516 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1517 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1518 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1519 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1520 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1521 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1525 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1526 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1527 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1528 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1529 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1530 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1536 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1537 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
1538 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
1543 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1544 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
1545 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
1549 switch (
N->getOpcode()) {
1556 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1557 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1558 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1559 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1560 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1561 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1564 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1565 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1566 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1567 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1568 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1569 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1574 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1575 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1576 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1577 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1578 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1579 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1583 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1584 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1585 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1586 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1587 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1588 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1594 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1595 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
1596 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
1601 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1602 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
1603 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
1609 SDValue Ops[] = { Op1, Chain };
1626 if (OrigType != EltVT &&
1636 for (
unsigned i = 0; i != NumElts; ++i) {
1652bool NVPTXDAGToDAGISel::tryStore(
SDNode *
N) {
1655 assert(
ST->writeMem() &&
"Expected store");
1658 assert((PlainStore || AtomicStore) &&
"Expected store");
1659 EVT StoreVT =
ST->getMemoryVT();
1660 SDNode *NVPTXST =
nullptr;
1663 if (PlainStore && PlainStore->
isIndexed())
1702 "Unexpected vector type");
1715 std::optional<unsigned>
Opcode;
1717 Value.getNode()->getSimpleValueType(0).SimpleTy;
1719 if (SelectDirectAddr(BasePtr,
Addr)) {
1721 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1722 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1726 getI32Imm(isVolatile, dl),
1727 getI32Imm(CodeAddrSpace, dl),
1728 getI32Imm(vecType, dl),
1729 getI32Imm(toType, dl),
1730 getI32Imm(toTypeWidth, dl),
1734 }
else if (PointerSize == 64
1738 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1739 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1743 getI32Imm(isVolatile, dl),
1744 getI32Imm(CodeAddrSpace, dl),
1745 getI32Imm(vecType, dl),
1746 getI32Imm(toType, dl),
1747 getI32Imm(toTypeWidth, dl),
1752 }
else if (PointerSize == 64
1755 if (PointerSize == 64)
1758 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
1759 NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1762 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1763 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1768 getI32Imm(isVolatile, dl),
1769 getI32Imm(CodeAddrSpace, dl),
1770 getI32Imm(vecType, dl),
1771 getI32Imm(toType, dl),
1772 getI32Imm(toTypeWidth, dl),
1778 if (PointerSize == 64)
1780 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1781 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1782 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1785 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1786 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1790 getI32Imm(isVolatile, dl),
1791 getI32Imm(CodeAddrSpace, dl),
1792 getI32Imm(vecType, dl),
1793 getI32Imm(toType, dl),
1794 getI32Imm(toTypeWidth, dl),
1809bool NVPTXDAGToDAGISel::tryStoreVector(
SDNode *
N) {
1813 std::optional<unsigned>
Opcode;
1848 switch (
N->getOpcode()) {
1853 N2 =
N->getOperand(3);
1861 N2 =
N->getOperand(5);
1883 if (SelectDirectAddr(N2,
Addr)) {
1884 switch (
N->getOpcode()) {
1889 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1890 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1891 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1895 NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
1896 NVPTX::STV_i32_v4_avar, std::nullopt,
1897 NVPTX::STV_f32_v4_avar, std::nullopt);
1903 switch (
N->getOpcode()) {
1908 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1909 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1910 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1915 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
1916 std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
1923 if (PointerSize == 64) {
1924 switch (
N->getOpcode()) {
1930 NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
1931 NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
1932 NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
1937 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
1938 NVPTX::STV_f32_v4_ari_64, std::nullopt);
1942 switch (
N->getOpcode()) {
1947 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1948 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1949 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1953 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
1954 NVPTX::STV_i32_v4_ari, std::nullopt,
1955 NVPTX::STV_f32_v4_ari, std::nullopt);
1962 if (PointerSize == 64) {
1963 switch (
N->getOpcode()) {
1969 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1970 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1971 NVPTX::STV_f64_v2_areg_64);
1976 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
1977 NVPTX::STV_f32_v4_areg_64, std::nullopt);
1981 switch (
N->getOpcode()) {
1987 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
1988 NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
1989 NVPTX::STV_f64_v2_areg);
1994 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
1995 std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
2016bool NVPTXDAGToDAGISel::tryLoadParam(
SDNode *
Node) {
2024 switch (
Node->getOpcode()) {
2038 EVT EltVT =
Node->getValueType(0);
2041 std::optional<unsigned>
Opcode;
2048 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2049 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2050 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2055 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2056 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
2057 NVPTX::LoadParamMemV2F64);
2062 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
2063 std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
2072 }
else if (VecSize == 2) {
2075 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2079 unsigned OffsetVal = cast<ConstantSDNode>(
Offset)->getZExtValue();
2090bool NVPTXDAGToDAGISel::tryStoreRetval(
SDNode *
N) {
2094 unsigned OffsetVal = cast<ConstantSDNode>(
Offset)->getZExtValue();
2098 unsigned NumElts = 1;
2099 switch (
N->getOpcode()) {
2115 for (
unsigned i = 0; i < NumElts; ++i)
2123 std::optional<unsigned>
Opcode = 0;
2129 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2130 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2131 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2135 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2136 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2137 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2141 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2142 NVPTX::StoreRetvalV4I32, std::nullopt,
2143 NVPTX::StoreRetvalV4F32, std::nullopt);
2157bool NVPTXDAGToDAGISel::tryStoreParam(
SDNode *
N) {
2161 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2163 unsigned OffsetVal = cast<ConstantSDNode>(
Offset)->getZExtValue();
2165 SDValue Glue =
N->getOperand(
N->getNumOperands() - 1);
2168 unsigned NumElts = 1;
2169 switch (
N->getOpcode()) {
2187 for (
unsigned i = 0; i < NumElts; ++i)
2197 std::optional<unsigned>
Opcode = 0;
2198 switch (
N->getOpcode()) {
2205 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2206 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2207 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2211 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2212 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2213 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2217 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2218 NVPTX::StoreParamV4I32, std::nullopt,
2219 NVPTX::StoreParamV4F32, std::nullopt);
2229 Opcode = NVPTX::StoreParamI32;
2233 MVT::i32, Ops[0], CvtNone);
2238 Opcode = NVPTX::StoreParamI32;
2242 MVT::i32, Ops[0], CvtNone);
2257bool NVPTXDAGToDAGISel::tryTextureIntrinsic(
SDNode *
N) {
2260 switch (
N->getOpcode()) {
2261 default:
return false;
2263 Opc = NVPTX::TEX_1D_F32_S32_RR;
2266 Opc = NVPTX::TEX_1D_F32_F32_RR;
2269 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2272 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2275 Opc = NVPTX::TEX_1D_S32_S32_RR;
2278 Opc = NVPTX::TEX_1D_S32_F32_RR;
2281 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2284 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2287 Opc = NVPTX::TEX_1D_U32_S32_RR;
2290 Opc = NVPTX::TEX_1D_U32_F32_RR;
2293 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2296 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2299 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2302 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2305 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2308 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2311 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2314 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2317 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2320 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2323 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2326 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2329 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2332 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2335 Opc = NVPTX::TEX_2D_F32_S32_RR;
2338 Opc = NVPTX::TEX_2D_F32_F32_RR;
2341 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2344 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2347 Opc = NVPTX::TEX_2D_S32_S32_RR;
2350 Opc = NVPTX::TEX_2D_S32_F32_RR;
2353 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2356 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2359 Opc = NVPTX::TEX_2D_U32_S32_RR;
2362 Opc = NVPTX::TEX_2D_U32_F32_RR;
2365 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2368 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2371 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2374 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2377 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2380 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2383 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2386 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2389 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2392 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2395 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2398 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2401 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2404 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2407 Opc = NVPTX::TEX_3D_F32_S32_RR;
2410 Opc = NVPTX::TEX_3D_F32_F32_RR;
2413 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2416 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2419 Opc = NVPTX::TEX_3D_S32_S32_RR;
2422 Opc = NVPTX::TEX_3D_S32_F32_RR;
2425 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2428 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2431 Opc = NVPTX::TEX_3D_U32_S32_RR;
2434 Opc = NVPTX::TEX_3D_U32_F32_RR;
2437 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2440 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2443 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2446 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2449 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2452 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2455 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2458 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2461 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2464 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2467 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2470 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2473 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2476 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2479 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2482 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2485 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2488 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2491 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2494 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2497 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2500 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2503 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2506 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2509 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2512 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2515 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2518 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2521 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2524 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2527 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2530 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2533 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2536 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2539 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2542 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2545 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2548 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2551 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2554 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2557 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2560 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2563 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2566 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2569 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2572 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2575 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2578 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2581 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2584 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2587 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2590 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2593 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2596 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2599 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2602 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2605 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2608 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2611 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2614 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2617 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2620 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2623 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2626 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2629 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2632 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2635 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2638 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2641 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2644 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2647 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2650 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2653 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2656 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2659 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2662 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2665 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2668 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2671 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2674 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2677 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2680 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2683 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2686 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2689 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2692 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2695 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2698 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2701 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2704 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2707 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2710 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2713 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2716 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2719 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2722 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2725 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2728 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2731 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2734 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2737 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2740 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2743 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2746 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2749 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2752 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2755 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2758 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2761 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2764 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2776bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(
SDNode *
N) {
2778 switch (
N->getOpcode()) {
2779 default:
return false;
2781 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2784 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2787 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2790 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2793 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2796 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2799 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2802 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2805 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2808 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2811 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2814 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2817 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2820 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2823 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2826 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2829 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2832 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2835 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2838 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2841 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2844 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2847 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2850 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2853 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2856 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2859 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2862 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2865 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2868 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2871 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2874 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2877 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2880 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2883 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2886 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2889 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2892 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2895 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2898 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2901 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2904 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2907 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2910 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
2913 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
2916 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
2919 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
2922 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
2925 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
2928 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
2931 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
2934 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
2937 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
2940 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
2943 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
2946 Opc = NVPTX::SULD_1D_I8_TRAP_R;
2949 Opc = NVPTX::SULD_1D_I16_TRAP_R;
2952 Opc = NVPTX::SULD_1D_I32_TRAP_R;
2955 Opc = NVPTX::SULD_1D_I64_TRAP_R;
2958 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
2961 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
2964 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
2967 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
2970 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
2973 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
2976 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
2979 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
2982 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
2985 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
2988 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
2991 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
2994 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
2997 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3000 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3003 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3006 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3009 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3012 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3015 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3018 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3021 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3024 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3027 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3030 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3033 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3036 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3039 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3042 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3045 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3048 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3051 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3054 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3057 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3060 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3063 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3066 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3069 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3072 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3075 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3078 Opc = NVPTX::SULD_3D_I8_TRAP_R;
3081 Opc = NVPTX::SULD_3D_I16_TRAP_R;
3084 Opc = NVPTX::SULD_3D_I32_TRAP_R;
3087 Opc = NVPTX::SULD_3D_I64_TRAP_R;
3090 Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3093 Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3096 Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3099 Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3102 Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3105 Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3108 Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3111 Opc = NVPTX::SULD_1D_I8_ZERO_R;
3114 Opc = NVPTX::SULD_1D_I16_ZERO_R;
3117 Opc = NVPTX::SULD_1D_I32_ZERO_R;
3120 Opc = NVPTX::SULD_1D_I64_ZERO_R;
3123 Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3126 Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3129 Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3132 Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3135 Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3138 Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3141 Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3144 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3147 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3150 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3153 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3156 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3159 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3162 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3165 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3168 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3171 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3174 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3177 Opc = NVPTX::SULD_2D_I8_ZERO_R;
3180 Opc = NVPTX::SULD_2D_I16_ZERO_R;
3183 Opc = NVPTX::SULD_2D_I32_ZERO_R;
3186 Opc = NVPTX::SULD_2D_I64_ZERO_R;
3189 Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3192 Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3195 Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3198 Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3201 Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3204 Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3207 Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3210 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3213 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3216 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3219 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3222 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3225 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3228 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3231 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3234 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3237 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3240 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3243 Opc = NVPTX::SULD_3D_I8_ZERO_R;
3246 Opc = NVPTX::SULD_3D_I16_ZERO_R;
3249 Opc = NVPTX::SULD_3D_I32_ZERO_R;
3252 Opc = NVPTX::SULD_3D_I64_ZERO_R;
3255 Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3258 Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3261 Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3264 Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3267 Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3270 Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3273 Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3288bool NVPTXDAGToDAGISel::tryBFE(
SDNode *
N) {
3295 bool IsSigned =
false;
3300 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3325 Val =
LHS.getNode()->getOperand(0);
3326 Start =
LHS.getNode()->getOperand(1);
3332 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3333 if (NumBits > GoodBits) {
3367 if (isa<ConstantSDNode>(AndLHS)) {
3391 NumBits = NumZeros + NumOnes - ShiftAmt;
3397 if (ShiftAmt < NumZeros) {
3414 Val =
LHS->getOperand(0);
3433 if (OuterShiftAmt < InnerShiftAmt) {
3469 Opc = NVPTX::BFE_S32rii;
3471 Opc = NVPTX::BFE_U32rii;
3475 Opc = NVPTX::BFE_S64rii;
3477 Opc = NVPTX::BFE_U64rii;
3516bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3521 if (SelectDirectAddr(base,
Base)) {
3544bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3556 if (SelectDirectAddr(
Addr.getOperand(0),
Addr)) {
3561 dyn_cast<FrameIndexSDNode>(
Addr.getOperand(0)))
3586bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(
SDNode *
N,
3587 unsigned int spN)
const {
3588 const Value *Src =
nullptr;
3589 if (
MemSDNode *mN = dyn_cast<MemSDNode>(
N)) {
3590 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3592 Src = mN->getMemOperand()->getValue();
3596 if (
auto *PT = dyn_cast<PointerType>(Src->getType()))
3597 return (PT->getAddressSpace() == spN);
3605 std::vector<SDValue> &OutOps) {
3607 switch (ConstraintID) {
3611 if (SelectDirectAddr(
Op, Op0)) {
3612 OutOps.push_back(Op0);
3616 if (SelectADDRri(
Op.getNode(),
Op, Op0, Op1)) {
3617 OutOps.push_back(Op0);
3618 OutOps.push_back(Op1);
3628unsigned NVPTXDAGToDAGISel::GetConvertOpcode(
MVT DestTy,
MVT SrcTy,
3639 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3641 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3643 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3650 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3652 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3654 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3661 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3663 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3665 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3672 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3674 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3676 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3683 return NVPTX::CVT_f32_f16;
3685 return NVPTX::CVT_f64_f16;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static unsigned int getCodeAddrSpace(MemSDNode *N)
static int getLdStRegType(EVT VT)
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
static std::optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, std::optional< unsigned > Opcode_i64, unsigned Opcode_f32, std::optional< unsigned > Opcode_f64)
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr uint32_t Opcode
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
This is an SDNode representing atomic operations.
const SDValue & getVal() const
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
FunctionPass class - This class is used to implement most global optimizations.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
EVT getMemoryVT() const
Return the type of the in-memory value.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
const NVPTXSubtarget * Subtarget
const NVPTXTargetLowering * getTargetLowering() const override
bool useF32FTZ(const MachineFunction &MF) const
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const
bool usePrecSqrtF32() const
bool allowUnsafeFPMath(MachineFunction &MF) const
int getDivF32Level() const
bool useShortPointers() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const SDValue & getOperand(unsigned Num) const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getValue() const
LLVM Value Representation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ TexUnified1DS32FloatLevel
@ Tex1DArrayFloatFloatLevel
@ TexUnified2DU32FloatGrad
@ Tld4UnifiedG2DFloatFloat
@ TexUnifiedCubeArrayFloatFloatLevel
@ Tld4UnifiedR2DFloatFloat
@ Tex2DArrayS32FloatLevel
@ TexUnified1DArrayFloatFloatLevel
@ TexUnified2DFloatFloatLevel
@ TexUnified3DFloatFloatLevel
@ TexUnified1DFloatFloatLevel
@ TexUnified2DArrayU32Float
@ TexUnified1DArrayFloatFloat
@ Tex1DArrayFloatFloatGrad
@ TexUnified1DFloatFloatGrad
@ TexUnified2DArrayFloatFloat
@ TexUnified3DU32FloatLevel
@ TexUnified1DArrayU32Float
@ TexUnified2DArrayFloatFloatLevel
@ TexUnified2DFloatFloatGrad
@ TexUnified2DArrayU32S32
@ TexUnifiedCubeArrayS32FloatLevel
@ TexUnified1DArrayS32Float
@ TexUnified1DArrayS32FloatLevel
@ TexUnified2DS32FloatLevel
@ TexUnified3DU32FloatGrad
@ TexUnifiedCubeU32FloatLevel
@ TexUnified2DArrayU32FloatGrad
@ TexUnifiedCubeFloatFloatLevel
@ TexUnified1DArrayFloatS32
@ TexUnifiedCubeS32FloatLevel
@ TexUnified1DS32FloatGrad
@ Tex2DArrayFloatFloatLevel
@ TexUnifiedCubeArrayFloatFloat
@ TexUnifiedCubeFloatFloat
@ TexUnified1DArrayU32S32
@ TexUnified3DFloatFloatGrad
@ Tld4UnifiedA2DFloatFloat
@ TexUnified3DS32FloatGrad
@ TexUnified2DU32FloatLevel
@ TexUnified1DArrayS32S32
@ TexCubeArrayFloatFloatLevel
@ TexUnified1DU32FloatGrad
@ TexCubeArrayS32FloatLevel
@ Tex2DArrayU32FloatLevel
@ Tex1DArrayU32FloatLevel
@ TexUnified2DArrayU32FloatLevel
@ TexUnified1DArrayFloatFloatGrad
@ TexCubeArrayU32FloatLevel
@ TexUnified3DS32FloatLevel
@ TexUnified2DArrayS32Float
@ Tex2DArrayFloatFloatGrad
@ TexUnifiedCubeArrayS32Float
@ TexUnified2DArrayS32FloatLevel
@ Tex1DArrayS32FloatLevel
@ TexUnifiedCubeArrayU32FloatLevel
@ TexUnified2DArrayS32S32
@ TexUnified2DArrayFloatFloatGrad
@ Tld4UnifiedB2DFloatFloat
@ TexUnified1DArrayU32FloatLevel
@ TexUnified1DArrayS32FloatGrad
@ TexUnified2DS32FloatGrad
@ TexUnified2DArrayS32FloatGrad
@ TexUnified1DU32FloatLevel
@ TexUnifiedCubeArrayU32Float
@ TexUnified2DArrayFloatS32
@ TexUnified1DArrayU32FloatGrad