20#include "llvm/IR/IntrinsicsNVPTX.h"
30#define DEBUG_TYPE "nvptx-isel"
31#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
35 cl::desc(
"Enable reciprocal sqrt optimization"));
64int NVPTXDAGToDAGISel::getDivF32Level()
const {
68bool NVPTXDAGToDAGISel::usePrecSqrtF32()
const {
72bool NVPTXDAGToDAGISel::useF32FTZ()
const {
76bool NVPTXDAGToDAGISel::allowFMA()
const {
81bool NVPTXDAGToDAGISel::allowUnsafeFPMath()
const {
86bool NVPTXDAGToDAGISel::doRsqrtOpt()
const {
return EnableRsqrtOpt; }
90void NVPTXDAGToDAGISel::Select(
SDNode *
N) {
92 if (
N->isMachineOpcode()) {
97 switch (
N->getOpcode()) {
109 if (tryEXTRACT_VECTOR_ELEMENT(
N))
116 SelectSETP_BF16X2(
N);
120 if (tryLoadVector(
N))
132 if (tryStoreVector(
N))
144 if (tryStoreRetval(
N))
152 if (tryStoreParam(
N))
156 if (tryIntrinsicNoChain(
N))
160 if (tryIntrinsicChain(
N))
337 if (tryTextureIntrinsic(
N))
505 if (trySurfaceIntrinsic(
N))
516 SelectAddrSpaceCast(
N);
519 if (tryConstantFP(
N))
523 if (
N->getOperand(1).getValueType() == MVT::i128) {
524 SelectV2I64toI128(
N);
530 if (
N->getOperand(1).getValueType() == MVT::i128) {
531 SelectI128toV2I64(
N);
542bool NVPTXDAGToDAGISel::tryIntrinsicChain(
SDNode *
N) {
543 unsigned IID =
N->getConstantOperandVal(1);
547 case Intrinsic::nvvm_ldg_global_f:
548 case Intrinsic::nvvm_ldg_global_i:
549 case Intrinsic::nvvm_ldg_global_p:
550 case Intrinsic::nvvm_ldu_global_f:
551 case Intrinsic::nvvm_ldu_global_i:
552 case Intrinsic::nvvm_ldu_global_p:
559bool NVPTXDAGToDAGISel::tryConstantFP(
SDNode *
N) {
560 if (
N->getValueType(0) != MVT::f16 &&
N->getValueType(0) != MVT::bf16)
563 cast<ConstantFPSDNode>(
N)->getValueAPF(),
SDLoc(
N),
N->getValueType(0));
565 (
N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
566 : NVPTX::LOAD_CONST_BF16),
567 SDLoc(
N),
N->getValueType(0), Val);
595 return CmpMode::NotANumber;
629bool NVPTXDAGToDAGISel::SelectSETP_F16X2(
SDNode *
N) {
630 unsigned PTXCmpMode =
631 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
634 NVPTX::SETP_f16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
640bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(
SDNode *
N) {
641 unsigned PTXCmpMode =
642 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
645 NVPTX::SETP_bf16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
653bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(
SDNode *
N) {
663 for (
auto *U :
Vector.getNode()->uses()) {
666 if (
U->getOperand(0) !=
Vector)
669 dyn_cast<ConstantSDNode>(
U->getOperand(1))) {
670 if (IdxConst->getZExtValue() == 0)
672 else if (IdxConst->getZExtValue() == 1)
689 for (
auto *
Node : E0)
691 for (
auto *
Node : E1)
698 const Value *Src =
N->getMemOperand()->getValue();
703 if (
auto *PT = dyn_cast<PointerType>(Src->getType())) {
704 switch (PT->getAddressSpace()) {
739 if (
N->isInvariant())
751 if (
auto *
A = dyn_cast<const Argument>(V))
752 return IsKernelFn &&
A->onlyReadsMemory() &&
A->hasNoAliasAttr();
753 if (
auto *GV = dyn_cast<const GlobalVariable>(V))
754 return GV->isConstant();
759bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(
SDNode *
N) {
760 unsigned IID =
N->getConstantOperandVal(0);
764 case Intrinsic::nvvm_texsurf_handle_internal:
765 SelectTexSurfHandle(
N);
770void NVPTXDAGToDAGISel::SelectTexSurfHandle(
SDNode *
N) {
775 MVT::i64, GlobalVal));
778void NVPTXDAGToDAGISel::SelectAddrSpaceCast(
SDNode *
N) {
783 assert(SrcAddrSpace != DstAddrSpace &&
784 "addrspacecast must be between different address spaces");
789 switch (SrcAddrSpace) {
792 Opc = TM.
is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
796 ? NVPTX::cvta_shared_6432
797 : NVPTX::cvta_shared_64)
798 : NVPTX::cvta_shared;
802 ? NVPTX::cvta_const_6432
803 : NVPTX::cvta_const_64)
808 ? NVPTX::cvta_local_6432
809 : NVPTX::cvta_local_64)
818 if (SrcAddrSpace != 0)
821 switch (DstAddrSpace) {
824 Opc = TM.
is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
828 ? NVPTX::cvta_to_shared_3264
829 : NVPTX::cvta_to_shared_64)
830 : NVPTX::cvta_to_shared;
834 ? NVPTX::cvta_to_const_3264
835 : NVPTX::cvta_to_const_64)
836 : NVPTX::cvta_to_const;
840 ? NVPTX::cvta_to_local_3264
841 : NVPTX::cvta_to_local_64)
842 : NVPTX::cvta_to_local;
845 Opc = TM.
is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
846 : NVPTX::nvvm_ptr_gen_to_param;
857static std::optional<unsigned>
859 unsigned Opcode_i16,
unsigned Opcode_i32,
860 std::optional<unsigned> Opcode_i64,
unsigned Opcode_f32,
861 std::optional<unsigned> Opcode_f64) {
904bool NVPTXDAGToDAGISel::tryLoad(
SDNode *
N) {
907 assert(
LD->readMem() &&
"Expected load");
909 EVT LoadedVT =
LD->getMemoryVT();
910 SDNode *NVPTXLD =
nullptr;
954 unsigned fromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
955 unsigned int fromType;
961 "Unexpected vector type");
976 std::optional<unsigned> Opcode;
979 if (SelectDirectAddr(N1,
Addr)) {
980 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
981 NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
982 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
985 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
986 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
987 getI32Imm(fromTypeWidth, dl),
Addr, Chain };
991 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
992 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
993 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
996 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
997 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
998 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
1002 if (PointerSize == 64)
1005 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
1006 NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
1008 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
1009 NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
1010 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
1013 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
1014 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1015 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
1018 if (PointerSize == 64)
1020 pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
1021 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
1022 NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
1024 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
1025 NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
1026 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
1029 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
1030 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1031 getI32Imm(fromTypeWidth, dl), N1, Chain };
1045bool NVPTXDAGToDAGISel::tryLoadVector(
SDNode *
N) {
1050 std::optional<unsigned> Opcode;
1062 return tryLDGLDU(
N);
1087 unsigned FromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
1090 unsigned ExtensionType = cast<ConstantSDNode>(
1091 N->getOperand(
N->getNumOperands() - 1))->getZExtValue();
1099 switch (
N->getOpcode()) {
1110 EVT EltVT =
N->getValueType(0);
1122 if (SelectDirectAddr(Op1,
Addr)) {
1123 switch (
N->getOpcode()) {
1128 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1129 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1130 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1135 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
1136 std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
1141 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1142 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1143 getI32Imm(FromTypeWidth,
DL),
Addr, Chain };
1145 }
else if (PointerSize == 64
1148 switch (
N->getOpcode()) {
1153 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1154 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1155 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1160 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
1161 std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
1166 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1167 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1170 }
else if (PointerSize == 64
1173 if (PointerSize == 64) {
1174 switch (
N->getOpcode()) {
1180 NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
1181 NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
1182 NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
1187 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
1188 NVPTX::LDV_f32_v4_ari_64, std::nullopt);
1192 switch (
N->getOpcode()) {
1197 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1198 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1199 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1204 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
1205 std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
1211 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1212 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1217 if (PointerSize == 64) {
1218 switch (
N->getOpcode()) {
1224 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1225 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1226 NVPTX::LDV_f64_v2_areg_64);
1231 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
1232 NVPTX::LDV_f32_v4_areg_64, std::nullopt);
1236 switch (
N->getOpcode()) {
1242 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1243 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
1244 NVPTX::LDV_f64_v2_areg);
1249 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
1250 std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
1256 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1257 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1258 getI32Imm(FromTypeWidth,
DL), Op1, Chain };
1269bool NVPTXDAGToDAGISel::tryLDGLDU(
SDNode *
N) {
1279 Op1 =
N->getOperand(2);
1280 Mem = cast<MemIntrinsicSDNode>(
N);
1281 unsigned IID =
N->getConstantOperandVal(1);
1285 case Intrinsic::nvvm_ldg_global_f:
1286 case Intrinsic::nvvm_ldg_global_i:
1287 case Intrinsic::nvvm_ldg_global_p:
1290 case Intrinsic::nvvm_ldu_global_f:
1291 case Intrinsic::nvvm_ldu_global_i:
1292 case Intrinsic::nvvm_ldu_global_p:
1297 Op1 =
N->getOperand(1);
1298 Mem = cast<MemSDNode>(
N);
1301 std::optional<unsigned> Opcode;
1305 EVT OrigType =
N->getValueType(0);
1308 unsigned NumElts = 1;
1313 if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
1314 (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
1315 (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
1316 assert(NumElts % 2 == 0 &&
"Vector must have even number of elements");
1319 }
else if (OrigType == MVT::v4i8) {
1328 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1330 for (
unsigned i = 0; i != NumElts; ++i) {
1336 if (SelectDirectAddr(Op1,
Addr)) {
1337 switch (
N->getOpcode()) {
1344 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1345 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1346 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1347 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1348 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1349 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1352 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1353 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1354 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1355 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1356 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1357 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1362 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1363 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1364 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1365 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1366 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1367 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1371 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1372 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1373 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1374 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1375 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1376 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1382 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1383 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
1384 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
1389 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1390 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
1391 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
1401 switch (
N->getOpcode()) {
1408 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1409 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1410 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1411 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1412 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1413 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1416 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1417 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1418 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1419 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1420 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1421 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1426 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1427 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1428 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1429 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1430 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1431 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1435 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1436 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1437 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1438 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1439 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1440 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1446 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1447 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
1448 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
1453 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1454 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
1455 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
1459 switch (
N->getOpcode()) {
1466 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1467 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1468 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1469 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1470 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1471 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1474 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1475 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1476 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1477 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1478 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1479 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1484 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1485 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1486 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1487 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1488 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1489 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1493 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1494 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1495 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1496 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1497 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1498 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1504 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1505 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
1506 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
1511 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1512 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
1513 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
1523 switch (
N->getOpcode()) {
1530 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1531 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1532 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1533 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1534 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1535 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1538 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1539 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1540 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1541 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1542 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1543 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1548 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1549 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1550 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1551 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1552 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1553 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1557 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1558 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1559 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1560 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1561 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1562 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1568 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1569 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
1570 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
1575 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1576 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
1577 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
1581 switch (
N->getOpcode()) {
1588 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1589 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1590 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1591 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1592 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1593 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1596 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1597 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1598 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1599 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1600 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1601 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1606 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1607 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1608 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1609 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1610 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1611 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1615 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1616 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1617 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1618 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1619 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1620 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1626 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1627 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
1628 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
1633 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1634 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
1635 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
1641 SDValue Ops[] = { Op1, Chain };
1658 if (OrigType != EltVT &&
1668 for (
unsigned i = 0; i != NumElts; ++i) {
1684bool NVPTXDAGToDAGISel::tryStore(
SDNode *
N) {
1687 assert(
ST->writeMem() &&
"Expected store");
1690 assert((PlainStore || AtomicStore) &&
"Expected store");
1691 EVT StoreVT =
ST->getMemoryVT();
1692 SDNode *NVPTXST =
nullptr;
1695 if (PlainStore && PlainStore->
isIndexed())
1734 "Unexpected vector type");
1747 std::optional<unsigned> Opcode;
1749 Value.getNode()->getSimpleValueType(0).SimpleTy;
1751 if (SelectDirectAddr(BasePtr,
Addr)) {
1752 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1753 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1754 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1758 getI32Imm(isVolatile, dl),
1759 getI32Imm(CodeAddrSpace, dl),
1760 getI32Imm(vecType, dl),
1761 getI32Imm(toType, dl),
1762 getI32Imm(toTypeWidth, dl),
1766 }
else if (PointerSize == 64
1769 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1770 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1771 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1775 getI32Imm(isVolatile, dl),
1776 getI32Imm(CodeAddrSpace, dl),
1777 getI32Imm(vecType, dl),
1778 getI32Imm(toType, dl),
1779 getI32Imm(toTypeWidth, dl),
1784 }
else if (PointerSize == 64
1787 if (PointerSize == 64)
1790 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
1791 NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1793 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1794 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1795 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1800 getI32Imm(isVolatile, dl),
1801 getI32Imm(CodeAddrSpace, dl),
1802 getI32Imm(vecType, dl),
1803 getI32Imm(toType, dl),
1804 getI32Imm(toTypeWidth, dl),
1810 if (PointerSize == 64)
1812 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1813 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1814 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1816 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1817 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1818 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1822 getI32Imm(isVolatile, dl),
1823 getI32Imm(CodeAddrSpace, dl),
1824 getI32Imm(vecType, dl),
1825 getI32Imm(toType, dl),
1826 getI32Imm(toTypeWidth, dl),
1841bool NVPTXDAGToDAGISel::tryStoreVector(
SDNode *
N) {
1845 std::optional<unsigned> Opcode;
1880 switch (
N->getOpcode()) {
1885 N2 =
N->getOperand(3);
1893 N2 =
N->getOperand(5);
1915 if (SelectDirectAddr(N2,
Addr)) {
1916 switch (
N->getOpcode()) {
1921 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1922 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1923 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1927 NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
1928 NVPTX::STV_i32_v4_avar, std::nullopt,
1929 NVPTX::STV_f32_v4_avar, std::nullopt);
1935 switch (
N->getOpcode()) {
1940 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1941 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1942 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1947 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
1948 std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
1955 if (PointerSize == 64) {
1956 switch (
N->getOpcode()) {
1962 NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
1963 NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
1964 NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
1969 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
1970 NVPTX::STV_f32_v4_ari_64, std::nullopt);
1974 switch (
N->getOpcode()) {
1979 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1980 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1981 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1985 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
1986 NVPTX::STV_i32_v4_ari, std::nullopt,
1987 NVPTX::STV_f32_v4_ari, std::nullopt);
1994 if (PointerSize == 64) {
1995 switch (
N->getOpcode()) {
2001 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2002 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2003 NVPTX::STV_f64_v2_areg_64);
2008 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
2009 NVPTX::STV_f32_v4_areg_64, std::nullopt);
2013 switch (
N->getOpcode()) {
2019 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2020 NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
2021 NVPTX::STV_f64_v2_areg);
2026 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
2027 std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
2048bool NVPTXDAGToDAGISel::tryLoadParam(
SDNode *
Node) {
2056 switch (
Node->getOpcode()) {
2070 EVT EltVT =
Node->getValueType(0);
2073 std::optional<unsigned> Opcode;
2080 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2081 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2082 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2087 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2088 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
2089 NVPTX::LoadParamMemV2F64);
2094 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
2095 std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
2104 }
else if (VecSize == 2) {
2107 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2111 unsigned OffsetVal =
Offset->getAsZExtVal();
2122bool NVPTXDAGToDAGISel::tryStoreRetval(
SDNode *
N) {
2126 unsigned OffsetVal =
Offset->getAsZExtVal();
2130 unsigned NumElts = 1;
2131 switch (
N->getOpcode()) {
2147 for (
unsigned i = 0; i < NumElts; ++i)
2155 std::optional<unsigned> Opcode = 0;
2161 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2162 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2163 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2164 if (Opcode == NVPTX::StoreRetvalI8) {
2168 switch (Ops[0].getSimpleValueType().SimpleTy) {
2172 Opcode = NVPTX::StoreRetvalI8TruncI32;
2175 Opcode = NVPTX::StoreRetvalI8TruncI64;
2182 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2183 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2184 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2188 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2189 NVPTX::StoreRetvalV4I32, std::nullopt,
2190 NVPTX::StoreRetvalV4F32, std::nullopt);
// Helper macros that pick the NVPTX StoreParam vector instruction whose
// operand kinds — register ('r') vs. immediate ('i') — match the IsImm
// flags of the values being stored. The V2 family resolves two operand
// kinds; the V4 family resolves four, one flag at a time.

// Paste together NVPTX::StoreParamV2<ty>_<opKind0><opKind1>.
#define getOpcV2H(ty, opKind0, opKind1)                                        \
  NVPTX::StoreParamV2##ty##_##opKind0##opKind1

// Resolve the second operand kind of a v2 store from isImm1.
#define getOpcV2H1(ty, opKind0, isImm1)                                        \
  (isImm1) ? getOpcV2H(ty, opKind0, i) : getOpcV2H(ty, opKind0, r)

// Resolve both operand kinds of a v2 store from the isimm flag array.
#define getOpcodeForVectorStParamV2(ty, isimm)                                 \
  (isimm[0]) ? getOpcV2H1(ty, i, isimm[1]) : getOpcV2H1(ty, r, isimm[1])

// Paste together NVPTX::StoreParamV4<ty>_<opKind0><opKind1><opKind2><opKind3>.
#define getOpcV4H(ty, opKind0, opKind1, opKind2, opKind3)                      \
  NVPTX::StoreParamV4##ty##_##opKind0##opKind1##opKind2##opKind3

// Resolve the fourth operand kind of a v4 store from isImm3.
#define getOpcV4H3(ty, opKind0, opKind1, opKind2, isImm3)                      \
  (isImm3) ? getOpcV4H(ty, opKind0, opKind1, opKind2, i)                       \
           : getOpcV4H(ty, opKind0, opKind1, opKind2, r)

// Resolve the third operand kind of a v4 store from isImm2.
#define getOpcV4H2(ty, opKind0, opKind1, isImm2, isImm3)                       \
  (isImm2) ? getOpcV4H3(ty, opKind0, opKind1, i, isImm3)                       \
           : getOpcV4H3(ty, opKind0, opKind1, r, isImm3)

// Resolve the second operand kind of a v4 store from isImm1.
#define getOpcV4H1(ty, opKind0, isImm1, isImm2, isImm3)                        \
  (isImm1) ? getOpcV4H2(ty, opKind0, i, isImm2, isImm3)                        \
           : getOpcV4H2(ty, opKind0, r, isImm2, isImm3)

// Resolve all four operand kinds of a v4 store from the isimm flag array.
#define getOpcodeForVectorStParamV4(ty, isimm)                                 \
  (isimm[0]) ? getOpcV4H1(ty, i, isimm[1], isimm[2], isimm[3])                 \
             : getOpcV4H1(ty, r, isimm[1], isimm[2], isimm[3])

// Dispatch on element count: n == 2 selects the V2 family, otherwise V4.
#define getOpcodeForVectorStParam(n, ty, isimm)                                \
  (n == 2) ? getOpcodeForVectorStParamV2(ty, isimm)                            \
           : getOpcodeForVectorStParamV4(ty, isimm)
2244 for (
unsigned i = 0; i < NumElts; i++) {
2245 IsImm[i] = (isa<ConstantSDNode>(Ops[i]) || isa<ConstantFPSDNode>(Ops[i]));
2248 if (MemTy == MVT::f32 || MemTy == MVT::f64) {
2270 assert(NumElts == 2 &&
"MVT too large for NumElts > 2");
2275 assert(NumElts == 2 &&
"MVT too large for NumElts > 2");
2281 return (NumElts == 2) ? NVPTX::StoreParamV2I8_rr
2282 : NVPTX::StoreParamV4I8_rrrr;
2285 return (NumElts == 2) ? NVPTX::StoreParamV2I16_rr
2286 : NVPTX::StoreParamV4I16_rrrr;
2291 return (NumElts == 2) ? NVPTX::StoreParamV2I32_rr
2292 : NVPTX::StoreParamV4I32_rrrr;
2298bool NVPTXDAGToDAGISel::tryStoreParam(
SDNode *
N) {
2302 unsigned ParamVal =
Param->getAsZExtVal();
2304 unsigned OffsetVal =
Offset->getAsZExtVal();
2306 SDValue Glue =
N->getOperand(
N->getNumOperands() - 1);
2310 switch (
N->getOpcode()) {
2328 for (
unsigned i = 0; i < NumElts; ++i)
2338 std::optional<unsigned> Opcode;
2339 switch (
N->getOpcode()) {
2347 if (MemTy != MVT::f16 && MemTy != MVT::v2f16 &&
2348 (isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm))) {
2350 if (MemTy == MVT::f32 || MemTy == MVT::f64) {
2362 NVPTX::StoreParamI16_i, NVPTX::StoreParamI32_i,
2363 NVPTX::StoreParamI64_i, NVPTX::StoreParamF32_i,
2364 NVPTX::StoreParamF64_i);
2368 NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
2369 NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r,
2370 NVPTX::StoreParamF32_r, NVPTX::StoreParamF64_r);
2371 if (Opcode == NVPTX::StoreParamI8_r) {
2375 switch (Ops[0].getSimpleValueType().SimpleTy) {
2379 Opcode = NVPTX::StoreParamI8TruncI32_r;
2382 Opcode = NVPTX::StoreParamI8TruncI64_r;
2400 Opcode = NVPTX::StoreParamI32_r;
2404 MVT::i32, Ops[0], CvtNone);
2409 Opcode = NVPTX::StoreParamI32_r;
2413 MVT::i32, Ops[0], CvtNone);
2428bool NVPTXDAGToDAGISel::tryTextureIntrinsic(
SDNode *
N) {
2431 switch (
N->getOpcode()) {
2432 default:
return false;
2434 Opc = NVPTX::TEX_1D_F32_S32_RR;
2437 Opc = NVPTX::TEX_1D_F32_F32_RR;
2440 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2443 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2446 Opc = NVPTX::TEX_1D_S32_S32_RR;
2449 Opc = NVPTX::TEX_1D_S32_F32_RR;
2452 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2455 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2458 Opc = NVPTX::TEX_1D_U32_S32_RR;
2461 Opc = NVPTX::TEX_1D_U32_F32_RR;
2464 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2467 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2470 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2473 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2476 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2479 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2482 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2485 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2488 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2491 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2494 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2497 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2500 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2503 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2506 Opc = NVPTX::TEX_2D_F32_S32_RR;
2509 Opc = NVPTX::TEX_2D_F32_F32_RR;
2512 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2515 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2518 Opc = NVPTX::TEX_2D_S32_S32_RR;
2521 Opc = NVPTX::TEX_2D_S32_F32_RR;
2524 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2527 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2530 Opc = NVPTX::TEX_2D_U32_S32_RR;
2533 Opc = NVPTX::TEX_2D_U32_F32_RR;
2536 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2539 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2542 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2545 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2548 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2551 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2554 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2557 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2560 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2563 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2566 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2569 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2572 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2575 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2578 Opc = NVPTX::TEX_3D_F32_S32_RR;
2581 Opc = NVPTX::TEX_3D_F32_F32_RR;
2584 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2587 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2590 Opc = NVPTX::TEX_3D_S32_S32_RR;
2593 Opc = NVPTX::TEX_3D_S32_F32_RR;
2596 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2599 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2602 Opc = NVPTX::TEX_3D_U32_S32_RR;
2605 Opc = NVPTX::TEX_3D_U32_F32_RR;
2608 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2611 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2614 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2617 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2620 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2623 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2626 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2629 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2632 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2635 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2638 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2641 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2644 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2647 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2650 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2653 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2656 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2659 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2662 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2665 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2668 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2671 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2674 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2677 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2680 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2683 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2686 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2689 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2692 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2695 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2698 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2701 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2704 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2707 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2710 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2713 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2716 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2719 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2722 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2725 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2728 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2731 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2734 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2737 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2740 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2743 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2746 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2749 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2752 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2755 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2758 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2761 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2764 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2767 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2770 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2773 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2776 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2779 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2782 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2785 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2788 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2791 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2794 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2797 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2800 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2803 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2806 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2809 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2812 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2815 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2818 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2821 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2824 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2827 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2830 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2833 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2836 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2839 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2842 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2845 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2848 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2851 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2854 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2857 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2860 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2863 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2866 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2869 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2872 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2875 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2878 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2881 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2884 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2887 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2890 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2893 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2896 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2899 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2902 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2905 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2908 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2911 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2914 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2917 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2920 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2923 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2926 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2929 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2932 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2935 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2938 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
2941 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
2944 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
2947 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
2950 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
2953 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
2965bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(
SDNode *
N) {
2967 switch (
N->getOpcode()) {
2968 default:
return false;
2970 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2973 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2976 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2979 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2982 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2985 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2988 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2991 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2994 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2997 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
3000 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
3003 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
3006 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
3009 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
3012 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
3015 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
3018 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
3021 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
3024 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
3027 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
3030 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
3033 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
3036 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
3039 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
3042 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
3045 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
3048 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
3051 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
3054 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
3057 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
3060 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
3063 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
3066 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
3069 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
3072 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
3075 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
3078 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
3081 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
3084 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
3087 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
3090 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
3093 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
3096 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
3099 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
3102 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
3105 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
3108 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
3111 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
3114 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
3117 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
3120 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
3123 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
3126 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
3129 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
3132 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
3135 Opc = NVPTX::SULD_1D_I8_TRAP_R;
3138 Opc = NVPTX::SULD_1D_I16_TRAP_R;
3141 Opc = NVPTX::SULD_1D_I32_TRAP_R;
3144 Opc = NVPTX::SULD_1D_I64_TRAP_R;
3147 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3150 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3153 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3156 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3159 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3162 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3165 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3168 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3171 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3174 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3177 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3180 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3183 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3186 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3189 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3192 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3195 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3198 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3201 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3204 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3207 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3210 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3213 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3216 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3219 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3222 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3225 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3228 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3231 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3234 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3237 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3240 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3243 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3246 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3249 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3252 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3255 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3258 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3261 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3264 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3267 Opc = NVPTX::SULD_3D_I8_TRAP_R;
3270 Opc = NVPTX::SULD_3D_I16_TRAP_R;
3273 Opc = NVPTX::SULD_3D_I32_TRAP_R;
3276 Opc = NVPTX::SULD_3D_I64_TRAP_R;
3279 Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3282 Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3285 Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3288 Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3291 Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3294 Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3297 Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3300 Opc = NVPTX::SULD_1D_I8_ZERO_R;
3303 Opc = NVPTX::SULD_1D_I16_ZERO_R;
3306 Opc = NVPTX::SULD_1D_I32_ZERO_R;
3309 Opc = NVPTX::SULD_1D_I64_ZERO_R;
3312 Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3315 Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3318 Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3321 Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3324 Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3327 Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3330 Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3333 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3336 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3339 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3342 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3345 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3348 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3351 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3354 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3357 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3360 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3363 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3366 Opc = NVPTX::SULD_2D_I8_ZERO_R;
3369 Opc = NVPTX::SULD_2D_I16_ZERO_R;
3372 Opc = NVPTX::SULD_2D_I32_ZERO_R;
3375 Opc = NVPTX::SULD_2D_I64_ZERO_R;
3378 Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3381 Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3384 Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3387 Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3390 Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3393 Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3396 Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3399 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3402 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3405 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3408 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3411 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3414 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3417 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3420 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3423 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3426 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3429 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3432 Opc = NVPTX::SULD_3D_I8_ZERO_R;
3435 Opc = NVPTX::SULD_3D_I16_ZERO_R;
3438 Opc = NVPTX::SULD_3D_I32_ZERO_R;
3441 Opc = NVPTX::SULD_3D_I64_ZERO_R;
3444 Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3447 Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3450 Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3453 Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3456 Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3459 Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3462 Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3477bool NVPTXDAGToDAGISel::tryBFE(
SDNode *
N) {
3484 bool IsSigned =
false;
3489 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3514 Val =
LHS.getNode()->getOperand(0);
3515 Start =
LHS.getNode()->getOperand(1);
3521 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3522 if (NumBits > GoodBits) {
3556 if (isa<ConstantSDNode>(AndLHS)) {
3580 NumBits = NumZeros + NumOnes - ShiftAmt;
3586 if (ShiftAmt < NumZeros) {
3603 Val =
LHS->getOperand(0);
3622 if (OuterShiftAmt < InnerShiftAmt) {
3658 Opc = NVPTX::BFE_S32rii;
3660 Opc = NVPTX::BFE_U32rii;
3664 Opc = NVPTX::BFE_S64rii;
3666 Opc = NVPTX::BFE_U64rii;
3705bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3710 if (SelectDirectAddr(base,
Base)) {
3733bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3745 if (SelectDirectAddr(
Addr.getOperand(0),
Addr)) {
3750 dyn_cast<FrameIndexSDNode>(
Addr.getOperand(0)))
3775bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(
SDNode *
N,
3776 unsigned int spN)
const {
3777 const Value *Src =
nullptr;
3778 if (
MemSDNode *mN = dyn_cast<MemSDNode>(
N)) {
3779 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3781 Src = mN->getMemOperand()->getValue();
3785 if (
auto *PT = dyn_cast<PointerType>(Src->getType()))
3786 return (PT->getAddressSpace() == spN);
3794 std::vector<SDValue> &OutOps) {
3796 switch (ConstraintID) {
3800 if (SelectDirectAddr(
Op, Op0)) {
3801 OutOps.push_back(Op0);
3805 if (SelectADDRri(
Op.getNode(),
Op, Op0, Op1)) {
3806 OutOps.push_back(Op0);
3807 OutOps.push_back(Op1);
3815void NVPTXDAGToDAGISel::SelectV2I64toI128(
SDNode *
N) {
3834 NewOps[0] =
N->getOperand(0);
3837 if (
N->getNumOperands() == 5)
3838 NewOps[3] =
N->getOperand(4);
3844void NVPTXDAGToDAGISel::SelectI128toV2I64(
SDNode *
N) {
3862 NVPTX::I128toV2I64,
DL,
3871unsigned NVPTXDAGToDAGISel::GetConvertOpcode(
MVT DestTy,
MVT SrcTy,
3882 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3884 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3886 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3893 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3895 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3897 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3904 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3906 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3908 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3915 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3917 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3919 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3926 return NVPTX::CVT_f32_f16;
3928 return NVPTX::CVT_f64_f16;
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define getOpcodeForVectorStParam(n, ty, isimm)
static unsigned int getCodeAddrSpace(MemSDNode *N)
static int getLdStRegType(EVT VT)
static unsigned pickOpcodeForVectorStParam(SmallVector< SDValue, 8 > &Ops, unsigned NumElts, MVT::SimpleValueType MemTy, SelectionDAG *CurDAG, SDLoc DL)
#define getOpcodeForVectorStParamV2(ty, isimm)
static cl::opt< bool > EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden, cl::desc("Enable reciprocal sqrt optimization"))
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
static std::optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, std::optional< unsigned > Opcode_i64, unsigned Opcode_f32, std::optional< unsigned > Opcode_f64)
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
This is an SDNode representing atomic operations.
const SDValue & getVal() const
const ConstantFP * getConstantFPValue() const
ConstantFP - Floating Point Values [float, double].
This is the shared class of boolean and integer constants.
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
FunctionPass class - This class is used to implement most global optimizations.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
EVT getMemoryVT() const
Return the type of the in-memory value.
NVPTXDAGToDAGISelLegacy(NVPTXTargetMachine &tm, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
const NVPTXSubtarget * Subtarget
const NVPTXTargetLowering * getTargetLowering() const override
bool useF32FTZ(const MachineFunction &MF) const
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const
bool usePrecSqrtF32() const
bool allowUnsafeFPMath(MachineFunction &MF) const
int getDivF32Level() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const SDValue & getOperand(unsigned Num) const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getValue() const
unsigned getPointerSizeInBits(unsigned AS) const
LLVM Value Representation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ TexUnified1DS32FloatLevel
@ Tex1DArrayFloatFloatLevel
@ TexUnified2DU32FloatGrad
@ Tld4UnifiedG2DFloatFloat
@ TexUnifiedCubeArrayFloatFloatLevel
@ Tld4UnifiedR2DFloatFloat
@ Tex2DArrayS32FloatLevel
@ TexUnified1DArrayFloatFloatLevel
@ TexUnified2DFloatFloatLevel
@ TexUnified3DFloatFloatLevel
@ TexUnified1DFloatFloatLevel
@ TexUnified2DArrayU32Float
@ TexUnified1DArrayFloatFloat
@ Tex1DArrayFloatFloatGrad
@ TexUnifiedCubeArrayU32FloatGrad
@ TexUnified1DFloatFloatGrad
@ TexUnifiedCubeFloatFloatGrad
@ TexUnified2DArrayFloatFloat
@ TexUnified3DU32FloatLevel
@ TexUnified1DArrayU32Float
@ TexUnified2DArrayFloatFloatLevel
@ TexUnified2DFloatFloatGrad
@ TexUnified2DArrayU32S32
@ TexUnifiedCubeArrayS32FloatLevel
@ TexUnified1DArrayS32Float
@ TexUnified1DArrayS32FloatLevel
@ TexUnified2DS32FloatLevel
@ TexUnified3DU32FloatGrad
@ TexUnifiedCubeU32FloatLevel
@ TexUnified2DArrayU32FloatGrad
@ TexUnifiedCubeFloatFloatLevel
@ TexUnified1DArrayFloatS32
@ TexUnifiedCubeS32FloatLevel
@ TexUnified1DS32FloatGrad
@ Tex2DArrayFloatFloatLevel
@ TexUnifiedCubeArrayFloatFloat
@ TexUnifiedCubeArrayFloatFloatGrad
@ TexUnifiedCubeFloatFloat
@ TexUnified1DArrayU32S32
@ TexUnified3DFloatFloatGrad
@ Tld4UnifiedA2DFloatFloat
@ TexUnified3DS32FloatGrad
@ TexUnified2DU32FloatLevel
@ TexUnified1DArrayS32S32
@ TexCubeArrayFloatFloatLevel
@ TexUnified1DU32FloatGrad
@ TexCubeArrayS32FloatLevel
@ Tex2DArrayU32FloatLevel
@ Tex1DArrayU32FloatLevel
@ TexUnified2DArrayU32FloatLevel
@ TexUnified1DArrayFloatFloatGrad
@ TexUnifiedCubeS32FloatGrad
@ TexCubeArrayU32FloatLevel
@ TexUnified3DS32FloatLevel
@ TexUnifiedCubeArrayS32FloatGrad
@ TexUnified2DArrayS32Float
@ Tex2DArrayFloatFloatGrad
@ TexUnifiedCubeArrayS32Float
@ TexUnified2DArrayS32FloatLevel
@ Tex1DArrayS32FloatLevel
@ TexUnifiedCubeArrayU32FloatLevel
@ TexUnified2DArrayS32S32
@ TexUnified2DArrayFloatFloatGrad
@ TexUnifiedCubeU32FloatGrad
@ Tld4UnifiedB2DFloatFloat
@ TexUnified1DArrayU32FloatLevel
@ TexUnified1DArrayS32FloatGrad
@ TexUnified2DS32FloatGrad
@ TexUnified2DArrayS32FloatGrad
@ TexUnified1DU32FloatLevel
@ TexUnifiedCubeArrayU32Float
@ TexUnified2DArrayFloatS32
@ TexUnified1DArrayU32FloatGrad
initializer< Ty > init(const Ty &Val)
constexpr uint64_t PointerSize
aarch64 pointer size.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOptLevel OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG,...
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
CodeGenOptLevel
Code generation optimization level.
AtomicOrdering
Atomic ordering for LLVM's memory model.
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
bool isKernelFunction(const Function &F)
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.