20#include "llvm/IR/IntrinsicsNVPTX.h"
30#define DEBUG_TYPE "nvptx-isel"
31#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
35 cl::desc(
"Enable reciprocal sqrt optimization"));
64int NVPTXDAGToDAGISel::getDivF32Level()
const {
68bool NVPTXDAGToDAGISel::usePrecSqrtF32()
const {
72bool NVPTXDAGToDAGISel::useF32FTZ()
const {
76bool NVPTXDAGToDAGISel::allowFMA()
const {
81bool NVPTXDAGToDAGISel::allowUnsafeFPMath()
const {
// Returns true when the reciprocal-square-root optimization is enabled via
// the EnableRsqrtOpt command-line flag (the cl::opt described as "Enable
// reciprocal sqrt optimization" near the top of this file).
bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
90void NVPTXDAGToDAGISel::Select(
SDNode *
N) {
92 if (
N->isMachineOpcode()) {
97 switch (
N->getOpcode()) {
109 if (tryEXTRACT_VECTOR_ELEMENT(
N))
116 SelectSETP_BF16X2(
N);
120 if (tryLoadVector(
N))
132 if (tryStoreVector(
N))
144 if (tryStoreRetval(
N))
152 if (tryStoreParam(
N))
156 if (tryIntrinsicNoChain(
N))
160 if (tryIntrinsicChain(
N))
337 if (tryTextureIntrinsic(
N))
505 if (trySurfaceIntrinsic(
N))
516 SelectAddrSpaceCast(
N);
519 if (tryConstantFP(
N))
523 if (
N->getOperand(1).getValueType() == MVT::i128) {
524 SelectV2I64toI128(
N);
530 if (
N->getOperand(1).getValueType() == MVT::i128) {
531 SelectI128toV2I64(
N);
542bool NVPTXDAGToDAGISel::tryIntrinsicChain(
SDNode *
N) {
543 unsigned IID =
N->getConstantOperandVal(1);
547 case Intrinsic::nvvm_ldg_global_f:
548 case Intrinsic::nvvm_ldg_global_i:
549 case Intrinsic::nvvm_ldg_global_p:
550 case Intrinsic::nvvm_ldu_global_f:
551 case Intrinsic::nvvm_ldu_global_i:
552 case Intrinsic::nvvm_ldu_global_p:
559bool NVPTXDAGToDAGISel::tryConstantFP(
SDNode *
N) {
560 if (
N->getValueType(0) != MVT::f16 &&
N->getValueType(0) != MVT::bf16)
563 cast<ConstantFPSDNode>(
N)->getValueAPF(),
SDLoc(
N),
N->getValueType(0));
565 (
N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
566 : NVPTX::LOAD_CONST_BF16),
567 SDLoc(
N),
N->getValueType(0), Val);
595 return CmpMode::NotANumber;
629bool NVPTXDAGToDAGISel::SelectSETP_F16X2(
SDNode *
N) {
630 unsigned PTXCmpMode =
631 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
634 NVPTX::SETP_f16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
640bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(
SDNode *
N) {
641 unsigned PTXCmpMode =
642 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
645 NVPTX::SETP_bf16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
653bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(
SDNode *
N) {
663 for (
auto *U :
Vector.getNode()->uses()) {
666 if (
U->getOperand(0) !=
Vector)
669 dyn_cast<ConstantSDNode>(
U->getOperand(1))) {
670 if (IdxConst->getZExtValue() == 0)
672 else if (IdxConst->getZExtValue() == 1)
689 for (
auto *
Node : E0)
691 for (
auto *
Node : E1)
698 const Value *Src =
N->getMemOperand()->getValue();
703 if (
auto *PT = dyn_cast<PointerType>(Src->getType())) {
704 switch (PT->getAddressSpace()) {
803 !HasMemoryOrdering) {
806 OS <<
"PTX does not support \"atomic\" for orderings different than"
807 "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order is: \""
819 bool AddrGenericOrGlobalOrShared =
823 bool UseRelaxedMMIO =
828 return N->isVolatile() && AddrGenericOrGlobalOrShared
847 OS <<
"PTX only supports Acquire Ordering on reads: "
848 <<
N->getOperationName();
855 if (!
N->writeMem()) {
858 OS <<
"PTX only supports Release Ordering on writes: "
859 <<
N->getOperationName();
868 OS <<
"PTX only supports AcquireRelease Ordering on read-modify-write: "
869 <<
N->getOperationName();
877 OS <<
"NVPTX backend does not support AtomicOrdering \""
907 if (
N->isInvariant())
919 if (
auto *
A = dyn_cast<const Argument>(V))
920 return IsKernelFn &&
A->onlyReadsMemory() &&
A->hasNoAliasAttr();
921 if (
auto *GV = dyn_cast<const GlobalVariable>(V))
922 return GV->isConstant();
927bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(
SDNode *
N) {
928 unsigned IID =
N->getConstantOperandVal(0);
932 case Intrinsic::nvvm_texsurf_handle_internal:
933 SelectTexSurfHandle(
N);
938void NVPTXDAGToDAGISel::SelectTexSurfHandle(
SDNode *
N) {
943 MVT::i64, GlobalVal));
946void NVPTXDAGToDAGISel::SelectAddrSpaceCast(
SDNode *
N) {
951 assert(SrcAddrSpace != DstAddrSpace &&
952 "addrspacecast must be between different address spaces");
957 switch (SrcAddrSpace) {
960 Opc = TM.
is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
964 ? NVPTX::cvta_shared_6432
965 : NVPTX::cvta_shared_64)
966 : NVPTX::cvta_shared;
970 ? NVPTX::cvta_const_6432
971 : NVPTX::cvta_const_64)
976 ? NVPTX::cvta_local_6432
977 : NVPTX::cvta_local_64)
986 if (SrcAddrSpace != 0)
989 switch (DstAddrSpace) {
992 Opc = TM.
is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
996 ? NVPTX::cvta_to_shared_3264
997 : NVPTX::cvta_to_shared_64)
998 : NVPTX::cvta_to_shared;
1002 ? NVPTX::cvta_to_const_3264
1003 : NVPTX::cvta_to_const_64)
1004 : NVPTX::cvta_to_const;
1008 ? NVPTX::cvta_to_local_3264
1009 : NVPTX::cvta_to_local_64)
1010 : NVPTX::cvta_to_local;
1013 Opc = TM.
is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
1014 : NVPTX::nvvm_ptr_gen_to_param;
1025static std::optional<unsigned>
1027 unsigned Opcode_i16,
unsigned Opcode_i32,
1028 std::optional<unsigned> Opcode_i64,
unsigned Opcode_f32,
1029 std::optional<unsigned> Opcode_f64) {
1053 return std::nullopt;
1072bool NVPTXDAGToDAGISel::tryLoad(
SDNode *
N) {
1075 assert(
LD->readMem() &&
"Expected load");
1077 EVT LoadedVT =
LD->getMemoryVT();
1078 SDNode *NVPTXLD =
nullptr;
1081 if (PlainLoad && PlainLoad->
isIndexed())
1090 return tryLDGLDU(
N);
1108 unsigned fromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
1109 unsigned int fromType;
1115 "Unexpected vector type");
1130 std::optional<unsigned> Opcode;
1133 if (SelectDirectAddr(N1,
Addr)) {
1134 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
1135 NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
1136 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
1139 SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
1140 getI32Imm(CodeAddrSpace, dl),
1141 getI32Imm(vecType, dl),
1142 getI32Imm(fromType, dl),
1143 getI32Imm(fromTypeWidth, dl),
1149 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
1150 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
1151 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
1154 SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
1155 getI32Imm(CodeAddrSpace, dl),
1156 getI32Imm(vecType, dl),
1157 getI32Imm(fromType, dl),
1158 getI32Imm(fromTypeWidth, dl),
1165 if (PointerSize == 64)
1168 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
1169 NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
1171 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
1172 NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
1173 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
1176 SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
1177 getI32Imm(CodeAddrSpace, dl),
1178 getI32Imm(vecType, dl),
1179 getI32Imm(fromType, dl),
1180 getI32Imm(fromTypeWidth, dl),
1186 if (PointerSize == 64)
1188 pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
1189 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
1190 NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
1192 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
1193 NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
1194 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
1197 SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
1198 getI32Imm(CodeAddrSpace, dl),
1199 getI32Imm(vecType, dl),
1200 getI32Imm(fromType, dl),
1201 getI32Imm(fromTypeWidth, dl),
1217bool NVPTXDAGToDAGISel::tryLoadVector(
SDNode *
N) {
1222 std::optional<unsigned> Opcode;
1234 return tryLDGLDU(
N);
1254 unsigned FromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
1257 unsigned ExtensionType = cast<ConstantSDNode>(
1258 N->getOperand(
N->getNumOperands() - 1))->getZExtValue();
1266 switch (
N->getOpcode()) {
1277 EVT EltVT =
N->getValueType(0);
1289 if (SelectDirectAddr(Op1,
Addr)) {
1290 switch (
N->getOpcode()) {
1295 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1296 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1297 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1302 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
1303 std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
1308 SDValue Ops[] = {getI32Imm(CodeMemorySem,
DL),
1309 getI32Imm(CodeAddrSpace,
DL),
1310 getI32Imm(VecType,
DL),
1311 getI32Imm(FromType,
DL),
1312 getI32Imm(FromTypeWidth,
DL),
1316 }
else if (PointerSize == 64
1319 switch (
N->getOpcode()) {
1324 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1325 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1326 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1331 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
1332 std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
1337 SDValue Ops[] = {getI32Imm(CodeMemorySem,
DL),
1338 getI32Imm(CodeAddrSpace,
DL),
1339 getI32Imm(VecType,
DL),
1340 getI32Imm(FromType,
DL),
1341 getI32Imm(FromTypeWidth,
DL),
1346 }
else if (PointerSize == 64
1349 if (PointerSize == 64) {
1350 switch (
N->getOpcode()) {
1356 NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
1357 NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
1358 NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
1363 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
1364 NVPTX::LDV_f32_v4_ari_64, std::nullopt);
1368 switch (
N->getOpcode()) {
1373 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1374 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1375 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1380 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
1381 std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
1387 SDValue Ops[] = {getI32Imm(CodeMemorySem,
DL),
1388 getI32Imm(CodeAddrSpace,
DL),
1389 getI32Imm(VecType,
DL),
1390 getI32Imm(FromType,
DL),
1391 getI32Imm(FromTypeWidth,
DL),
1398 if (PointerSize == 64) {
1399 switch (
N->getOpcode()) {
1405 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1406 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1407 NVPTX::LDV_f64_v2_areg_64);
1412 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
1413 NVPTX::LDV_f32_v4_areg_64, std::nullopt);
1417 switch (
N->getOpcode()) {
1423 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1424 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
1425 NVPTX::LDV_f64_v2_areg);
1430 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
1431 std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
1437 SDValue Ops[] = {getI32Imm(CodeMemorySem,
DL),
1438 getI32Imm(CodeAddrSpace,
DL),
1439 getI32Imm(VecType,
DL),
1440 getI32Imm(FromType,
DL),
1441 getI32Imm(FromTypeWidth,
DL),
1454bool NVPTXDAGToDAGISel::tryLDGLDU(
SDNode *
N) {
1464 Op1 =
N->getOperand(2);
1465 Mem = cast<MemIntrinsicSDNode>(
N);
1466 unsigned IID =
N->getConstantOperandVal(1);
1470 case Intrinsic::nvvm_ldg_global_f:
1471 case Intrinsic::nvvm_ldg_global_i:
1472 case Intrinsic::nvvm_ldg_global_p:
1475 case Intrinsic::nvvm_ldu_global_f:
1476 case Intrinsic::nvvm_ldu_global_i:
1477 case Intrinsic::nvvm_ldu_global_p:
1482 Op1 =
N->getOperand(1);
1483 Mem = cast<MemSDNode>(
N);
1486 std::optional<unsigned> Opcode;
1490 EVT OrigType =
N->getValueType(0);
1493 unsigned NumElts = 1;
1498 if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
1499 (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
1500 (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
1501 assert(NumElts % 2 == 0 &&
"Vector must have even number of elements");
1504 }
else if (OrigType == MVT::v4i8) {
1513 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1515 for (
unsigned i = 0; i != NumElts; ++i) {
1521 if (SelectDirectAddr(Op1,
Addr)) {
1522 switch (
N->getOpcode()) {
1529 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1530 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1531 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1532 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1533 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1534 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1537 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1538 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1539 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1540 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1541 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1542 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1547 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1548 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1549 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1550 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1551 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1552 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1556 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1557 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1558 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1559 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1560 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1561 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1567 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1568 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
1569 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
1574 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1575 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
1576 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
1586 switch (
N->getOpcode()) {
1593 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1594 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1595 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1596 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1597 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1598 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1601 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1602 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1603 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1604 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1605 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1606 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1611 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1612 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1613 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1614 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1615 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1616 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1620 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1621 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1622 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1623 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1624 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1625 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1631 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1632 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
1633 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
1638 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1639 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
1640 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
1644 switch (
N->getOpcode()) {
1651 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1652 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1653 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1654 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1655 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1656 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1659 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1660 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1661 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1662 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1663 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1664 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1669 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1670 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1671 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1672 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1673 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1674 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1678 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1679 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1680 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1681 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1682 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1683 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1689 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1690 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
1691 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
1696 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1697 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
1698 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
1708 switch (
N->getOpcode()) {
1715 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1716 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1717 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1718 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1719 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1720 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1723 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1724 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1725 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1726 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1727 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1728 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1733 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1734 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1735 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1736 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1737 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1738 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1742 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1743 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1744 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1745 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1746 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1747 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1753 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1754 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
1755 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
1760 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1761 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
1762 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
1766 switch (
N->getOpcode()) {
1773 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1774 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1775 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1776 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1777 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1778 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1781 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1782 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1783 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1784 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1785 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1786 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1791 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1792 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1793 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1794 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1795 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1796 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1800 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1801 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1802 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1803 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1804 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1805 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1811 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1812 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
1813 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
1818 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1819 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
1820 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
1826 SDValue Ops[] = { Op1, Chain };
1843 if (OrigType != EltVT &&
1853 for (
unsigned i = 0; i != NumElts; ++i) {
1869bool NVPTXDAGToDAGISel::tryStore(
SDNode *
N) {
1872 assert(
ST->writeMem() &&
"Expected store");
1875 assert((PlainStore || AtomicStore) &&
"Expected store");
1876 EVT StoreVT =
ST->getMemoryVT();
1877 SDNode *NVPTXST =
nullptr;
1880 if (PlainStore && PlainStore->
isIndexed())
1905 "Unexpected vector type");
1918 std::optional<unsigned> Opcode;
1920 Value.getNode()->getSimpleValueType(0).SimpleTy;
1922 if (SelectDirectAddr(BasePtr,
Addr)) {
1923 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1924 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1925 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1929 getI32Imm(CodeMemorySem, dl),
1930 getI32Imm(CodeAddrSpace, dl),
1931 getI32Imm(vecType, dl),
1932 getI32Imm(toType, dl),
1933 getI32Imm(toTypeWidth, dl),
1937 }
else if (PointerSize == 64
1940 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1941 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1942 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1946 getI32Imm(CodeMemorySem, dl),
1947 getI32Imm(CodeAddrSpace, dl),
1948 getI32Imm(vecType, dl),
1949 getI32Imm(toType, dl),
1950 getI32Imm(toTypeWidth, dl),
1955 }
else if (PointerSize == 64
1958 if (PointerSize == 64)
1961 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
1962 NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1964 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1965 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1966 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1971 getI32Imm(CodeMemorySem, dl),
1972 getI32Imm(CodeAddrSpace, dl),
1973 getI32Imm(vecType, dl),
1974 getI32Imm(toType, dl),
1975 getI32Imm(toTypeWidth, dl),
1981 if (PointerSize == 64)
1983 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1984 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1985 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1987 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1988 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1989 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1993 getI32Imm(CodeMemorySem, dl),
1994 getI32Imm(CodeAddrSpace, dl),
1995 getI32Imm(vecType, dl),
1996 getI32Imm(toType, dl),
1997 getI32Imm(toTypeWidth, dl),
2012bool NVPTXDAGToDAGISel::tryStoreVector(
SDNode *
N) {
2016 std::optional<unsigned> Opcode;
2046 switch (
N->getOpcode()) {
2051 N2 =
N->getOperand(3);
2059 N2 =
N->getOperand(5);
2081 if (SelectDirectAddr(N2,
Addr)) {
2082 switch (
N->getOpcode()) {
2087 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
2088 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
2089 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
2093 NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
2094 NVPTX::STV_i32_v4_avar, std::nullopt,
2095 NVPTX::STV_f32_v4_avar, std::nullopt);
2101 switch (
N->getOpcode()) {
2106 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
2107 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
2108 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
2113 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
2114 std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
2121 if (PointerSize == 64) {
2122 switch (
N->getOpcode()) {
2128 NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
2129 NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
2130 NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
2135 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
2136 NVPTX::STV_f32_v4_ari_64, std::nullopt);
2140 switch (
N->getOpcode()) {
2145 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2146 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2147 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
2151 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
2152 NVPTX::STV_i32_v4_ari, std::nullopt,
2153 NVPTX::STV_f32_v4_ari, std::nullopt);
2160 if (PointerSize == 64) {
2161 switch (
N->getOpcode()) {
2167 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2168 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2169 NVPTX::STV_f64_v2_areg_64);
2174 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
2175 NVPTX::STV_f32_v4_areg_64, std::nullopt);
2179 switch (
N->getOpcode()) {
2185 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2186 NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
2187 NVPTX::STV_f64_v2_areg);
2192 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
2193 std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
2214bool NVPTXDAGToDAGISel::tryLoadParam(
SDNode *
Node) {
2222 switch (
Node->getOpcode()) {
2236 EVT EltVT =
Node->getValueType(0);
2239 std::optional<unsigned> Opcode;
2246 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2247 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2248 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2253 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2254 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
2255 NVPTX::LoadParamMemV2F64);
2260 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
2261 std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
2270 }
else if (VecSize == 2) {
2273 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2277 unsigned OffsetVal =
Offset->getAsZExtVal();
2288bool NVPTXDAGToDAGISel::tryStoreRetval(
SDNode *
N) {
2292 unsigned OffsetVal =
Offset->getAsZExtVal();
2296 unsigned NumElts = 1;
2297 switch (
N->getOpcode()) {
2313 for (
unsigned i = 0; i < NumElts; ++i)
2321 std::optional<unsigned> Opcode = 0;
2327 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2328 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2329 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2330 if (Opcode == NVPTX::StoreRetvalI8) {
2334 switch (Ops[0].getSimpleValueType().SimpleTy) {
2338 Opcode = NVPTX::StoreRetvalI8TruncI32;
2341 Opcode = NVPTX::StoreRetvalI8TruncI64;
2348 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2349 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2350 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2354 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2355 NVPTX::StoreRetvalV4I32, std::nullopt,
2356 NVPTX::StoreRetvalV4F32, std::nullopt);
// Helper macros for picking a StoreParamV2*/StoreParamV4* opcode whose
// mnemonic suffix encodes, per vector element, whether that operand is an
// immediate ("i") or a register ("r") -- e.g. NVPTX::StoreParamV4I8_rrrr.
// Each getOpc*H<k> level resolves element k's immediate-ness flag into the
// matching suffix letter via token pasting, then delegates to the next
// level for the remaining elements.

// Paste the final two-letter suffix for a V2 store.
#define getOpcV2H(ty, opKind0, opKind1) \
  NVPTX::StoreParamV2##ty##_##opKind0##opKind1

// Resolve element 1 (the second element) of a V2 store.
#define getOpcV2H1(ty, opKind0, isImm1) \
  (isImm1) ? getOpcV2H(ty, opKind0, i) : getOpcV2H(ty, opKind0, r)

// Resolve both elements of a V2 store from the isimm[] flags.
#define getOpcodeForVectorStParamV2(ty, isimm) \
  (isimm[0]) ? getOpcV2H1(ty, i, isimm[1]) : getOpcV2H1(ty, r, isimm[1])

// Paste the final four-letter suffix for a V4 store.
#define getOpcV4H(ty, opKind0, opKind1, opKind2, opKind3) \
  NVPTX::StoreParamV4##ty##_##opKind0##opKind1##opKind2##opKind3

// Resolve element 3 (the last element) of a V4 store.
#define getOpcV4H3(ty, opKind0, opKind1, opKind2, isImm3) \
  (isImm3) ? getOpcV4H(ty, opKind0, opKind1, opKind2, i) \
           : getOpcV4H(ty, opKind0, opKind1, opKind2, r)

// Resolve element 2, then delegate element 3.
#define getOpcV4H2(ty, opKind0, opKind1, isImm2, isImm3) \
  (isImm2) ? getOpcV4H3(ty, opKind0, opKind1, i, isImm3) \
           : getOpcV4H3(ty, opKind0, opKind1, r, isImm3)

// Resolve element 1, then delegate elements 2-3.
#define getOpcV4H1(ty, opKind0, isImm1, isImm2, isImm3) \
  (isImm1) ? getOpcV4H2(ty, opKind0, i, isImm2, isImm3) \
           : getOpcV4H2(ty, opKind0, r, isImm2, isImm3)

// Resolve all four elements of a V4 store from the isimm[] flags.
#define getOpcodeForVectorStParamV4(ty, isimm) \
  (isimm[0]) ? getOpcV4H1(ty, i, isimm[1], isimm[2], isimm[3]) \
             : getOpcV4H1(ty, r, isimm[1], isimm[2], isimm[3])

// Dispatch on vector arity: n == 2 selects the V2 family, otherwise V4.
#define getOpcodeForVectorStParam(n, ty, isimm) \
  (n == 2) ? getOpcodeForVectorStParamV2(ty, isimm) \
           : getOpcodeForVectorStParamV4(ty, isimm)
2410 for (
unsigned i = 0; i < NumElts; i++) {
2411 IsImm[i] = (isa<ConstantSDNode>(Ops[i]) || isa<ConstantFPSDNode>(Ops[i]));
2414 if (MemTy == MVT::f32 || MemTy == MVT::f64) {
2436 assert(NumElts == 2 &&
"MVT too large for NumElts > 2");
2441 assert(NumElts == 2 &&
"MVT too large for NumElts > 2");
2447 return (NumElts == 2) ? NVPTX::StoreParamV2I8_rr
2448 : NVPTX::StoreParamV4I8_rrrr;
2451 return (NumElts == 2) ? NVPTX::StoreParamV2I16_rr
2452 : NVPTX::StoreParamV4I16_rrrr;
2457 return (NumElts == 2) ? NVPTX::StoreParamV2I32_rr
2458 : NVPTX::StoreParamV4I32_rrrr;
2464bool NVPTXDAGToDAGISel::tryStoreParam(
SDNode *
N) {
2468 unsigned ParamVal =
Param->getAsZExtVal();
2470 unsigned OffsetVal =
Offset->getAsZExtVal();
2472 SDValue Glue =
N->getOperand(
N->getNumOperands() - 1);
2476 switch (
N->getOpcode()) {
2494 for (
unsigned i = 0; i < NumElts; ++i)
2504 std::optional<unsigned> Opcode;
2505 switch (
N->getOpcode()) {
2513 if (MemTy != MVT::f16 && MemTy != MVT::v2f16 &&
2514 (isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm))) {
2516 if (MemTy == MVT::f32 || MemTy == MVT::f64) {
2528 NVPTX::StoreParamI16_i, NVPTX::StoreParamI32_i,
2529 NVPTX::StoreParamI64_i, NVPTX::StoreParamF32_i,
2530 NVPTX::StoreParamF64_i);
2534 NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
2535 NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r,
2536 NVPTX::StoreParamF32_r, NVPTX::StoreParamF64_r);
2537 if (Opcode == NVPTX::StoreParamI8_r) {
2541 switch (Ops[0].getSimpleValueType().SimpleTy) {
2545 Opcode = NVPTX::StoreParamI8TruncI32_r;
2548 Opcode = NVPTX::StoreParamI8TruncI64_r;
2566 Opcode = NVPTX::StoreParamI32_r;
2570 MVT::i32, Ops[0], CvtNone);
2575 Opcode = NVPTX::StoreParamI32_r;
2579 MVT::i32, Ops[0], CvtNone);
2594bool NVPTXDAGToDAGISel::tryTextureIntrinsic(
SDNode *
N) {
2597 switch (
N->getOpcode()) {
2598 default:
return false;
2600 Opc = NVPTX::TEX_1D_F32_S32_RR;
2603 Opc = NVPTX::TEX_1D_F32_F32_RR;
2606 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2609 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2612 Opc = NVPTX::TEX_1D_S32_S32_RR;
2615 Opc = NVPTX::TEX_1D_S32_F32_RR;
2618 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2621 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2624 Opc = NVPTX::TEX_1D_U32_S32_RR;
2627 Opc = NVPTX::TEX_1D_U32_F32_RR;
2630 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2633 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2636 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2639 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2642 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2645 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2648 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2651 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2654 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2657 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2660 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2663 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2666 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2669 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2672 Opc = NVPTX::TEX_2D_F32_S32_RR;
2675 Opc = NVPTX::TEX_2D_F32_F32_RR;
2678 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2681 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2684 Opc = NVPTX::TEX_2D_S32_S32_RR;
2687 Opc = NVPTX::TEX_2D_S32_F32_RR;
2690 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2693 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2696 Opc = NVPTX::TEX_2D_U32_S32_RR;
2699 Opc = NVPTX::TEX_2D_U32_F32_RR;
2702 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2705 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2708 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2711 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2714 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2717 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2720 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2723 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2726 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2729 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2732 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2735 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2738 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2741 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2744 Opc = NVPTX::TEX_3D_F32_S32_RR;
2747 Opc = NVPTX::TEX_3D_F32_F32_RR;
2750 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2753 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2756 Opc = NVPTX::TEX_3D_S32_S32_RR;
2759 Opc = NVPTX::TEX_3D_S32_F32_RR;
2762 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2765 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2768 Opc = NVPTX::TEX_3D_U32_S32_RR;
2771 Opc = NVPTX::TEX_3D_U32_F32_RR;
2774 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2777 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2780 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2783 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2786 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2789 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2792 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2795 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2798 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2801 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2804 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2807 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2810 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2813 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2816 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2819 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2822 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2825 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2828 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2831 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2834 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2837 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2840 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2843 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2846 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2849 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2852 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2855 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2858 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2861 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2864 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2867 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2870 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2873 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2876 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2879 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2882 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2885 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2888 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2891 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2894 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2897 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2900 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2903 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2906 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2909 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2912 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2915 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2918 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2921 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2924 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2927 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2930 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2933 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2936 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2939 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2942 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2945 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2948 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2951 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2954 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2957 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2960 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2963 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2966 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2969 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2972 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2975 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2978 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2981 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2984 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2987 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2990 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2993 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2996 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2999 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
3002 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
3005 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
3008 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
3011 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
3014 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
3017 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
3020 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
3023 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
3026 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
3029 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
3032 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
3035 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
3038 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
3041 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
3044 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
3047 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
3050 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
3053 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
3056 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
3059 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
3062 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
3065 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
3068 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
3071 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
3074 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
3077 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
3080 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
3083 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
3086 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
3089 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
3092 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
3095 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
3098 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
3101 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
3104 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
3107 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
3110 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
3113 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
3116 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
3119 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
3131bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(
SDNode *
N) {
3133 switch (
N->getOpcode()) {
3134 default:
return false;
3136 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
3139 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
3142 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
3145 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
3148 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
3151 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
3154 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
3157 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
3160 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
3163 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
3166 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
3169 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
3172 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
3175 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
3178 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
3181 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
3184 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
3187 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
3190 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
3193 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
3196 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
3199 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
3202 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
3205 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
3208 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
3211 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
3214 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
3217 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
3220 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
3223 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
3226 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
3229 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
3232 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
3235 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
3238 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
3241 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
3244 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
3247 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
3250 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
3253 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
3256 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
3259 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
3262 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
3265 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
3268 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
3271 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
3274 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
3277 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
3280 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
3283 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
3286 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
3289 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
3292 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
3295 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
3298 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
3301 Opc = NVPTX::SULD_1D_I8_TRAP_R;
3304 Opc = NVPTX::SULD_1D_I16_TRAP_R;
3307 Opc = NVPTX::SULD_1D_I32_TRAP_R;
3310 Opc = NVPTX::SULD_1D_I64_TRAP_R;
3313 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3316 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3319 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3322 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3325 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3328 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3331 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3334 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3337 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3340 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3343 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3346 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3349 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3352 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3355 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3358 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3361 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3364 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3367 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3370 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3373 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3376 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3379 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3382 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3385 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3388 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3391 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3394 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3397 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3400 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3403 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3406 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3409 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3412 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3415 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3418 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3421 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3424 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3427 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3430 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3433 Opc = NVPTX::SULD_3D_I8_TRAP_R;
3436 Opc = NVPTX::SULD_3D_I16_TRAP_R;
3439 Opc = NVPTX::SULD_3D_I32_TRAP_R;
3442 Opc = NVPTX::SULD_3D_I64_TRAP_R;
3445 Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3448 Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3451 Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3454 Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3457 Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3460 Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3463 Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3466 Opc = NVPTX::SULD_1D_I8_ZERO_R;
3469 Opc = NVPTX::SULD_1D_I16_ZERO_R;
3472 Opc = NVPTX::SULD_1D_I32_ZERO_R;
3475 Opc = NVPTX::SULD_1D_I64_ZERO_R;
3478 Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3481 Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3484 Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3487 Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3490 Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3493 Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3496 Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3499 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3502 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3505 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3508 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3511 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3514 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3517 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3520 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3523 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3526 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3529 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3532 Opc = NVPTX::SULD_2D_I8_ZERO_R;
3535 Opc = NVPTX::SULD_2D_I16_ZERO_R;
3538 Opc = NVPTX::SULD_2D_I32_ZERO_R;
3541 Opc = NVPTX::SULD_2D_I64_ZERO_R;
3544 Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3547 Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3550 Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3553 Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3556 Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3559 Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3562 Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3565 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3568 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3571 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3574 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3577 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3580 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3583 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3586 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3589 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3592 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3595 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3598 Opc = NVPTX::SULD_3D_I8_ZERO_R;
3601 Opc = NVPTX::SULD_3D_I16_ZERO_R;
3604 Opc = NVPTX::SULD_3D_I32_ZERO_R;
3607 Opc = NVPTX::SULD_3D_I64_ZERO_R;
3610 Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3613 Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3616 Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3619 Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3622 Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3625 Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3628 Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3643bool NVPTXDAGToDAGISel::tryBFE(
SDNode *
N) {
3650 bool IsSigned =
false;
3655 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3680 Val =
LHS.getNode()->getOperand(0);
3681 Start =
LHS.getNode()->getOperand(1);
3687 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3688 if (NumBits > GoodBits) {
3722 if (isa<ConstantSDNode>(AndLHS)) {
3746 NumBits = NumZeros + NumOnes - ShiftAmt;
3752 if (ShiftAmt < NumZeros) {
3769 Val =
LHS->getOperand(0);
3788 if (OuterShiftAmt < InnerShiftAmt) {
3824 Opc = NVPTX::BFE_S32rii;
3826 Opc = NVPTX::BFE_U32rii;
3830 Opc = NVPTX::BFE_S64rii;
3832 Opc = NVPTX::BFE_U64rii;
3871bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3876 if (SelectDirectAddr(base,
Base)) {
3899bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3911 if (SelectDirectAddr(
Addr.getOperand(0),
Addr)) {
3916 dyn_cast<FrameIndexSDNode>(
Addr.getOperand(0)))
3924 if (!CN->getAPIntValue().isSignedIntN(32))
3947bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(
SDNode *
N,
3948 unsigned int spN)
const {
3949 const Value *Src =
nullptr;
3950 if (
MemSDNode *mN = dyn_cast<MemSDNode>(
N)) {
3951 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3953 Src = mN->getMemOperand()->getValue();
3957 if (
auto *PT = dyn_cast<PointerType>(Src->getType()))
3958 return (PT->getAddressSpace() == spN);
3966 std::vector<SDValue> &OutOps) {
3968 switch (ConstraintID) {
3972 if (SelectDirectAddr(
Op, Op0)) {
3973 OutOps.push_back(Op0);
3977 if (SelectADDRri(
Op.getNode(),
Op, Op0, Op1)) {
3978 OutOps.push_back(Op0);
3979 OutOps.push_back(Op1);
3987void NVPTXDAGToDAGISel::SelectV2I64toI128(
SDNode *
N) {
4006 NewOps[0] =
N->getOperand(0);
4009 if (
N->getNumOperands() == 5)
4010 NewOps[3] =
N->getOperand(4);
4016void NVPTXDAGToDAGISel::SelectI128toV2I64(
SDNode *
N) {
4034 NVPTX::I128toV2I64,
DL,
4043unsigned NVPTXDAGToDAGISel::GetConvertOpcode(
MVT DestTy,
MVT SrcTy,
4054 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
4056 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
4058 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
4065 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
4067 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
4069 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
4076 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
4078 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
4080 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
4087 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
4089 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
4091 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
4098 return NVPTX::CVT_f32_f16;
4100 return NVPTX::CVT_f64_f16;
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define getOpcodeForVectorStParam(n, ty, isimm)
static unsigned int getCodeAddrSpace(MemSDNode *N)
static int getLdStRegType(EVT VT)
static unsigned pickOpcodeForVectorStParam(SmallVector< SDValue, 8 > &Ops, unsigned NumElts, MVT::SimpleValueType MemTy, SelectionDAG *CurDAG, SDLoc DL)
static unsigned int getCodeMemorySemantic(MemSDNode *N, const NVPTXSubtarget *Subtarget)
#define getOpcodeForVectorStParamV2(ty, isimm)
static cl::opt< bool > EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden, cl::desc("Enable reciprocal sqrt optimization"))
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
static std::optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, std::optional< unsigned > Opcode_i64, unsigned Opcode_f32, std::optional< unsigned > Opcode_f64)
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
This is an SDNode representing atomic operations.
const SDValue & getVal() const
const ConstantFP * getConstantFPValue() const
ConstantFP - Floating Point Values [float, double].
This is the shared class of boolean and integer constants.
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
FunctionPass class - This class is used to implement most global optimizations.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
EVT getMemoryVT() const
Return the type of the in-memory value.
NVPTXDAGToDAGISelLegacy(NVPTXTargetMachine &tm, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
const NVPTXSubtarget * Subtarget
const NVPTXTargetLowering * getTargetLowering() const override
bool hasRelaxedMMIO() const
bool hasMemoryOrdering() const
bool useF32FTZ(const MachineFunction &MF) const
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const
bool usePrecSqrtF32() const
bool allowUnsafeFPMath(MachineFunction &MF) const
int getDivF32Level() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const SDValue & getOperand(unsigned Num) const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getValue() const
unsigned getPointerSizeInBits(unsigned AS) const
LLVM Value Representation.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ TexUnified1DS32FloatLevel
@ Tex1DArrayFloatFloatLevel
@ TexUnified2DU32FloatGrad
@ Tld4UnifiedG2DFloatFloat
@ TexUnifiedCubeArrayFloatFloatLevel
@ Tld4UnifiedR2DFloatFloat
@ Tex2DArrayS32FloatLevel
@ TexUnified1DArrayFloatFloatLevel
@ TexUnified2DFloatFloatLevel
@ TexUnified3DFloatFloatLevel
@ TexUnified1DFloatFloatLevel
@ TexUnified2DArrayU32Float
@ TexUnified1DArrayFloatFloat
@ Tex1DArrayFloatFloatGrad
@ TexUnifiedCubeArrayU32FloatGrad
@ TexUnified1DFloatFloatGrad
@ TexUnifiedCubeFloatFloatGrad
@ TexUnified2DArrayFloatFloat
@ TexUnified3DU32FloatLevel
@ TexUnified1DArrayU32Float
@ TexUnified2DArrayFloatFloatLevel
@ TexUnified2DFloatFloatGrad
@ TexUnified2DArrayU32S32
@ TexUnifiedCubeArrayS32FloatLevel
@ TexUnified1DArrayS32Float
@ TexUnified1DArrayS32FloatLevel
@ TexUnified2DS32FloatLevel
@ TexUnified3DU32FloatGrad
@ TexUnifiedCubeU32FloatLevel
@ TexUnified2DArrayU32FloatGrad
@ TexUnifiedCubeFloatFloatLevel
@ TexUnified1DArrayFloatS32
@ TexUnifiedCubeS32FloatLevel
@ TexUnified1DS32FloatGrad
@ Tex2DArrayFloatFloatLevel
@ TexUnifiedCubeArrayFloatFloat
@ TexUnifiedCubeArrayFloatFloatGrad
@ TexUnifiedCubeFloatFloat
@ TexUnified1DArrayU32S32
@ TexUnified3DFloatFloatGrad
@ Tld4UnifiedA2DFloatFloat
@ TexUnified3DS32FloatGrad
@ TexUnified2DU32FloatLevel
@ TexUnified1DArrayS32S32
@ TexCubeArrayFloatFloatLevel
@ TexUnified1DU32FloatGrad
@ TexCubeArrayS32FloatLevel
@ Tex2DArrayU32FloatLevel
@ Tex1DArrayU32FloatLevel
@ TexUnified2DArrayU32FloatLevel
@ TexUnified1DArrayFloatFloatGrad
@ TexUnifiedCubeS32FloatGrad
@ TexCubeArrayU32FloatLevel
@ TexUnified3DS32FloatLevel
@ TexUnifiedCubeArrayS32FloatGrad
@ TexUnified2DArrayS32Float
@ Tex2DArrayFloatFloatGrad
@ TexUnifiedCubeArrayS32Float
@ TexUnified2DArrayS32FloatLevel
@ Tex1DArrayS32FloatLevel
@ TexUnifiedCubeArrayU32FloatLevel
@ TexUnified2DArrayS32S32
@ TexUnified2DArrayFloatFloatGrad
@ TexUnifiedCubeU32FloatGrad
@ Tld4UnifiedB2DFloatFloat
@ TexUnified1DArrayU32FloatLevel
@ TexUnified1DArrayS32FloatGrad
@ TexUnified2DS32FloatGrad
@ TexUnified2DArrayS32FloatGrad
@ TexUnified1DU32FloatLevel
@ TexUnifiedCubeArrayU32Float
@ TexUnified2DArrayFloatS32
@ TexUnified1DArrayU32FloatGrad
initializer< Ty > init(const Ty &Val)
constexpr uint64_t PointerSize
aarch64 pointer size.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOptLevel OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG,...
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
const char * toIRString(AtomicOrdering ao)
String used by LLVM IR to represent atomic ordering.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
CodeGenOptLevel
Code generation optimization level.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isKernelFunction(const Function &F)
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.