#include "llvm/IR/IntrinsicsNVPTX.h"

#define DEBUG_TYPE "nvptx-isel"
#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
static cl::opt<bool>
    EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden,
                   cl::desc("Enable reciprocal sqrt optimization"));
int NVPTXDAGToDAGISel::getDivF32Level() const {
  return Subtarget->getTargetLowering()->getDivF32Level();
}

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  return Subtarget->getTargetLowering()->usePrecSqrtF32();
}

bool NVPTXDAGToDAGISel::useF32FTZ() const {
  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}

bool NVPTXDAGToDAGISel::allowFMA() const {
  return Subtarget->getTargetLowering()->allowFMA(*MF, OptLevel);
}

bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
  return Subtarget->getTargetLowering()->allowUnsafeFPMath(*MF);
}

bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
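// Select() is the top-level dispatch for this pass: it switches on the node's
// opcode and hands the node to one of the try*/Select* helpers below.  The
// try* helpers are expected to return true once they have replaced the node;
// when no custom handler fires, selection presumably falls through to the
// TableGen-generated matcher, as is usual for SelectionDAGISel subclasses.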
void NVPTXDAGToDAGISel::Select(SDNode *N) {
  if (N->isMachineOpcode()) {
  switch (N->getOpcode()) {
    if (tryEXTRACT_VECTOR_ELEMENT(N))
    SelectSETP_BF16X2(N);
    if (tryLoadVector(N))
    if (tryStoreVector(N))
    if (tryStoreRetval(N))
    if (tryStoreParam(N))
    if (tryIntrinsicNoChain(N))
    if (tryIntrinsicChain(N))
    if (tryIntrinsicVoid(N))
    SelectAddrSpaceCast(N);
    if (N->getOperand(1).getValueType() == MVT::i128) {
      SelectV2I64toI128(N);
    if (N->getOperand(1).getValueType() == MVT::i128) {
      SelectI128toV2I64(N);
bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(1);
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:

    return CmpMode::NotANumber;
bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),

bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
      NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
  for (auto *U : Vector.getNode()->users()) {
    if (U->getOperand(0) != Vector)
        dyn_cast<ConstantSDNode>(U->getOperand(1))) {
      if (IdxConst->getZExtValue() == 0)
      else if (IdxConst->getZExtValue() == 1)
  for (auto *Node : E0)
  for (auto *Node : E1)

  const Value *Src = N->getMemOperand()->getValue();
  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
struct OperationOrderings {

static OperationOrderings
      !HasMemoryOrdering) {
    formatv("PTX does not support \"atomic\" for orderings different than "
            "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order "

  bool AddrGenericOrGlobalOrShared =
  if (!AddrGenericOrGlobalOrShared)

  bool UseRelaxedMMIO =

    formatv("PTX only supports Acquire Ordering on reads: {}",
            N->getOperationName()));
    formatv("PTX only supports Release Ordering on writes: {}",
            N->getOperationName()));
    formatv("NVPTX does not support AcquireRelease Ordering on "
            "read-modify-writes "
            "yet and PTX does not support it on loads or stores: {}",
            N->getOperationName()));
  else if (N->writeMem())
    formatv("NVPTX does not support SequentiallyConsistent Ordering on "
            "read-modify-writes yet: {}",
            N->getOperationName()));
  return OperationOrderings(InstrOrder,
    formatv("NVPTX backend does not support AtomicOrdering \"{}\" yet.",
  auto S = Scopes[N->getSyncScopeID()];

  if (N->isInvariant())

  if (auto *A = dyn_cast<const Argument>(V))
    return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
  if (auto *GV = dyn_cast<const GlobalVariable>(V))
    return GV->isConstant();

  T->failIfClustersUnsupported(".cluster scope fence");
      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_sys
                                    : NVPTX::INT_MEMBAR_SYS;
      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_cta
                                    : NVPTX::INT_MEMBAR_CTA;
      return NVPTX::atomic_thread_fence_acq_rel_cluster;
      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_gpu
                                    : NVPTX::INT_MEMBAR_GL;
      formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_sys
                                    : NVPTX::INT_MEMBAR_SYS;
      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_cta
                                    : NVPTX::INT_MEMBAR_CTA;
      return NVPTX::atomic_thread_fence_seq_cst_cluster;
      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_gpu
                                    : NVPTX::INT_MEMBAR_GL;
    formatv("Unsupported \"{}\" ordering and \"{}\" scope for fence.",
            OrderingToString(O), ScopeToString(S)));
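// getFenceOp's mapping, as visible above: when the subtarget reports
// hasMemoryOrdering() (PTX-level memory-consistency support), acquire/release
// and seq_cst fences select the scoped atomic_thread_fence_* instructions;
// otherwise they degrade to the legacy membar.{sys,cta,gl} forms.  Cluster
// scope has no legacy fallback, which is presumably why it is guarded by
// failIfClustersUnsupported() earlier in the function.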
std::pair<NVPTX::Ordering, NVPTX::Scope>
NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
    formatv("Unexpected fence ordering: \"{}\".",

bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(0);
  case Intrinsic::nvvm_texsurf_handle_internal:
    SelectTexSurfHandle(N);

void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
                                       MVT::i64, GlobalVal));
void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

    switch (SrcAddrSpace) {
      Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
      Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;
      Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const;
      Opc = TM.is64Bit() ? NVPTX::cvta_local_64 : NVPTX::cvta_local;

    if (SrcAddrSpace != 0)
    switch (DstAddrSpace) {
      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
      Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared;
      Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const;
      Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local;
      Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr;
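// Pattern in SelectAddrSpaceCast: casts *from* a specific address space to
// generic use the cvta_* opcodes, casts *to* a specific space use cvta_to_*,
// and a generic-to-generic cast degenerates to a plain register move
// (IMOV32rr/IMOV64rr).  The *_64 variants are chosen whenever the target
// machine is 64-bit.  (The SrcAddrSpace != 0 check above presumably rejects
// casts between two non-generic address spaces.)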
static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
                unsigned Opcode_i16, unsigned Opcode_i32,
                std::optional<unsigned> Opcode_i64, unsigned Opcode_f32,
                std::optional<unsigned> Opcode_f64) {
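// pickOpcodeForVT is the workhorse of the load/store selection below: given a
// simple value type it returns the matching entry from a per-type opcode
// table, and callers pass std::nullopt for the i64/f64 slots of instruction
// families that have no 64-bit form (e.g. the v4 vector loads).  A typical
// call site, as seen later in this file, looks like:
//   Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
//                            NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
//                            NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);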
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  assert(LD->readMem() && "Expected load");

  EVT LoadedVT = LD->getMemoryVT();

  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
         "Unexpected vector type");

  std::optional<unsigned> Opcode;
              getI32Imm(CodeAddrSpace, DL),
              getI32Imm(VecType, DL), getI32Imm(FromType, DL),
              getI32Imm(FromTypeWidth, DL)});

  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
                             NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
                             NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    Ops.append({Addr, Chain});
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (PointerSize == 64)
                               NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
                               NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
                               NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
                               NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (PointerSize == 64)
                               NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
                               NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
                               NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
                               NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    Ops.append({N1, Chain});
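// The four opcode families tried above correspond to the load's addressing
// mode: *_avar when SelectDirectAddr matches a direct (symbolic) address,
// *_asi for a symbol plus immediate offset, *_ari for a register base plus
// immediate (with _ari_64 variants when pointers are 64-bit), and *_areg as
// the fallback that takes the pointer in a register.  This reading of the
// suffixes is inferred from the selection order and the SelectDirectAddr/
// SelectADDRri helpers used in this file.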
  return Isv2x16VT(EltVT) || EltVT == MVT::v4i8;

bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
    return tryLDGLDU(N);

  auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, MemSD);

  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());

  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();

  switch (N->getOpcode()) {

  EVT EltVT = N->getValueType(0);

  std::optional<unsigned> Opcode;
              getI32Imm(CodeAddrSpace, DL),
              getI32Imm(VecType, DL), getI32Imm(FromType, DL),
              getI32Imm(FromTypeWidth, DL)});

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
                               NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
                               NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
                               NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
                               NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
                               std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
    Ops.append({Addr, Chain});
  } else if (PointerSize == 64
    switch (N->getOpcode()) {
                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
                               NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
                               NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
                               std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
  } else if (PointerSize == 64
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
                                 NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
                                 NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
                                 NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
                                 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
                                 NVPTX::LDV_f32_v4_ari_64, std::nullopt);
      switch (N->getOpcode()) {
                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
                                 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
                                 std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
                                 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
                                 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
                                 NVPTX::LDV_f64_v2_areg_64);
                                 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
                                 NVPTX::LDV_f32_v4_areg_64, std::nullopt);
      switch (N->getOpcode()) {
                                 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
                                 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
                                 NVPTX::LDV_f64_v2_areg);
                                 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
                                 std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
  Ops.append({Op1, Chain});
bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
  auto *Mem = cast<MemSDNode>(N);

  EVT OrigType = N->getValueType(0);
  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;

  if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
      (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
      (EltVT == MVT::i16 && OrigType == MVT::v2i16) ||
      (EltVT == MVT::i8 && OrigType == MVT::v4i8)) {
           "NumElts must be divisible by the number of elts in subvectors");

  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
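// When the in-memory element type is one of the packed 32-bit types checked
// above (v2f16/v2bf16/v2i16/v4i8), the load is handled so that each selected
// machine operand covers a whole packed sub-vector, with NumElts scaled
// accordingly.  i8 elements are selected with an i16 node type here,
// presumably because the NVPTX backend models sub-16-bit values in 16-bit
// registers.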
  for (unsigned i = 0; i != NumElts; ++i) {

  std::optional<unsigned> Opcode;

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
          NVPTX::INT_PTX_LDG_GLOBAL_i16avar, NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
          NVPTX::INT_PTX_LDG_GLOBAL_i64avar, NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
          NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
          NVPTX::INT_PTX_LDU_GLOBAL_i16avar, NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
          NVPTX::INT_PTX_LDU_GLOBAL_i64avar, NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
          NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
          NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
          NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
    switch (N->getOpcode()) {
          NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
          NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
          NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
          NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
          NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
          NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
          NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
          NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
          NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
          NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
          NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
          NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
          NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
          NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
    switch (N->getOpcode()) {
          NVPTX::INT_PTX_LDG_GLOBAL_i16ari, NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
          NVPTX::INT_PTX_LDG_GLOBAL_i64ari, NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
          NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
          NVPTX::INT_PTX_LDU_GLOBAL_i16ari, NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
          NVPTX::INT_PTX_LDU_GLOBAL_i64ari, NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
          NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
          NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
          NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
    switch (N->getOpcode()) {
          NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
          NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
          NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
          NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
          NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
          NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
          NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
          NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
          NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
          NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
          NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
          NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
          NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
          NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
    switch (N->getOpcode()) {
          NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
          NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
          NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
          NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
          NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
          NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
          NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
          NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
          NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
          NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
          NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
          NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
          NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
          NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);

  SDValue Ops[] = { Op1, Chain };
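// The opcode tables above enumerate the full cross product handled by
// tryLDGLDU: {ld.global.nc (LDG), ldu.global (LDU)} x {scalar, v2, v4
// element-wise} x addressing mode (avar/ari/areg) x 32- vs 64-bit pointers,
// with pickOpcodeForVT supplying the final element-type dimension.  The v4
// forms pass std::nullopt for i64/f64, presumably because a v4 of 64-bit
// elements would exceed the 128-bit vector access these instructions provide.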
  if (OrigType != EltVT &&

  for (unsigned i = 0; i != NumElts; ++i) {
bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  assert(ST->writeMem() && "Expected store");
  assert((PlainStore || AtomicStore) && "Expected store");
  if (PlainStore && PlainStore->isIndexed())

  EVT StoreVT = ST->getMemoryVT();
         "Unexpected vector type");

  std::optional<unsigned> Opcode;
      Value.getNode()->getSimpleValueType(0).SimpleTy;
      {Value, getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
       getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
       getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});

  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    Ops.append({Addr, Chain});
  } else if (PointerSize == 64
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
  } else if (PointerSize == 64
    if (PointerSize == 64)
                               NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
                               NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (PointerSize == 64)
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
  std::optional<unsigned> Opcode;

  auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, MemSD);

  switch (N->getOpcode()) {
    Ops.append({N->getOperand(1), N->getOperand(2)});
    N2 = N->getOperand(3);
    Ops.append({N->getOperand(1), N->getOperand(2), N->getOperand(3),
    N2 = N->getOperand(5);

  Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
              getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
              getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});

  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
                               NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
                               NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
                               NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
                               NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
                               NVPTX::STV_i32_v4_avar, std::nullopt,
                               NVPTX::STV_f32_v4_avar, std::nullopt);
    switch (N->getOpcode()) {
                               NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
                               NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
                               NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
                               NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
                               std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
                                 NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
                                 NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
                                 NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
                                 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
                                 NVPTX::STV_f32_v4_ari_64, std::nullopt);
      switch (N->getOpcode()) {
                                 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
                                 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
                                 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
                                 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
                                 NVPTX::STV_i32_v4_ari, std::nullopt,
                                 NVPTX::STV_f32_v4_ari, std::nullopt);
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
                                 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
                                 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
                                 NVPTX::STV_f64_v2_areg_64);
                                 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
                                 NVPTX::STV_f32_v4_areg_64, std::nullopt);
      switch (N->getOpcode()) {
                                 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
                                 NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
                                 NVPTX::STV_f64_v2_areg);
                                 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
                                 std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
  switch (Node->getOpcode()) {

  EVT EltVT = Node->getValueType(0);

  std::optional<unsigned> Opcode;
                             NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
                             NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
                             NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
                             NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
                             NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
                             NVPTX::LoadParamMemV2F64);
                             NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
                             std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
  } else if (VecSize == 2) {

  EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };

  unsigned OffsetVal = Offset->getAsZExtVal();
bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
  unsigned OffsetVal = Offset->getAsZExtVal();

  unsigned NumElts = 1;
  switch (N->getOpcode()) {

  for (unsigned i = 0; i < NumElts; ++i)

  std::optional<unsigned> Opcode = 0;
                             NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
                             NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
                             NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
    if (Opcode == NVPTX::StoreRetvalI8) {
      switch (Ops[0].getSimpleValueType().SimpleTy) {
        Opcode = NVPTX::StoreRetvalI8TruncI32;
        Opcode = NVPTX::StoreRetvalI8TruncI64;
                             NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
                             NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
                             NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
                             NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
                             NVPTX::StoreRetvalV4I32, std::nullopt,
                             NVPTX::StoreRetvalV4F32, std::nullopt);
#define getOpcV2H(ty, opKind0, opKind1) \
  NVPTX::StoreParamV2##ty##_##opKind0##opKind1

#define getOpcV2H1(ty, opKind0, isImm1) \
  (isImm1) ? getOpcV2H(ty, opKind0, i) : getOpcV2H(ty, opKind0, r)

#define getOpcodeForVectorStParamV2(ty, isimm) \
  (isimm[0]) ? getOpcV2H1(ty, i, isimm[1]) : getOpcV2H1(ty, r, isimm[1])

#define getOpcV4H(ty, opKind0, opKind1, opKind2, opKind3) \
  NVPTX::StoreParamV4##ty##_##opKind0##opKind1##opKind2##opKind3

#define getOpcV4H3(ty, opKind0, opKind1, opKind2, isImm3) \
  (isImm3) ? getOpcV4H(ty, opKind0, opKind1, opKind2, i)  \
           : getOpcV4H(ty, opKind0, opKind1, opKind2, r)

#define getOpcV4H2(ty, opKind0, opKind1, isImm2, isImm3) \
  (isImm2) ? getOpcV4H3(ty, opKind0, opKind1, i, isImm3) \
           : getOpcV4H3(ty, opKind0, opKind1, r, isImm3)

#define getOpcV4H1(ty, opKind0, isImm1, isImm2, isImm3)  \
  (isImm1) ? getOpcV4H2(ty, opKind0, i, isImm2, isImm3)  \
           : getOpcV4H2(ty, opKind0, r, isImm2, isImm3)

#define getOpcodeForVectorStParamV4(ty, isimm)                 \
  (isimm[0]) ? getOpcV4H1(ty, i, isimm[1], isimm[2], isimm[3]) \
             : getOpcV4H1(ty, r, isimm[1], isimm[2], isimm[3])

#define getOpcodeForVectorStParam(n, ty, isimm)      \
  (n == 2) ? getOpcodeForVectorStParamV2(ty, isimm)  \
           : getOpcodeForVectorStParamV4(ty, isimm)
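// The macro ladder above builds a StoreParamV2*/StoreParamV4* opcode whose
// suffix records, for each of the 2 or 4 operands, whether it is an immediate
// ('i') or a register ('r').  For example, getOpcodeForVectorStParam(2, I32,
// isimm) with isimm = {true, false} would expand to
// NVPTX::StoreParamV2I32_ir.  (The worked expansion is illustrative; only the
// macro definitions themselves come from the surrounding code.)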
  for (unsigned i = 0; i < NumElts; i++) {
    IsImm[i] = (isa<ConstantSDNode>(Ops[i]) || isa<ConstantFPSDNode>(Ops[i]));

  if (MemTy == MVT::f32 || MemTy == MVT::f64) {

    assert(NumElts == 2 && "MVT too large for NumElts > 2");
    assert(NumElts == 2 && "MVT too large for NumElts > 2");

    return (NumElts == 2) ? NVPTX::StoreParamV2I8_rr
                          : NVPTX::StoreParamV4I8_rrrr;
    return (NumElts == 2) ? NVPTX::StoreParamV2I16_rr
                          : NVPTX::StoreParamV4I16_rrrr;
    return (NumElts == 2) ? NVPTX::StoreParamV2I32_rr
                          : NVPTX::StoreParamV4I32_rrrr;
bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
  unsigned ParamVal = Param->getAsZExtVal();
  unsigned OffsetVal = Offset->getAsZExtVal();
  SDValue Glue = N->getOperand(N->getNumOperands() - 1);

  switch (N->getOpcode()) {

  for (unsigned i = 0; i < NumElts; ++i)

  std::optional<unsigned> Opcode;
  switch (N->getOpcode()) {
    if (MemTy != MVT::f16 && MemTy != MVT::v2f16 &&
        (isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm))) {
      if (MemTy == MVT::f32 || MemTy == MVT::f64) {
                               NVPTX::StoreParamI16_i, NVPTX::StoreParamI32_i,
                               NVPTX::StoreParamI64_i, NVPTX::StoreParamF32_i,
                               NVPTX::StoreParamF64_i);
                               NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
                               NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r,
                               NVPTX::StoreParamF32_r, NVPTX::StoreParamF64_r);
      if (Opcode == NVPTX::StoreParamI8_r) {
        switch (Ops[0].getSimpleValueType().SimpleTy) {
          Opcode = NVPTX::StoreParamI8TruncI32_r;
          Opcode = NVPTX::StoreParamI8TruncI64_r;
      Opcode = NVPTX::StoreParamI32_r;
                                MVT::i32, Ops[0], CvtNone);
      Opcode = NVPTX::StoreParamI32_r;
                                MVT::i32, Ops[0], CvtNone);
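// In the two cases just above, a parameter narrower than 32 bits is widened
// to i32 with a CVT node (the CvtNone operand selects a plain conversion) and
// then stored with StoreParamI32_r; the surrounding context that decides
// which source types take this path is not shown in this excerpt.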
bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
  bool IsSigned = false;

  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
    Val = LHS.getNode()->getOperand(0);
    Start = LHS.getNode()->getOperand(1);

    int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
    if (NumBits > GoodBits) {
    if (isa<ConstantSDNode>(AndLHS)) {
      NumBits = NumZeros + NumOnes - ShiftAmt;
      if (ShiftAmt < NumZeros) {
    Val = LHS->getOperand(0);
    if (OuterShiftAmt < InnerShiftAmt) {

      Opc = NVPTX::BFE_S32rii;
      Opc = NVPTX::BFE_U32rii;
      Opc = NVPTX::BFE_S64rii;
      Opc = NVPTX::BFE_U64rii;
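// tryBFE recognises shift-and-mask idioms (srl/and and nested shift pairs,
// judging by the ShiftAmt/NumZeros/NumOnes bookkeeping above) and folds them
// into a single PTX bfe instruction; the rii suffix presumably denotes a
// register source with immediate start and length operands, in signed or
// unsigned and 32- or 64-bit flavours.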
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
  if (SelectDirectAddr(base, Base)) {

bool NVPTXDAGToDAGISel::SelectADDRri_imp(
  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
      if (!CN->getAPIntValue().isSignedIntN(32))
                                         SDLoc(OpNode), MVT::i32);
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  const Value *Src = nullptr;
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
    Src = mN->getMemOperand()->getValue();
  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
    if (SelectDirectAddr(Op, Op0)) {
      OutOps.push_back(Op0);
    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
      OutOps.push_back(Op0);
      OutOps.push_back(Op1);
void NVPTXDAGToDAGISel::SelectV2I64toI128(SDNode *N) {
  NewOps[0] = N->getOperand(0);
  if (N->getNumOperands() == 5)
    NewOps[3] = N->getOperand(4);

void NVPTXDAGToDAGISel::SelectI128toV2I64(SDNode *N) {
      NVPTX::I128toV2I64, DL,
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
      return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
      return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
      return NVPTX::CVT_f32_f16;
      return NVPTX::CVT_f64_f16;
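// GetConvertOpcode simply pairs the source and destination widths: the CVT_*
// names encode the destination type first and the source type second
// (CVT_s32_s16 converts s16 to s32), with the signed or unsigned family
// chosen by IsSigned and f16 widenings handled by the two floating-point
// cases at the end.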
bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {
  unsigned int FenceOp =
                 Scopes[N->getConstantOperandVal(2)], Subtarget);

         "NVPTXScopes::operator[]");
  auto S = Scopes.find(ID);
  if (S == Scopes.end()) {
#define CP_ASYNC_BULK_TENSOR_OPCODE(dir, dim, mode, is_s32, suffix)           \
       ? NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_SHARED32_##mode##suffix  \
       : NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_##mode##suffix)

#define CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(op, dim, mode, is_ch, is_s32) \
  (is_ch ? (CP_ASYNC_BULK_TENSOR_OPCODE(op, dim, mode, is_s32, _CH))       \
         : (CP_ASYNC_BULK_TENSOR_OPCODE(op, dim, mode, is_s32, )))

#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(dim, mode, is_reduce, is_ch,      \
      ? (CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(RED, dim, mode, is_ch, is_s32)) \
      : (CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(S2G, dim, mode, is_ch,          \
#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(dim, mode, is_mc, is_ch, is_s32) \
    if (is_mc && is_ch)                                                      \
      return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _MC_CH);    \
      return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _CH);       \
      return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _MC);       \
    return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, );            \
#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(dim, mode, is_ch)     \
  (is_ch ? NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode##_CH    \
         : NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode)
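// Opcode names for the TMA (cp.async.bulk.tensor) copies are assembled from
// the pieces these macros juggle: transfer direction (G2S, S2G, RED for
// reductions, or PREFETCH), dimensionality, tile vs. im2col mode, a SHARED32
// infix when the shared-memory pointer is 32-bit, and optional _MC/_CH
// suffixes for the multicast and cache-hint variants.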
                                            bool IsCacheHint, bool IsIm2Col,
                                            bool IsReduce = false) {
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
                       "GetCpAsyncBulkTensorS2GOpcode.");
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
        "Invalid Dimension in tile mode for GetCpAsyncBulkTensorS2GOpcode.");

                                            bool IsCacheHint, bool IsIm2Col) {
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
                       "GetCpAsyncBulkTensorG2SOpcode.");
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
                                                IsCacheHint, IsShared32);
        "Invalid Dimension in tile mode for GetCpAsyncBulkTensorG2SOpcode.");

                       "GetCpAsyncBulkTensorPrefetchOpcode.");
                       "GetCpAsyncBulkTensorPrefetchOpcode.");
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d:
void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorG2SCommon(SDNode *N,
  size_t NumOps = N->getNumOperands();
  size_t NumOffsets = IsIm2Col ? (NumDims - 2) : 0;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  bool IsMultiCast = N->getConstantOperandVal(NumOps - 2) == 1;
  size_t NumBaseArgs = NumDims + NumOffsets + 3;
  size_t MultiCastIdx = NumBaseArgs + 2;
    Ops.push_back(N->getOperand(MultiCastIdx + 1));
      NumDims, IsShared32, IsMultiCast, IsCacheHint, IsIm2Col);
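// Operand layout assumed by the arithmetic above: three leading pointer-like
// operands (NumBaseArgs = NumDims + NumOffsets + 3) plus NumDims coordinates
// (and, in im2col mode, NumDims - 2 offsets), followed by the multicast mask
// and cache-hint value, with two trailing immediates flagging whether
// multicast and cache-hint are actually in use.  The exact positions are
// inferred from NumBaseArgs/MultiCastIdx and the
// getConstantOperandVal(NumOps - 1/2) checks.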
void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorS2GCommon(SDNode *N,
  size_t NumOps = N->getNumOperands();
  size_t NumDims = NumOps - 6;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  size_t NumArgs = NumDims + (IsCacheHint ? 3 : 2);

void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorPrefetchCommon(SDNode *N,
  size_t NumOps = N->getNumOperands();
  size_t NumOffsets = IsIm2Col ? (NumDims - 2) : 0;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  size_t NumArgs = NumDims + NumOffsets + (IsCacheHint ? 2 : 1);

void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorReduceCommon(SDNode *N,
  size_t NumOps = N->getNumOperands();
  size_t NumDims = NumOps - 6;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  size_t NumArgs = NumDims + (IsCacheHint ? 3 : 2);
      NumDims, IsShared32, IsCacheHint, IsIm2Col, true);
bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(1);
  auto CastTy = [](TMARedTy Op) { return static_cast<unsigned>(Op); };
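  // The switch that follows maps every nvvm_cp_async_bulk_tensor_* intrinsic
  // onto one of the four *Common selectors above: plain S2G/G2S copies,
  // prefetches, and reductions, with the im2col variants passing true for the
  // IsIm2Col flag and the reduce variants passing the reduction kind through
  // CastTy(TMARedTy::...).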
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_5d:
    SelectCpAsyncBulkTensorS2GCommon(N);
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_5d:
    SelectCpAsyncBulkTensorS2GCommon(N, true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d:
    SelectCpAsyncBulkTensorG2SCommon(N);
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
    SelectCpAsyncBulkTensorG2SCommon(N, true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_5d:
    SelectCpAsyncBulkTensorPrefetchCommon(N);
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d:
    SelectCpAsyncBulkTensorPrefetchCommon(N, true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::ADD));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::ADD),
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MIN));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MIN),
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MAX));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MAX),
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::INC));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::INC),
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::DEC));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::DEC),
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::AND));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::AND),
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::OR));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::OR),
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::XOR));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::XOR),