#include "llvm/IR/IntrinsicsNVPTX.h"

#define DEBUG_TYPE "nvptx-isel"
#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"

static cl::opt<bool>
    EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden,
                   cl::desc("Enable reciprocal sqrt optimization"));

int NVPTXDAGToDAGISel::getDivF32Level() const {

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {

bool NVPTXDAGToDAGISel::useF32FTZ() const {

bool NVPTXDAGToDAGISel::allowFMA() const {

bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {

bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
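// The bodies of the option getters above are not part of this fragment. As a
// hedged sketch (an assumed shape, not the verbatim upstream code), each one
// simply forwards to the corresponding NVPTXTargetLowering/NVPTXSubtarget
// query for the current MachineFunction, e.g.:
//
//   bool NVPTXDAGToDAGISel::useF32FTZ() const {
//     return Subtarget->getTargetLowering()->useF32FTZ(*MF); // assumed shape
//   }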
void NVPTXDAGToDAGISel::Select(SDNode *N) {
  if (N->isMachineOpcode()) {

  switch (N->getOpcode()) {
    if (tryEXTRACT_VECTOR_ELEMENT(N))
    SelectSETP_BF16X2(N);
    if (tryLoadVector(N))
    if (tryStoreVector(N))
    if (tryStoreRetval(N))
    if (tryStoreParam(N))
    if (tryIntrinsicNoChain(N))
    if (tryIntrinsicChain(N))
    if (tryIntrinsicVoid(N))
    SelectAddrSpaceCast(N);
    if (N->getOperand(1).getValueType() == MVT::i128) {
      SelectV2I64toI128(N);
    if (N->getOperand(1).getValueType() == MVT::i128) {
      SelectI128toV2I64(N);

bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(1);
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:

    return CmpMode::NotANumber;

bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),

bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
      NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),

bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
  for (auto *U : Vector.getNode()->users()) {
    if (U->getOperand(0) != Vector)
    dyn_cast<ConstantSDNode>(U->getOperand(1))) {
      if (IdxConst->getZExtValue() == 0)
      else if (IdxConst->getZExtValue() == 1)
  for (auto *Node : E0)
  for (auto *Node : E1)

  const Value *Src = N->getMemOperand()->getValue();
  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
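// For context (a hedged sketch, not the exact upstream switch body): the
// address-space dispatch above maps the LLVM IR pointer address space onto the
// code address space used by the load/store patterns. The numeric NVPTX
// address spaces are generic = 0, global = 1, shared = 3, const = 4,
// local = 5; the enumerators returned for them here are assumed:
//
//   case llvm::ADDRESS_SPACE_GLOBAL: return /* global code addr space */;
//   case llvm::ADDRESS_SPACE_SHARED: return /* shared code addr space */;
//   case llvm::ADDRESS_SPACE_CONST:  return /* const code addr space  */;
//   case llvm::ADDRESS_SPACE_LOCAL:  return /* local code addr space  */;
//   default:                         return /* generic code addr space */;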
struct OperationOrderings {

static OperationOrderings
      !HasMemoryOrdering) {
        formatv("PTX does not support \"atomic\" for orderings different than "
                "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order "

  bool AddrGenericOrGlobalOrShared =
  if (!AddrGenericOrGlobalOrShared)
  bool UseRelaxedMMIO =

        formatv("PTX only supports Acquire Ordering on reads: {}",
                N->getOperationName()));
        formatv("PTX only supports Release Ordering on writes: {}",
                N->getOperationName()));
        formatv("NVPTX does not support AcquireRelease Ordering on "
                "yet and PTX does not support it on loads or stores: {}",
                N->getOperationName()));
  else if (N->writeMem())
        formatv("NVPTX does not support SequentiallyConsistent Ordering on "
                "read-modify-writes yet: {}",
                N->getOperationName()));
  return OperationOrderings(InstrOrder,
        formatv("NVPTX backend does not support AtomicOrdering \"{}\" yet.",

  auto S = Scopes[N->getSyncScopeID()];

  if (N->isInvariant())

  if (auto *A = dyn_cast<const Argument>(V))
    return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
  if (auto *GV = dyn_cast<const GlobalVariable>(V))
    return GV->isConstant();

  T->failIfClustersUnsupported(".cluster scope fence");

    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_sys
                                  : NVPTX::INT_MEMBAR_SYS;
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_cta
                                  : NVPTX::INT_MEMBAR_CTA;
    return NVPTX::atomic_thread_fence_acq_rel_cluster;
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_gpu
                                  : NVPTX::INT_MEMBAR_GL;
        formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_sys
                                  : NVPTX::INT_MEMBAR_SYS;
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_cta
                                  : NVPTX::INT_MEMBAR_CTA;
    return NVPTX::atomic_thread_fence_seq_cst_cluster;
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_gpu
                                  : NVPTX::INT_MEMBAR_GL;
        formatv("Unsupported \"{}\" ordering and \"{}\" scope for fence.",
                OrderingToString(O), ScopeToString(S)));
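// Summary of the mapping implied by the returns above (hedged: reconstructed
// only from the visible return statements, not from additional upstream
// context): for acquire/release/acq_rel and seq_cst fences, each scope (sys,
// cta, cluster, gpu) selects the matching atomic_thread_fence_* instruction
// when the subtarget reports hasMemoryOrdering(), and otherwise falls back to
// the legacy membar form (INT_MEMBAR_SYS / INT_MEMBAR_CTA / INT_MEMBAR_GL);
// cluster scope has no membar fallback. Unsupported ordering/scope
// combinations are reported through formatv-built fatal errors.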
std::pair<NVPTX::Ordering, NVPTX::Scope>
NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
        formatv("Unexpected fence ordering: \"{}\".",

bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(0);
  case Intrinsic::nvvm_texsurf_handle_internal:
    SelectTexSurfHandle(N);

void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
                                     MVT::i64, GlobalVal));

void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  switch (SrcAddrSpace) {
    Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
    Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;
    Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const;
    Opc = TM.is64Bit() ? NVPTX::cvta_local_64 : NVPTX::cvta_local;

  if (SrcAddrSpace != 0)

  switch (DstAddrSpace) {
    Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
    Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared;
    Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const;
    Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local;
    Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr;

static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
                unsigned Opcode_i16, unsigned Opcode_i32,
                std::optional<unsigned> Opcode_i64, unsigned Opcode_f32,
                std::optional<unsigned> Opcode_f64) {
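// The body of pickOpcodeForVT is not included in this fragment. A minimal
// sketch of the dispatch it performs (assumed from the parameter names and
// from how every call site below passes one opcode per scalar type):
//
//   switch (VT) {
//   case MVT::i8:  return Opcode_i8;
//   case MVT::i16: return Opcode_i16;
//   case MVT::i32: return Opcode_i32;
//   case MVT::i64: return Opcode_i64;
//   case MVT::f32: return Opcode_f32;
//   case MVT::f64: return Opcode_f64;
//   default:       return std::nullopt;
//   }
//
// Callers then bail out when the returned optional is empty (for example the
// v4 vector load/store variants, which have no i64/f64 form).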
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  assert(LD->readMem() && "Expected load");

  EVT LoadedVT = LD->getMemoryVT();

  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
         "Unexpected vector type");

  std::optional<unsigned> Opcode;
              getI32Imm(CodeAddrSpace, DL),
              getI32Imm(VecType, DL), getI32Imm(FromType, DL),
              getI32Imm(FromTypeWidth, DL)});
  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
                             NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
                             NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    Ops.append({Addr, Chain});
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (PointerSize == 64)
                             NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
                             NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
                               NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
                               NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (PointerSize == 64)
                             NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
                             NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
                               NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
                               NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
  Ops.append({N1, Chain});

  return Isv2x16VT(EltVT) || EltVT == MVT::v4i8;
bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
    return tryLDGLDU(N);

  auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, MemSD);

  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());

  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();

  switch (N->getOpcode()) {

  EVT EltVT = N->getValueType(0);

  std::optional<unsigned> Opcode;
              getI32Imm(CodeAddrSpace, DL),
              getI32Imm(VecType, DL), getI32Imm(FromType, DL),
              getI32Imm(FromTypeWidth, DL)});
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
          NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
          NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
          NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
          NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
          std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
    Ops.append({Addr, Chain});
  } else if (PointerSize == 64
    switch (N->getOpcode()) {
          NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
          NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
          NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
          NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
          std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
  } else if (PointerSize == 64
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
            NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
            NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
            NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
            NVPTX::LDV_f32_v4_ari_64, std::nullopt);
      switch (N->getOpcode()) {
            NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
            NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
            NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
            NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
            std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
            NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
            NVPTX::LDV_f64_v2_areg_64);
            NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
            NVPTX::LDV_f32_v4_areg_64, std::nullopt);
      switch (N->getOpcode()) {
            NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
            NVPTX::LDV_f64_v2_areg);
            NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
            std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
  Ops.append({Op1, Chain});
bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
  auto *Mem = cast<MemSDNode>(N);

  EVT OrigType = N->getValueType(0);
  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;

  if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
      (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
      (EltVT == MVT::i16 && OrigType == MVT::v2i16) ||
      (EltVT == MVT::i8 && OrigType == MVT::v4i8)) {
           "NumElts must be divisible by the number of elts in subvectors");

  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
  for (unsigned i = 0; i != NumElts; ++i) {

  std::optional<unsigned> Opcode;
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
          NVPTX::INT_PTX_LDG_GLOBAL_i16avar, NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
          NVPTX::INT_PTX_LDG_GLOBAL_i64avar, NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
          NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
          NVPTX::INT_PTX_LDU_GLOBAL_i16avar, NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
          NVPTX::INT_PTX_LDU_GLOBAL_i64avar, NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
          NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
          NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar, NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar, NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar, NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
          NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar, NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar, NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar, NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
    switch (N->getOpcode()) {
          NVPTX::INT_PTX_LDG_GLOBAL_i8ari64, NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
          NVPTX::INT_PTX_LDG_GLOBAL_i32ari64, NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
          NVPTX::INT_PTX_LDG_GLOBAL_f32ari64, NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
          NVPTX::INT_PTX_LDU_GLOBAL_i8ari64, NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
          NVPTX::INT_PTX_LDU_GLOBAL_i32ari64, NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
          NVPTX::INT_PTX_LDU_GLOBAL_f32ari64, NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
          NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
          NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
    switch (N->getOpcode()) {
          NVPTX::INT_PTX_LDG_GLOBAL_i16ari, NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
          NVPTX::INT_PTX_LDG_GLOBAL_i64ari, NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
          NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
          NVPTX::INT_PTX_LDU_GLOBAL_i16ari, NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
          NVPTX::INT_PTX_LDU_GLOBAL_i64ari, NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
          NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
          NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
          NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
    switch (N->getOpcode()) {
          NVPTX::INT_PTX_LDG_GLOBAL_i8areg64, NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
          NVPTX::INT_PTX_LDG_GLOBAL_i32areg64, NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
          NVPTX::INT_PTX_LDG_GLOBAL_f32areg64, NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
          NVPTX::INT_PTX_LDU_GLOBAL_i8areg64, NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
          NVPTX::INT_PTX_LDU_GLOBAL_i32areg64, NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
          NVPTX::INT_PTX_LDU_GLOBAL_f32areg64, NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
          NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
          NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
    switch (N->getOpcode()) {
          NVPTX::INT_PTX_LDG_GLOBAL_i8areg, NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
          NVPTX::INT_PTX_LDG_GLOBAL_i32areg, NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
          NVPTX::INT_PTX_LDG_GLOBAL_f32areg, NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
          NVPTX::INT_PTX_LDU_GLOBAL_i8areg, NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
          NVPTX::INT_PTX_LDU_GLOBAL_i32areg, NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
          NVPTX::INT_PTX_LDU_GLOBAL_f32areg, NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
          NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
          NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);

  SDValue Ops[] = {Op1, Chain};

  if (OrigType != EltVT &&
  for (unsigned i = 0; i != NumElts; ++i) {
bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  assert(ST->writeMem() && "Expected store");
  assert((PlainStore || AtomicStore) && "Expected store");
  if (PlainStore && PlainStore->isIndexed())

  EVT StoreVT = ST->getMemoryVT();
         "Unexpected vector type");

  std::optional<unsigned> Opcode;
      Value.getNode()->getSimpleValueType(0).SimpleTy;
      {Value, getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
       getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
       getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    Ops.append({Addr, Chain});
  } else if (PointerSize == 64
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
  } else if (PointerSize == 64
    if (PointerSize == 64)
                             NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
                             NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (PointerSize == 64)
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
  std::optional<unsigned> Opcode;

  auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, MemSD);

  switch (N->getOpcode()) {
    Ops.append({N->getOperand(1), N->getOperand(2)});
    N2 = N->getOperand(3);
    Ops.append({N->getOperand(1), N->getOperand(2), N->getOperand(3),
    N2 = N->getOperand(5);

  Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
              getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
              getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
          NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
          NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
          NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
          NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
          NVPTX::STV_i32_v4_avar, std::nullopt,
          NVPTX::STV_f32_v4_avar, std::nullopt);
    switch (N->getOpcode()) {
          NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
          NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
          NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
          NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
          std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
            NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
            NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
            NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
            NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
            NVPTX::STV_f32_v4_ari_64, std::nullopt);
      switch (N->getOpcode()) {
            NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
            NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
            NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
            NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
            NVPTX::STV_i32_v4_ari, std::nullopt,
            NVPTX::STV_f32_v4_ari, std::nullopt);
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
            NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
            NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
            NVPTX::STV_f64_v2_areg_64);
            NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
            NVPTX::STV_f32_v4_areg_64, std::nullopt);
      switch (N->getOpcode()) {
            NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
            NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
            NVPTX::STV_f64_v2_areg);
            NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
            std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
  switch (Node->getOpcode()) {

  EVT EltVT = Node->getValueType(0);

  std::optional<unsigned> Opcode;
                             NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
                             NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
                             NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
                             NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
                             NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
                             NVPTX::LoadParamMemV2F64);
                             NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
                             std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
  } else if (VecSize == 2) {
    EVT EVTs[] = {EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue};

  unsigned OffsetVal = Offset->getAsZExtVal();

bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
  unsigned OffsetVal = Offset->getAsZExtVal();

  unsigned NumElts = 1;
  switch (N->getOpcode()) {

  for (unsigned i = 0; i < NumElts; ++i)

  std::optional<unsigned> Opcode = 0;
                             NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
                             NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
                             NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
  if (Opcode == NVPTX::StoreRetvalI8) {
    switch (Ops[0].getSimpleValueType().SimpleTy) {
      Opcode = NVPTX::StoreRetvalI8TruncI32;
      Opcode = NVPTX::StoreRetvalI8TruncI64;
                             NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
                             NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
                             NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
                             NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
                             NVPTX::StoreRetvalV4I32, std::nullopt,
                             NVPTX::StoreRetvalV4F32, std::nullopt);
#define getOpcV2H(ty, opKind0, opKind1)                                        \
  NVPTX::StoreParamV2##ty##_##opKind0##opKind1

#define getOpcV2H1(ty, opKind0, isImm1)                                        \
  (isImm1) ? getOpcV2H(ty, opKind0, i) : getOpcV2H(ty, opKind0, r)

#define getOpcodeForVectorStParamV2(ty, isimm)                                 \
  (isimm[0]) ? getOpcV2H1(ty, i, isimm[1]) : getOpcV2H1(ty, r, isimm[1])

#define getOpcV4H(ty, opKind0, opKind1, opKind2, opKind3)                      \
  NVPTX::StoreParamV4##ty##_##opKind0##opKind1##opKind2##opKind3

#define getOpcV4H3(ty, opKind0, opKind1, opKind2, isImm3)                      \
  (isImm3) ? getOpcV4H(ty, opKind0, opKind1, opKind2, i)                       \
           : getOpcV4H(ty, opKind0, opKind1, opKind2, r)

#define getOpcV4H2(ty, opKind0, opKind1, isImm2, isImm3)                       \
  (isImm2) ? getOpcV4H3(ty, opKind0, opKind1, i, isImm3)                       \
           : getOpcV4H3(ty, opKind0, opKind1, r, isImm3)

#define getOpcV4H1(ty, opKind0, isImm1, isImm2, isImm3)                        \
  (isImm1) ? getOpcV4H2(ty, opKind0, i, isImm2, isImm3)                        \
           : getOpcV4H2(ty, opKind0, r, isImm2, isImm3)

#define getOpcodeForVectorStParamV4(ty, isimm)                                 \
  (isimm[0]) ? getOpcV4H1(ty, i, isimm[1], isimm[2], isimm[3])                 \
             : getOpcV4H1(ty, r, isimm[1], isimm[2], isimm[3])

#define getOpcodeForVectorStParam(n, ty, isimm)                                \
  (n == 2) ? getOpcodeForVectorStParamV2(ty, isimm)                            \
           : getOpcodeForVectorStParamV4(ty, isimm)
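// Worked expansion of the macros above (derived purely from the macro text;
// the concrete argument values are illustrative only): for a 2-element store
// whose first operand is an immediate and whose second is a register,
//
//   getOpcodeForVectorStParam(2, I32, isimm)   // isimm = {true, false}
//
// selects getOpcodeForVectorStParamV2(I32, isimm), which takes the "i" branch
// for element 0 and the "r" branch for element 1, yielding the token-pasted
// opcode NVPTX::StoreParamV2I32_ir.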
  for (unsigned i = 0; i < NumElts; i++) {
    IsImm[i] = (isa<ConstantSDNode>(Ops[i]) || isa<ConstantFPSDNode>(Ops[i]));
  if (MemTy == MVT::f32 || MemTy == MVT::f64) {
    assert(NumElts == 2 && "MVT too large for NumElts > 2");
    assert(NumElts == 2 && "MVT too large for NumElts > 2");
    return (NumElts == 2) ? NVPTX::StoreParamV2I8_rr
                          : NVPTX::StoreParamV4I8_rrrr;
    return (NumElts == 2) ? NVPTX::StoreParamV2I16_rr
                          : NVPTX::StoreParamV4I16_rrrr;
    return (NumElts == 2) ? NVPTX::StoreParamV2I32_rr
                          : NVPTX::StoreParamV4I32_rrrr;

bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
  unsigned ParamVal = Param->getAsZExtVal();
  unsigned OffsetVal = Offset->getAsZExtVal();
  SDValue Glue = N->getOperand(N->getNumOperands() - 1);

  switch (N->getOpcode()) {
  for (unsigned i = 0; i < NumElts; ++i)

  std::optional<unsigned> Opcode;
  switch (N->getOpcode()) {
    if (MemTy != MVT::f16 && MemTy != MVT::v2f16 &&
        (isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm))) {
      if (MemTy == MVT::f32 || MemTy == MVT::f64) {
                             NVPTX::StoreParamI16_i, NVPTX::StoreParamI32_i,
                             NVPTX::StoreParamI64_i, NVPTX::StoreParamF32_i,
                             NVPTX::StoreParamF64_i);
                             NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
                             NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r,
                             NVPTX::StoreParamF32_r, NVPTX::StoreParamF64_r);
    if (Opcode == NVPTX::StoreParamI8_r) {
      switch (Ops[0].getSimpleValueType().SimpleTy) {
        Opcode = NVPTX::StoreParamI8TruncI32_r;
        Opcode = NVPTX::StoreParamI8TruncI64_r;
      Opcode = NVPTX::StoreParamI32_r;
                                       MVT::i32, Ops[0], CvtNone);
      Opcode = NVPTX::StoreParamI32_r;
                                       MVT::i32, Ops[0], CvtNone);
bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
  bool IsSigned = false;

  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {

    Val = LHS.getNode()->getOperand(0);
    Start = LHS.getNode()->getOperand(1);

    int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
    if (NumBits > GoodBits) {

    if (isa<ConstantSDNode>(AndLHS)) {

    NumBits = NumZeros + NumOnes - ShiftAmt;

    if (ShiftAmt < NumZeros) {

    Val = LHS->getOperand(0);

    if (OuterShiftAmt < InnerShiftAmt) {

      Opc = NVPTX::BFE_S32rii;
      Opc = NVPTX::BFE_U32rii;
      Opc = NVPTX::BFE_S64rii;
      Opc = NVPTX::BFE_U64rii;

  return V.getOpcode() == ISD::ADD ||
         (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());

bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
  if (SelectDirectAddr(base, Base)) {

bool NVPTXDAGToDAGISel::SelectADDRri_imp(
  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
      if (!CN->getAPIntValue().isSignedIntN(32))
                                      SDLoc(OpNode), MVT::i32);

bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  const Value *Src = nullptr;
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
    Src = mN->getMemOperand()->getValue();
  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);

bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
    if (SelectDirectAddr(Op, Op0)) {
      OutOps.push_back(Op0);
    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
      OutOps.push_back(Op0);
      OutOps.push_back(Op1);
void NVPTXDAGToDAGISel::SelectV2I64toI128(SDNode *N) {
  NewOps[0] = N->getOperand(0);
  if (N->getNumOperands() == 5)
    NewOps[3] = N->getOperand(4);

void NVPTXDAGToDAGISel::SelectI128toV2I64(SDNode *N) {
      NVPTX::I128toV2I64, DL,

unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
      return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
      return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
      return NVPTX::CVT_f32_f16;
      return NVPTX::CVT_f64_f16;

bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {
  unsigned int FenceOp =
                 Scopes[N->getConstantOperandVal(2)], Subtarget);

         "NVPTXScopes::operator[]");
  auto S = Scopes.find(ID);
  if (S == Scopes.end()) {
#define CP_ASYNC_BULK_TENSOR_OPCODE(dir, dim, mode, is_s32, suffix)            \
  (is_s32                                                                      \
       ? NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_SHARED32_##mode##suffix   \
       : NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_##mode##suffix)

#define CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(op, dim, mode, is_ch, is_s32)     \
  (is_ch ? (CP_ASYNC_BULK_TENSOR_OPCODE(op, dim, mode, is_s32, _CH))           \
         : (CP_ASYNC_BULK_TENSOR_OPCODE(op, dim, mode, is_s32, )))

#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(dim, mode, is_reduce, is_ch,       \
      ? (CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(RED, dim, mode, is_ch, is_s32))  \
      : (CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(S2G, dim, mode, is_ch,           \

#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(dim, mode, is_mc, is_ch, is_s32)   \
    if (is_mc && is_ch)                                                        \
      return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _MC_CH);      \
      return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _CH);         \
      return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _MC);         \
    return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, );              \

#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(dim, mode, is_ch)             \
  (is_ch ? NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode##_CH            \
         : NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode)
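// Worked expansion (following the token pasting above; the argument values
// are illustrative only): CP_ASYNC_BULK_TENSOR_OPCODE(S2G, 2D, TILE, false,
// _CH) pastes to NVPTX::CP_ASYNC_BULK_TENSOR_S2G_2D_TILE_CH, while passing
// is_s32 = true selects the SHARED32 flavour,
// NVPTX::CP_ASYNC_BULK_TENSOR_S2G_2D_SHARED32_TILE_CH. The *_S2G, *_G2S and
// *_PREFETCH wrappers only decide which suffix (_CH, _MC, _MC_CH or none) and
// which direction prefix to paste, based on the cache-hint/multicast flags.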
static unsigned GetCpAsyncBulkTensorS2GOpcode(size_t Dim, bool IsShared32,
                                              bool IsCacheHint, bool IsIm2Col,
                                              bool IsReduce = false) {
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
                       "GetCpAsyncBulkTensorS2GOpcode.");
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
        "Invalid Dimension in tile mode for GetCpAsyncBulkTensorS2GOpcode.");

static unsigned GetCpAsyncBulkTensorG2SOpcode(size_t Dim, bool IsShared32,
                                              bool IsMultiCast,
                                              bool IsCacheHint, bool IsIm2Col) {
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
                       "GetCpAsyncBulkTensorG2SOpcode.");
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
                                            IsCacheHint, IsShared32);
        "Invalid Dimension in tile mode for GetCpAsyncBulkTensorG2SOpcode.");

                       "GetCpAsyncBulkTensorPrefetchOpcode.");
                       "GetCpAsyncBulkTensorPrefetchOpcode.");

  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d:

void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorG2SCommon(SDNode *N,
  size_t NumOps = N->getNumOperands();
  size_t NumOffsets = IsIm2Col ? (NumDims - 2) : 0;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  bool IsMultiCast = N->getConstantOperandVal(NumOps - 2) == 1;
  size_t NumBaseArgs = NumDims + NumOffsets + 3;
  size_t MultiCastIdx = NumBaseArgs + 2;

  Ops.push_back(N->getOperand(MultiCastIdx + 1));

      NumDims, IsShared32, IsMultiCast, IsCacheHint, IsIm2Col);

void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorS2GCommon(SDNode *N,
  size_t NumOps = N->getNumOperands();
  size_t NumDims = NumOps - 6;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  size_t NumArgs = NumDims + (IsCacheHint ? 3 : 2);

void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorPrefetchCommon(SDNode *N,
  size_t NumOps = N->getNumOperands();
  size_t NumOffsets = IsIm2Col ? (NumDims - 2) : 0;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  size_t NumArgs = NumDims + NumOffsets + (IsCacheHint ? 2 : 1);

void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorReduceCommon(SDNode *N,
  size_t NumOps = N->getNumOperands();
  size_t NumDims = NumOps - 6;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  size_t NumArgs = NumDims + (IsCacheHint ? 3 : 2);

      NumDims, IsShared32, IsCacheHint, IsIm2Col, true);

bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(1);
  auto CastTy = [](TMARedTy Op) { return static_cast<unsigned>(Op); };
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_5d:
    SelectCpAsyncBulkTensorS2GCommon(N);
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_5d:
    SelectCpAsyncBulkTensorS2GCommon(N, true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d:
    SelectCpAsyncBulkTensorG2SCommon(N);
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
    SelectCpAsyncBulkTensorG2SCommon(N, true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_5d:
    SelectCpAsyncBulkTensorPrefetchCommon(N);
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d:
    SelectCpAsyncBulkTensorPrefetchCommon(N, true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::ADD));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::ADD), true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MIN));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MIN), true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MAX));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MAX), true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::INC));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::INC), true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::DEC));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::DEC), true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::AND));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::AND), true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::OR));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::OR), true);
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::XOR));
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::XOR), true);