#include "llvm/IR/IntrinsicsNVPTX.h"

#define DEBUG_TYPE "nvptx-isel"
#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"

static cl::opt<bool>
    EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden,
                   cl::desc("Enable reciprocal sqrt optimization"));
int NVPTXDAGToDAGISel::getDivF32Level() const {
  // ...
}

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  // ...
}

bool NVPTXDAGToDAGISel::useF32FTZ() const {
  // ...
}

bool NVPTXDAGToDAGISel::allowFMA() const {
  // ...
}

bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
  // ...
}

bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
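// The bodies elided above simply forward to the matching NVPTXTargetLowering /
// NVPTXSubtarget queries (getDivF32Level(), usePrecSqrtF32(), useF32FTZ(MF),
// allowFMA(MF, OptLevel), allowUnsafeFPMath(MF)); doRsqrtOpt() just returns
// the EnableRsqrtOpt flag defined above.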
void NVPTXDAGToDAGISel::Select(SDNode *N) {
  if (N->isMachineOpcode()) {
    // Node is already selected.
    // ...
  }

  switch (N->getOpcode()) {
  // ... (one handler per opcode; case labels elided in this listing)
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    SelectSETP_BF16X2(N);
    if (tryLoadVector(N))
      return;
    if (tryStoreVector(N))
      return;
    if (tryStoreRetval(N))
      return;
    if (tryStoreParam(N))
      return;
    if (tryIntrinsicNoChain(N))
      return;
    if (tryIntrinsicChain(N))
      return;
    if (tryIntrinsicVoid(N))
      return;
    SelectAddrSpaceCast(N);
    if (N->getOperand(1).getValueType() == MVT::i128) {
      SelectV2I64toI128(N);
      // ...
    }
    if (N->getOperand(1).getValueType() == MVT::i128) {
      SelectI128toV2I64(N);
      // ...
    }
  // ...
}
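// Select() is the top-level dispatcher: each custom try*/Select* handler above
// either selects the node itself or declines, in which case selection is
// expected to fall through to the TableGen-generated matcher.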
bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(1);
  switch (IID) {
  // ...
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:
    // ...
  }
}
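// The ldu.global.{f,i,p} intrinsics above are selected through tryLDGLDU()
// below, which picks the INT_PTX_LDU_* machine opcodes.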
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
  // ...
    return CmpMode::NotANumber;
  // ...
}

bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  // ...
  CurDAG->getMachineNode(NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1,
                         N->getOperand(0), /* ... */);
  // ...
}

bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  // ...
  CurDAG->getMachineNode(NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1,
                         N->getOperand(0), /* ... */);
  // ...
}
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
  // ...
  for (auto *U : Vector.getNode()->users()) {
    // ...
    if (U->getOperand(0) != Vector)
      continue;
    if (auto *IdxConst = dyn_cast<ConstantSDNode>(U->getOperand(1))) {
      if (IdxConst->getZExtValue() == 0)
        // ...
      else if (IdxConst->getZExtValue() == 1)
        // ...
    }
  }
  // ...
  for (auto *Node : E0)
    // ...
  for (auto *Node : E1)
    // ...
}

static unsigned int getCodeAddrSpace(MemSDNode *N) {
  const Value *Src = N->getMemOperand()->getValue();
  // ...
  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
    // ...
    }
  }
  // ...
}
struct OperationOrderings {
  // ...
};

static OperationOrderings
/* ... */ {
  // ...
  if (/* ... */ !HasMemoryOrdering) {
    report_fatal_error(formatv(
        "PTX does not support \"atomic\" for orderings different than"
        "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order "
        /* ... */));
  }
  // ...
  bool AddrGenericOrGlobalOrShared = /* ... */;
  if (!AddrGenericOrGlobalOrShared)
    // ...
  bool UseRelaxedMMIO = /* ... */;
  // ...
  report_fatal_error(
      formatv("PTX only supports Acquire Ordering on reads: {}",
              N->getOperationName()));
  // ...
  report_fatal_error(
      formatv("PTX only supports Release Ordering on writes: {}",
              N->getOperationName()));
  // ...
  report_fatal_error(
      formatv("NVPTX does not support AcquireRelease Ordering on "
              "read-modify-writes yet and PTX does not support it on loads "
              "or stores: {}",
              N->getOperationName()));
  // ...
  else if (N->writeMem())
    // ...
  report_fatal_error(
      formatv("NVPTX does not support SequentiallyConsistent Ordering on "
              "read-modify-writes yet: {}",
              N->getOperationName()));
  return OperationOrderings(InstrOrder, /* ... */);
  // ...
  report_fatal_error(formatv(
      "NVPTX backend does not support AtomicOrdering \"{}\" yet.", /* ... */));
}
  // ...
  auto S = Scopes[N->getSyncScopeID()];
  // ...

  // In canLowerToLDG(...):
  if (N->isInvariant())
    return true;
  // ...
  if (auto *A = dyn_cast<const Argument>(V))
    return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
  if (auto *GV = dyn_cast<const GlobalVariable>(V))
    return GV->isConstant();
  // ...
static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
                               NVPTXSubtarget const *T) {
  // ...
    T->failIfClustersUnsupported(".cluster scope fence");
  // ... acquire/release/acq_rel fences, one case per scope:
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_sys
                                  : NVPTX::INT_MEMBAR_SYS;
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_cta
                                  : NVPTX::INT_MEMBAR_CTA;
    return NVPTX::atomic_thread_fence_acq_rel_cluster;
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_gpu
                                  : NVPTX::INT_MEMBAR_GL;
    report_fatal_error(formatv(
        "Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
        /* ... */));
  // ... seq_cst fences, one case per scope:
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_sys
                                  : NVPTX::INT_MEMBAR_SYS;
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_cta
                                  : NVPTX::INT_MEMBAR_CTA;
    return NVPTX::atomic_thread_fence_seq_cst_cluster;
    return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_gpu
                                  : NVPTX::INT_MEMBAR_GL;
  // ...
    report_fatal_error(
        formatv("Unsupported \"{}\" ordering and \"{}\" scope for fence.",
                OrderingToString(O), ScopeToString(S)));
}
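// On subtargets without PTX memory-ordering support, the acq_rel and seq_cst
// thread fences above degrade to the legacy membar.{sys,cta,gl} instructions;
// the .cluster scope has no membar fallback and requires cluster support,
// hence the failIfClustersUnsupported() check.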
std::pair<NVPTX::Ordering, NVPTX::Scope>
NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
                                                MemSDNode *N) {
  // ...
      report_fatal_error(
          formatv("Unexpected fence ordering: \"{}\".", /* ... */));
  // ...
}

bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(0);
  switch (IID) {
  // ...
  case Intrinsic::nvvm_texsurf_handle_internal:
    SelectTexSurfHandle(N);
    return true;
  }
}

void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
  // ...
  ReplaceNode(N, CurDAG->getMachineNode(/* ... */, MVT::i64, GlobalVal));
}
void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  // ...
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");
  // ...
  switch (SrcAddrSpace) {
  // ...
    Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
    Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;
    Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const;
    Opc = TM.is64Bit() ? NVPTX::cvta_local_64 : NVPTX::cvta_local;
  }
  // ...
  if (SrcAddrSpace != 0)
    // ...
  switch (DstAddrSpace) {
  // ...
    Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
    Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared;
    Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const;
    Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local;
    Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr;
  }
  // ...
}
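// The first switch (on SrcAddrSpace) lowers specific-to-generic casts with
// cvta.<space>; the second (on DstAddrSpace) lowers generic-to-specific casts
// with cvta.to.<space>, falling back to a plain register move for the
// remaining case. Casts between two non-generic address spaces are rejected.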
static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
                unsigned Opcode_i16, unsigned Opcode_i32,
                std::optional<unsigned> Opcode_i64, unsigned Opcode_f32,
                std::optional<unsigned> Opcode_f64) {
  // ...
}
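// pickOpcodeForVT maps the access's scalar value type (i8/i16/i32/i64/f32/f64)
// to the matching machine opcode. The i64/f64 slots are std::optional because
// several vector variants below have no 64-bit form; callers pass std::nullopt
// there and selection simply fails for those combinations.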
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  // ...
  assert(LD->readMem() && "Expected load");
  // ...
  EVT LoadedVT = LD->getMemoryVT();
  // ...
  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  // ...
         "Unexpected vector type");
  // ...
  std::optional<unsigned> Opcode;
  SmallVector<SDValue, 12> Ops(
      {/* ... */ getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
       getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL)});

  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
                             NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
                             NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    // ...
    Ops.append({Addr, Chain});
  } else if (/* ... */) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    // ...
  } else if (/* ... */) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(TargetVT, /* ... */
                               NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
                               NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
                               NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
                               NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    // ...
  } else {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(TargetVT, /* ... */
                               NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
                               NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
                               NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
                               NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    // ...
    Ops.append({N1, Chain});
  }
  // ...
}
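// Addressing-mode suffix convention used by the LD_/ST_ (and LDV_/STV_)
// opcodes selected here: _avar = direct (symbol) address, _asi = symbol plus
// immediate offset, _ari = register plus immediate offset, _areg = plain
// register; the _64 forms are used when the pointer is 64 bits wide. Each
// lowered access also carries its ordering, scope, address space, vector
// arity and type width as explicit immediate operands (the getI32Imm values
// above).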
static bool isVectorElementTypeUpsized(EVT EltVT) {
  return Isv2x16VT(EltVT) || EltVT == MVT::v4i8;
}

bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
  // ...
    return tryLDGLDU(N);
  // ...
  auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, MemSD);
  // ...
  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  // ...
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  // ...
  switch (N->getOpcode()) {
  // ...
  }
  EVT EltVT = N->getValueType(0);
  // ...
  std::optional<unsigned> Opcode;
  SmallVector<SDValue, 12> Ops(
      {/* ... */ getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
       getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL)});

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
          NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
          NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
          std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
    }
    // ...
    Ops.append({Addr, Chain});
  } else if (PointerSize == 64 ? /* ... */ : /* ... */) {
    switch (N->getOpcode()) {
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
          NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
          NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
          std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
    }
    // ...
  } else if (PointerSize == 64 ? /* ... */ : /* ... */) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
            NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
            NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64,
            std::nullopt, NVPTX::LDV_f32_v4_ari_64, std::nullopt);
      }
    } else {
      switch (N->getOpcode()) {
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
            NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
            NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
            std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
      }
    }
    // ...
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
            NVPTX::LDV_f64_v2_areg_64);
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64,
            std::nullopt, NVPTX::LDV_f32_v4_areg_64, std::nullopt);
      }
    } else {
      switch (N->getOpcode()) {
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
            NVPTX::LDV_f64_v2_areg);
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
            std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
      }
    }
    // ...
    Ops.append({Op1, Chain});
  }
  // ...
}
bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
  auto *Mem = cast<MemSDNode>(N);
  // ...
  EVT OrigType = N->getValueType(0);
  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;
  // ...
  if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
      (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
      (EltVT == MVT::i16 && OrigType == MVT::v2i16) ||
      (EltVT == MVT::i8 && OrigType == MVT::v4i8)) {
    assert(NumElts % OrigType.getVectorNumElements() == 0 &&
           "NumElts must be divisible by the number of elts in subvectors");
    // ...
  }

  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
  for (unsigned i = 0; i != NumElts; ++i) {
    // ...
  }

  std::optional<unsigned> Opcode;
  // ...
  if (SelectDirectAddr(Op1, Addr)) {
    // Direct-address (avar) forms:
    switch (N->getOpcode()) {
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
          NVPTX::INT_PTX_LDG_GLOBAL_i32avar, NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
          NVPTX::INT_PTX_LDG_GLOBAL_f32avar, NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
          NVPTX::INT_PTX_LDU_GLOBAL_i32avar, NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
          NVPTX::INT_PTX_LDU_GLOBAL_f32avar, NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar, NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar, NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar, NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar, NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
    }
  } else if (/* register+immediate address */) {
    // 64-bit pointer (ari64) forms:
    switch (N->getOpcode()) {
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
          NVPTX::INT_PTX_LDG_GLOBAL_i16ari64, NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
          NVPTX::INT_PTX_LDG_GLOBAL_i64ari64, NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
          NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
          NVPTX::INT_PTX_LDU_GLOBAL_i16ari64, NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
          NVPTX::INT_PTX_LDU_GLOBAL_i64ari64, NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
          NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
    }
    // 32-bit pointer (ari/ari32) forms:
    switch (N->getOpcode()) {
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
          NVPTX::INT_PTX_LDG_GLOBAL_i32ari, NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
          NVPTX::INT_PTX_LDG_GLOBAL_f32ari, NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
          NVPTX::INT_PTX_LDU_GLOBAL_i32ari, NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
          NVPTX::INT_PTX_LDU_GLOBAL_f32ari, NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
    }
  } else {
    // Register-only address, 64-bit pointer (areg64) forms:
    switch (N->getOpcode()) {
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
          NVPTX::INT_PTX_LDG_GLOBAL_i16areg64, NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
          NVPTX::INT_PTX_LDG_GLOBAL_i64areg64, NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
          NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
          NVPTX::INT_PTX_LDU_GLOBAL_i16areg64, NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
          NVPTX::INT_PTX_LDU_GLOBAL_i64areg64, NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
          NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
    }
    // Register-only address, 32-bit pointer (areg/areg32) forms:
    switch (N->getOpcode()) {
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
          NVPTX::INT_PTX_LDG_GLOBAL_i16areg, NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
          NVPTX::INT_PTX_LDG_GLOBAL_i64areg, NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
          NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
          NVPTX::INT_PTX_LDU_GLOBAL_i16areg, NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
          NVPTX::INT_PTX_LDU_GLOBAL_i64areg, NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
          NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
    }
  }
  // ...
  SDValue Ops[] = {Op1, Chain};
  // ...
  if (OrigType != EltVT &&
      /* ... */) {
    // ...
    for (unsigned i = 0; i != NumElts; ++i) {
      // ...
    }
  }
  // ...
}
bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  // ...
  assert(ST->writeMem() && "Expected store");
  // ...
  assert((PlainStore || AtomicStore) && "Expected store");
  // ...
  if (PlainStore && PlainStore->isIndexed())
    return false;
  EVT StoreVT = ST->getMemoryVT();
  // ...
         "Unexpected vector type");
  // ...
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType SourceVT =
      Value.getNode()->getSimpleValueType(0).SimpleTy;
  SmallVector<SDValue, 12> Ops(
      {Value, getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
       getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
       getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});

  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    // ...
    Ops.append({Addr, Chain});
  } else if (PointerSize == 64 ? /* ... */ : /* ... */) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    // ...
  } else if (PointerSize == 64 ? /* ... */ : /* ... */) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(SourceVT, /* ... */
                               NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
                               NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    // ...
  } else {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
  }
  // ...
}
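// Stores mirror the load path: the value to store is the first operand of the
// machine node, followed by the same ordering/scope/address-space/type
// immediates, and finally the selected address operands and the chain.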
bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
  // ...
  std::optional<unsigned> Opcode;
  // ...
  auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, MemSD);
  // ...
  switch (N->getOpcode()) {
    // v2 store:
    Ops.append({N->getOperand(1), N->getOperand(2)});
    N2 = N->getOperand(3);
    // v4 store:
    Ops.append({N->getOperand(1), N->getOperand(2), N->getOperand(3),
                /* ... */});
    N2 = N->getOperand(5);
  }
  // ...
  Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
              getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
              getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});

  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
          NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
          NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
          NVPTX::STV_i32_v4_avar, std::nullopt,
          NVPTX::STV_f32_v4_avar, std::nullopt);
    }
    // ...
  } else if (/* ... */) {
    switch (N->getOpcode()) {
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
          NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
          NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
      Opcode = pickOpcodeForVT(
          /* ... */, NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
          std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
    }
    // ...
  } else if (/* ... */) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
            NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
            NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64,
            std::nullopt, NVPTX::STV_f32_v4_ari_64, std::nullopt);
      }
    } else {
      switch (N->getOpcode()) {
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
            NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
            NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
            NVPTX::STV_i32_v4_ari, std::nullopt,
            NVPTX::STV_f32_v4_ari, std::nullopt);
      }
    }
    // ...
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
            NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
            NVPTX::STV_f64_v2_areg_64);
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64,
            std::nullopt, NVPTX::STV_f32_v4_areg_64, std::nullopt);
      }
    } else {
      switch (N->getOpcode()) {
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
            NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
            NVPTX::STV_f64_v2_areg);
        Opcode = pickOpcodeForVT(
            /* ... */, NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
            std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
      }
    }
  }
  // ...
}
bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
  // ...
  switch (Node->getOpcode()) {
  // ...
  }
  // ...
  EVT EltVT = Node->getValueType(0);
  // ...
  std::optional<unsigned> Opcode;
  // ...
  Opcode = pickOpcodeForVT(/* ... */, NVPTX::LoadParamMemI8,
                           NVPTX::LoadParamMemI16, NVPTX::LoadParamMemI32,
                           NVPTX::LoadParamMemI64, NVPTX::LoadParamMemF32,
                           NVPTX::LoadParamMemF64);
  // ...
  Opcode = pickOpcodeForVT(/* ... */, NVPTX::LoadParamMemV2I16,
                           NVPTX::LoadParamMemV2I32, NVPTX::LoadParamMemV2I64,
                           NVPTX::LoadParamMemV2F32, NVPTX::LoadParamMemV2F64);
  // ...
  Opcode = pickOpcodeForVT(/* ... */, NVPTX::LoadParamMemV4I16,
                           NVPTX::LoadParamMemV4I32, std::nullopt,
                           NVPTX::LoadParamMemV4F32, std::nullopt);
  // ...
  } else if (VecSize == 2) {
    // ...
  }
  // ...
  EVT EVTs[] = {EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue};
  // ...
  unsigned OffsetVal = Offset->getAsZExtVal();
  // ...
}
bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
  // ...
  unsigned OffsetVal = Offset->getAsZExtVal();
  // ...
  unsigned NumElts = 1;
  switch (N->getOpcode()) {
  // ...
  }
  // ...
  for (unsigned i = 0; i < NumElts; ++i)
    // ...
  std::optional<unsigned> Opcode = 0;
  // ...
  Opcode = pickOpcodeForVT(/* ... */, NVPTX::StoreRetvalI8,
                           NVPTX::StoreRetvalI16, NVPTX::StoreRetvalI32,
                           NVPTX::StoreRetvalI64, NVPTX::StoreRetvalF32,
                           NVPTX::StoreRetvalF64);
  if (Opcode == NVPTX::StoreRetvalI8) {
    // ...
    switch (Ops[0].getSimpleValueType().SimpleTy) {
    // ...
      Opcode = NVPTX::StoreRetvalI8TruncI32;
    // ...
      Opcode = NVPTX::StoreRetvalI8TruncI64;
    }
  }
  // ...
  Opcode = pickOpcodeForVT(/* ... */, NVPTX::StoreRetvalV2I8,
                           NVPTX::StoreRetvalV2I16, NVPTX::StoreRetvalV2I32,
                           NVPTX::StoreRetvalV2I64, NVPTX::StoreRetvalV2F32,
                           NVPTX::StoreRetvalV2F64);
  // ...
  Opcode = pickOpcodeForVT(/* ... */, NVPTX::StoreRetvalV4I8,
                           NVPTX::StoreRetvalV4I16, NVPTX::StoreRetvalV4I32,
                           std::nullopt, NVPTX::StoreRetvalV4F32, std::nullopt);
  // ...
}
#define getOpcV2H(ty, opKind0, opKind1)                                        \
  NVPTX::StoreParamV2##ty##_##opKind0##opKind1

#define getOpcV2H1(ty, opKind0, isImm1)                                        \
  (isImm1) ? getOpcV2H(ty, opKind0, i) : getOpcV2H(ty, opKind0, r)

#define getOpcodeForVectorStParamV2(ty, isimm)                                 \
  (isimm[0]) ? getOpcV2H1(ty, i, isimm[1]) : getOpcV2H1(ty, r, isimm[1])

#define getOpcV4H(ty, opKind0, opKind1, opKind2, opKind3)                      \
  NVPTX::StoreParamV4##ty##_##opKind0##opKind1##opKind2##opKind3

#define getOpcV4H3(ty, opKind0, opKind1, opKind2, isImm3)                      \
  (isImm3) ? getOpcV4H(ty, opKind0, opKind1, opKind2, i)                       \
           : getOpcV4H(ty, opKind0, opKind1, opKind2, r)

#define getOpcV4H2(ty, opKind0, opKind1, isImm2, isImm3)                       \
  (isImm2) ? getOpcV4H3(ty, opKind0, opKind1, i, isImm3)                       \
           : getOpcV4H3(ty, opKind0, opKind1, r, isImm3)

#define getOpcV4H1(ty, opKind0, isImm1, isImm2, isImm3)                        \
  (isImm1) ? getOpcV4H2(ty, opKind0, i, isImm2, isImm3)                        \
           : getOpcV4H2(ty, opKind0, r, isImm2, isImm3)

#define getOpcodeForVectorStParamV4(ty, isimm)                                 \
  (isimm[0]) ? getOpcV4H1(ty, i, isimm[1], isimm[2], isimm[3])                 \
             : getOpcV4H1(ty, r, isimm[1], isimm[2], isimm[3])

#define getOpcodeForVectorStParam(n, ty, isimm)                                \
  (n == 2) ? getOpcodeForVectorStParamV2(ty, isimm)                            \
           : getOpcodeForVectorStParamV4(ty, isimm)
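// These macros assemble a NVPTX::StoreParamV{2,4}<Ty>_<r|i>... opcode name,
// choosing the register (r) or immediate (i) operand form independently for
// each of the two or four stored elements, based on the isimm[] flags.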
static unsigned pickOpcodeForVectorStParam(SmallVector<SDValue, 8> &Ops,
                                           unsigned NumElts,
                                           MVT::SimpleValueType MemTy,
                                           SelectionDAG *CurDAG, SDLoc DL) {
  // ...
  for (unsigned i = 0; i < NumElts; i++) {
    IsImm[i] = (isa<ConstantSDNode>(Ops[i]) || isa<ConstantFPSDNode>(Ops[i]));
    // ...
  }
  // ...
  if (MemTy == MVT::f32 || MemTy == MVT::f64) {
    // ...
  }
  // ...
    assert(NumElts == 2 && "MVT too large for NumElts > 2");
  // ...
    assert(NumElts == 2 && "MVT too large for NumElts > 2");
  // ...
    return (NumElts == 2) ? NVPTX::StoreParamV2I8_rr
                          : NVPTX::StoreParamV4I8_rrrr;
  // ...
    return (NumElts == 2) ? NVPTX::StoreParamV2I16_rr
                          : NVPTX::StoreParamV4I16_rrrr;
  // ...
    return (NumElts == 2) ? NVPTX::StoreParamV2I32_rr
                          : NVPTX::StoreParamV4I32_rrrr;
  // ...
}
bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
  // ...
  unsigned ParamVal = Param->getAsZExtVal();
  unsigned OffsetVal = Offset->getAsZExtVal();
  // ...
  SDValue Glue = N->getOperand(N->getNumOperands() - 1);
  // ...
  switch (N->getOpcode()) {
  // ...
  }
  // ...
  for (unsigned i = 0; i < NumElts; ++i)
    // ...
  std::optional<unsigned> Opcode;
  switch (N->getOpcode()) {
  // ...
    if (MemTy != MVT::f16 && MemTy != MVT::v2f16 &&
        (isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm))) {
      // ...
      if (MemTy == MVT::f32 || MemTy == MVT::f64) {
        // ...
      }
      // ...
      Opcode = pickOpcodeForVT(/* ... */, NVPTX::StoreParamI16_i,
                               NVPTX::StoreParamI32_i, NVPTX::StoreParamI64_i,
                               NVPTX::StoreParamF32_i, NVPTX::StoreParamF64_i);
    } else {
      Opcode = pickOpcodeForVT(/* ... */, NVPTX::StoreParamI8_r,
                               NVPTX::StoreParamI16_r, NVPTX::StoreParamI32_r,
                               NVPTX::StoreParamI64_r, NVPTX::StoreParamF32_r,
                               NVPTX::StoreParamF64_r);
      if (Opcode == NVPTX::StoreParamI8_r) {
        // ...
        switch (Ops[0].getSimpleValueType().SimpleTy) {
        // ...
          Opcode = NVPTX::StoreParamI8TruncI32_r;
        // ...
          Opcode = NVPTX::StoreParamI8TruncI64_r;
        }
      }
    }
  // ...
    Opcode = NVPTX::StoreParamI32_r;
    // ...
        CurDAG->getMachineNode(/* ... */, DL, MVT::i32, Ops[0], CvtNone);
  // ...
    Opcode = NVPTX::StoreParamI32_r;
    // ...
        CurDAG->getMachineNode(/* ... */, DL, MVT::i32, Ops[0], CvtNone);
  // ...
}
bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
  // ...
  bool IsSigned = false;
  // ...
  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
    // ...
  }
  // ...
    Val = LHS.getNode()->getOperand(0);
    Start = LHS.getNode()->getOperand(1);
    // ...
    int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
    if (NumBits > GoodBits) {
      // ...
    }
  // ...
  if (isa<ConstantSDNode>(AndLHS)) {
    // ...
  }
  // ...
    NumBits = NumZeros + NumOnes - ShiftAmt;
    // ...
    if (ShiftAmt < NumZeros) {
      // ...
    }
  // ...
    Val = LHS->getOperand(0);
  // ...
  if (OuterShiftAmt < InnerShiftAmt) {
    // ...
  }
  // ...
    Opc = NVPTX::BFE_S32rii;
    Opc = NVPTX::BFE_U32rii;
    Opc = NVPTX::BFE_S64rii;
    Opc = NVPTX::BFE_U64rii;
  // ...
}
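// tryBFE pattern-matches shift-and-mask combinations into the PTX bfe.{s,u}
// bit-field-extract instructions; the "rii" suffix denotes a register source
// with immediate start and length operands.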
static bool isAddLike(const SDValue V) {
  return V.getOpcode() == ISD::ADD ||
         (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
}
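// An OR whose operands share no set bits (the "disjoint" flag) computes the
// same result as an ADD, so it may participate in the address folding below.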
  // ...
  FindRootAddressAndTotalOffset =
      [&](SDValue /* ... */,
          uint64_t AccumulatedOffset) -> std::optional<uint64_t> {
    // ...
      AccumulatedOffset += CN->getZExtValue();
      if (SelectDirectAddr(PossibleBaseAddr, Base))
        return AccumulatedOffset;
      return FindRootAddressAndTotalOffset(PossibleBaseAddr,
                                           /* ... */);
    // ...
    return std::nullopt;
  };
  if (auto AccumulatedOffset = FindRootAddressAndTotalOffset(Addr, 0)) {
    // ...
  }
  // ...
  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
    // ...
  }
  // ...
  if (/* ... */ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
    // ...
  if (!CN->getAPIntValue().isSignedIntN(32))
    // ...
  // ...
      /* ... */ SDLoc(OpNode), MVT::i32);
  // ...
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  const Value *Src = nullptr;
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
      // ...
    Src = mN->getMemOperand()->getValue();
  }
  // ...
  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);
  // ...
}

bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  // ...
    if (SelectDirectAddr(Op, Op0)) {
      OutOps.push_back(Op0);
      // ...
    }
    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
      OutOps.push_back(Op0);
      OutOps.push_back(Op1);
      // ...
    }
  }
  // ...
}
void NVPTXDAGToDAGISel::SelectV2I64toI128(SDNode *N) {
  // ...
  NewOps[0] = N->getOperand(0);
  // ...
  if (N->getNumOperands() == 5)
    NewOps[3] = N->getOperand(4);
  // ...
}

void NVPTXDAGToDAGISel::SelectI128toV2I64(SDNode *N) {
  // ...
      CurDAG->getMachineNode(NVPTX::I128toV2I64, DL, /* ... */);
  // ...
}
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
                                             /* ... */) {
  // ...
    return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
    return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
    return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
  // ...
    return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
    return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
    return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
  // ...
    return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
    return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
    return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
  // ...
    return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
    return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
    return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
  // ...
    return NVPTX::CVT_f32_f16;
    return NVPTX::CVT_f64_f16;
  // ...
}
bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {
  // ...
  unsigned int FenceOp =
      getFenceOp(/* ... */, Scopes[N->getConstantOperandVal(2)], Subtarget);
  // ...
}

NVPTX::Scope NVPTXScopes::operator[](SyncScope::ID ID) const {
  // ...
  report_fatal_error(/* ... */ "NVPTXScopes::operator[]");
  auto S = Scopes.find(ID);
  if (S == Scopes.end()) {
    // ...
  }
  // ...
}
#define CP_ASYNC_BULK_TENSOR_OPCODE(dir, dim, mode, is_s32, suffix)            \
  (is_s32                                                                      \
       ? NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_SHARED32_##mode##suffix   \
       : NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_##mode##suffix)

#define CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(op, dim, mode, is_ch, is_s32)     \
  (is_ch ? (CP_ASYNC_BULK_TENSOR_OPCODE(op, dim, mode, is_s32, _CH))           \
         : (CP_ASYNC_BULK_TENSOR_OPCODE(op, dim, mode, is_s32, )))

#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(dim, mode, is_reduce, is_ch,       \
                                            is_s32)                            \
  (is_reduce                                                                   \
       ? (CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(RED, dim, mode, is_ch, is_s32)) \
       : (CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(S2G, dim, mode, is_ch,          \
                                               is_s32)))
#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(dim, mode, is_mc, is_ch, is_s32)   \
  /* ... */ {                                                                  \
    if (is_mc && is_ch)                                                        \
      return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _MC_CH);      \
    if (is_ch)                                                                 \
      return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _CH);         \
    if (is_mc)                                                                 \
      return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _MC);         \
    return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, );              \
  } /* ... */

#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(dim, mode, is_ch)             \
  (is_ch ? NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode##_CH            \
         : NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode)
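// These macros splice together opcode names of the form
// NVPTX::CP_ASYNC_BULK_TENSOR_<dir>_<dim>[_SHARED32]_<mode>[_MC][_CH]:
// SHARED32 selects the 32-bit shared-pointer form, _MC the multicast variant,
// and _CH the cache-hint (L2 policy) variant.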
static unsigned GetCpAsyncBulkTensorS2GOpcode(size_t Dim, bool IsShared32,
                                              bool IsCacheHint, bool IsIm2Col,
                                              bool IsReduce = false) {
  // ... im2col mode, one case per dimension (3D/4D/5D):
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(/* ... */,
                                               IsCacheHint, IsShared32);
    llvm_unreachable(/* ... */ "GetCpAsyncBulkTensorS2GOpcode.");
  // ... tile mode, one case per dimension (1D through 5D):
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(/* ... */,
                                               IsCacheHint, IsShared32);
    llvm_unreachable(
        "Invalid Dimension in tile mode for GetCpAsyncBulkTensorS2GOpcode.");
}

static unsigned GetCpAsyncBulkTensorG2SOpcode(size_t Dim, bool IsShared32,
                                              bool IsMultiCast,
                                              bool IsCacheHint, bool IsIm2Col) {
  // ... im2col mode (3D/4D/5D):
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(/* ... */,
                                               IsCacheHint, IsShared32);
    llvm_unreachable(/* ... */ "GetCpAsyncBulkTensorG2SOpcode.");
  // ... tile mode (1D through 5D):
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(/* ... */,
                                               IsCacheHint, IsShared32);
    return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(/* ... */,
                                               IsCacheHint, IsShared32);
    llvm_unreachable(
        "Invalid Dimension in tile mode for GetCpAsyncBulkTensorG2SOpcode.");
}

static unsigned GetCpAsyncBulkTensorPrefetchOpcode(size_t Dim, bool IsCacheHint,
                                                   bool IsIm2Col) {
  // ...
    llvm_unreachable(/* ... */ "GetCpAsyncBulkTensorPrefetchOpcode.");
  // ...
    llvm_unreachable(/* ... */ "GetCpAsyncBulkTensorPrefetchOpcode.");
}
static size_t GetDimsFromIntrinsic(unsigned IID) {
  switch (IID) {
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d:
    return 3;
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d:
    return 4;
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d:
    return 5;
  // ...
  }
}
void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorG2SCommon(SDNode *N,
                                                         bool IsIm2Col) {
  // ...
  size_t NumOps = N->getNumOperands();
  // ...
  size_t NumOffsets = IsIm2Col ? (NumDims - 2) : 0;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  bool IsMultiCast = N->getConstantOperandVal(NumOps - 2) == 1;
  size_t NumBaseArgs = NumDims + NumOffsets + 3;
  size_t MultiCastIdx = NumBaseArgs + 2;
  // ...
  Ops.push_back(N->getOperand(MultiCastIdx + 1));
  // ...
  unsigned Opcode = GetCpAsyncBulkTensorG2SOpcode(
      NumDims, IsShared32, IsMultiCast, IsCacheHint, IsIm2Col);
  // ...
}

void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorS2GCommon(SDNode *N,
                                                         bool IsIm2Col) {
  // ...
  size_t NumOps = N->getNumOperands();
  size_t NumDims = NumOps - 6;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  size_t NumArgs = NumDims + (IsCacheHint ? 3 : 2);
  // ...
}

void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorPrefetchCommon(SDNode *N,
                                                              bool IsIm2Col) {
  // ...
  size_t NumOps = N->getNumOperands();
  // ...
  size_t NumOffsets = IsIm2Col ? (NumDims - 2) : 0;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  size_t NumArgs = NumDims + NumOffsets + (IsCacheHint ? 2 : 1);
  // ...
}

void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorReduceCommon(SDNode *N,
                                                            unsigned /*RedOp*/,
                                                            bool IsIm2Col) {
  // ...
  size_t NumOps = N->getNumOperands();
  size_t NumDims = NumOps - 6;
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  size_t NumArgs = NumDims + (IsCacheHint ? 3 : 2);
  // ...
  unsigned Opcode = GetCpAsyncBulkTensorS2GOpcode(
      NumDims, IsShared32, IsCacheHint, IsIm2Col, true);
  // ...
}
void NVPTXDAGToDAGISel::SelectCpAsyncBulkS2G(SDNode *N) {
  // ...
  size_t NumOps = N->getNumOperands();
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  size_t NumArgs = IsCacheHint ? 4 : 3;
  // ...
  Opcode = IsShared32 ? NVPTX::CP_ASYNC_BULK_S2G_SHARED32_CH
                      : NVPTX::CP_ASYNC_BULK_S2G_CH;
  // ...
  Opcode = IsShared32 ? NVPTX::CP_ASYNC_BULK_S2G_SHARED32
                      : NVPTX::CP_ASYNC_BULK_S2G;
  // ...
}

void NVPTXDAGToDAGISel::SelectCpAsyncBulkG2S(SDNode *N) {
  // ...
  size_t NumOps = N->getNumOperands();
  bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
  bool IsMultiCast = N->getConstantOperandVal(NumOps - 2) == 1;
  size_t NumBaseArgs = 4;
  size_t MultiCastIdx = NumBaseArgs + 2;
  // ...
  Ops.push_back(N->getOperand(MultiCastIdx + 1));
  // ...
  unsigned Opcode = [&]() {
    if (IsMultiCast && IsCacheHint)
      return IsShared32 ? NVPTX::CP_ASYNC_BULK_G2S_SHARED32_MC_CH
                        : NVPTX::CP_ASYNC_BULK_G2S_MC_CH;
    if (IsMultiCast)
      return IsShared32 ? NVPTX::CP_ASYNC_BULK_G2S_SHARED32_MC
                        : NVPTX::CP_ASYNC_BULK_G2S_MC;
    if (IsCacheHint)
      return IsShared32 ? NVPTX::CP_ASYNC_BULK_G2S_SHARED32_CH
                        : NVPTX::CP_ASYNC_BULK_G2S_CH;
    return IsShared32 ? NVPTX::CP_ASYNC_BULK_G2S_SHARED32
                      : NVPTX::CP_ASYNC_BULK_G2S;
  }();
  // ...
}
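// The optional multicast and cache-hint operands arrive as trailing i1 flags
// of the cp.async.bulk intrinsics (checked above via getConstantOperandVal)
// and select the _MC and _CH opcode variants; SHARED32 forms are used when
// shared-memory pointers are 32 bits wide.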
bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(1);
  // ...
  auto CastTy = [](TMARedTy Op) { return static_cast<unsigned>(Op); };
  switch (IID) {
  // ...
  case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
    SelectCpAsyncBulkG2S(N);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_global:
    SelectCpAsyncBulkS2G(N);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_5d:
    SelectCpAsyncBulkTensorS2GCommon(N);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_5d:
    SelectCpAsyncBulkTensorS2GCommon(N, true);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d:
    SelectCpAsyncBulkTensorG2SCommon(N);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
    SelectCpAsyncBulkTensorG2SCommon(N, true);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_5d:
    SelectCpAsyncBulkTensorPrefetchCommon(N);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d:
    SelectCpAsyncBulkTensorPrefetchCommon(N, true);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::ADD));
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::ADD),
                                        /* ... */);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MIN));
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MIN),
                                        /* ... */);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MAX));
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MAX),
                                        /* ... */);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::INC));
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::INC),
                                        /* ... */);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::DEC));
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::DEC),
                                        /* ... */);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::AND));
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::AND),
                                        /* ... */);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::OR));
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::OR),
                                        /* ... */);
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::XOR));
    return true;
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d:
    SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::XOR),
                                        /* ... */);
    return true;
  }
}