34#include "llvm/IR/IntrinsicsAMDGPU.h"
41#define DEBUG_TYPE "si-instr-info"
43#define GET_INSTRINFO_CTOR_DTOR
44#include "AMDGPUGenInstrInfo.inc"
47#define GET_D16ImageDimIntrinsics_IMPL
48#define GET_ImageDimIntrinsicTable_IMPL
49#define GET_RsrcIntrinsics_IMPL
50#include "AMDGPUGenSearchableTables.inc"
58 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
61 "amdgpu-fix-16-bit-physreg-copies",
62 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
78 unsigned N =
Node->getNumOperands();
79 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
91 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0,
OpName);
92 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1,
OpName);
94 if (Op0Idx == -1 && Op1Idx == -1)
98 if ((Op0Idx == -1 && Op1Idx != -1) ||
99 (Op1Idx == -1 && Op0Idx != -1))
120 return !
MI.memoperands_empty() &&
122 return MMO->isLoad() && MMO->isInvariant();
144 if (!
MI.hasImplicitDef() &&
145 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
146 !
MI.mayRaiseFPException())
154bool SIInstrInfo::resultDependsOnExec(
const MachineInstr &
MI)
const {
157 if (
MI.isCompare()) {
168 switch (
Use.getOpcode()) {
169 case AMDGPU::S_AND_SAVEEXEC_B32:
170 case AMDGPU::S_AND_SAVEEXEC_B64:
172 case AMDGPU::S_AND_B32:
173 case AMDGPU::S_AND_B64:
174 if (!
Use.readsRegister(AMDGPU::EXEC,
nullptr))
185 if (!
MI.isConvergent())
188 switch (
MI.getOpcode()) {
191 case AMDGPU::V_READFIRSTLANE_B32:
208 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
213 for (
auto Op :
MI.uses()) {
214 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
220 if (FromCycle ==
nullptr)
226 while (FromCycle && !FromCycle->
contains(ToCycle)) {
246 int64_t &Offset1)
const {
254 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
258 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
274 int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
275 int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
276 if (Offset0Idx == -1 || Offset1Idx == -1)
283 Offset0Idx -=
get(Opc0).NumDefs;
284 Offset1Idx -=
get(Opc1).NumDefs;
314 if (!Load0Offset || !Load1Offset)
331 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
332 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
334 if (OffIdx0 == -1 || OffIdx1 == -1)
340 OffIdx0 -=
get(Opc0).NumDefs;
341 OffIdx1 -=
get(Opc1).NumDefs;
360 case AMDGPU::DS_READ2ST64_B32:
361 case AMDGPU::DS_READ2ST64_B64:
362 case AMDGPU::DS_WRITE2ST64_B32:
363 case AMDGPU::DS_WRITE2ST64_B64:
378 OffsetIsScalable =
false;
395 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
397 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
398 if (
Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
411 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
412 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
413 if (Offset0 + 1 != Offset1)
424 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
432 Offset = EltSize * Offset0;
434 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
435 if (DataOpIdx == -1) {
436 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
438 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
454 if (BaseOp && !BaseOp->
isFI())
462 if (SOffset->
isReg())
468 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
470 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
479 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
480 int SRsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RsrcOpName);
482 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
483 if (VAddr0Idx >= 0) {
485 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
492 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
507 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
524 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
526 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
543 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
551 if (MO1->getAddrSpace() != MO2->getAddrSpace())
554 const auto *Base1 = MO1->getValue();
555 const auto *Base2 = MO2->getValue();
556 if (!Base1 || !Base2)
564 return Base1 == Base2;
568 int64_t Offset1,
bool OffsetIsScalable1,
570 int64_t Offset2,
bool OffsetIsScalable2,
571 unsigned ClusterSize,
572 unsigned NumBytes)
const {
585 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
604 const unsigned LoadSize = NumBytes / ClusterSize;
605 const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
606 return NumDWords <= MaxMemoryClusterDWords;
620 int64_t Offset0, int64_t Offset1,
621 unsigned NumLoads)
const {
622 assert(Offset1 > Offset0 &&
623 "Second offset should be larger than first offset!");
628 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
635 const char *Msg =
"illegal VGPR to SGPR copy") {
656 assert((
TII.getSubtarget().hasMAIInsts() &&
657 !
TII.getSubtarget().hasGFX90AInsts()) &&
658 "Expected GFX908 subtarget.");
661 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
662 "Source register of the copy should be either an SGPR or an AGPR.");
665 "Destination register of the copy should be an AGPR.");
674 for (
auto Def =
MI,
E =
MBB.begin(); Def !=
E; ) {
677 if (!Def->modifiesRegister(SrcReg, &RI))
680 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
681 Def->getOperand(0).getReg() != SrcReg)
688 bool SafeToPropagate =
true;
691 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
692 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
693 SafeToPropagate =
false;
695 if (!SafeToPropagate)
698 for (
auto I = Def;
I !=
MI; ++
I)
699 I->clearRegisterKills(DefOp.
getReg(), &RI);
708 if (ImpUseSuperReg) {
709 Builder.addReg(ImpUseSuperReg,
717 RS.enterBasicBlockEnd(
MBB);
718 RS.backward(std::next(
MI));
727 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
730 assert(
MBB.getParent()->getRegInfo().isReserved(Tmp) &&
731 "VGPR used for an intermediate copy should have been reserved.");
736 Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
746 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
747 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
748 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
755 if (ImpUseSuperReg) {
756 UseBuilder.
addReg(ImpUseSuperReg,
777 for (
unsigned Idx = 0; Idx < BaseIndices.
size(); ++Idx) {
778 int16_t SubIdx = BaseIndices[Idx];
779 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
780 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
781 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
782 unsigned Opcode = AMDGPU::S_MOV_B32;
785 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
786 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
787 if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.
size())) {
791 DestSubReg = RI.getSubReg(DestReg, SubIdx);
792 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
793 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
794 Opcode = AMDGPU::S_MOV_B64;
809 assert(FirstMI && LastMI);
817 LastMI->addRegisterKilled(SrcReg, &RI);
823 Register SrcReg,
bool KillSrc,
bool RenamableDest,
824 bool RenamableSrc)
const {
826 unsigned Size = RI.getRegSizeInBits(*RC);
828 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
834 if (((
Size == 16) != (SrcSize == 16))) {
836 assert(ST.useRealTrue16Insts());
838 MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
841 if (DestReg == SrcReg) {
847 RC = RI.getPhysRegBaseClass(DestReg);
848 Size = RI.getRegSizeInBits(*RC);
849 SrcRC = RI.getPhysRegBaseClass(SrcReg);
850 SrcSize = RI.getRegSizeInBits(*SrcRC);
854 if (RC == &AMDGPU::VGPR_32RegClass) {
856 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
857 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
858 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
859 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
865 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
866 RC == &AMDGPU::SReg_32RegClass) {
867 if (SrcReg == AMDGPU::SCC) {
874 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
875 if (DestReg == AMDGPU::VCC_LO) {
893 if (RC == &AMDGPU::SReg_64RegClass) {
894 if (SrcReg == AMDGPU::SCC) {
901 if (!AMDGPU::SReg_64_EncodableRegClass.
contains(SrcReg)) {
902 if (DestReg == AMDGPU::VCC) {
920 if (DestReg == AMDGPU::SCC) {
923 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
927 assert(ST.hasScalarCompareEq64());
941 if (RC == &AMDGPU::AGPR_32RegClass) {
942 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
943 (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
949 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg) && ST.hasGFX90AInsts()) {
958 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
965 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
966 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
968 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
969 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
970 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
971 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
974 MCRegister NewDestReg = RI.get32BitRegister(DestReg);
975 MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
988 if (IsAGPRDst || IsAGPRSrc) {
989 if (!DstLow || !SrcLow) {
991 "Cannot use hi16 subreg with an AGPR!");
998 if (ST.useRealTrue16Insts()) {
1004 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
1005 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
1017 if (IsSGPRSrc && !ST.hasSDWAScalar()) {
1018 if (!DstLow || !SrcLow) {
1020 "Cannot use hi16 subreg on VI!");
1043 if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
1044 if (ST.hasMovB64()) {
1049 if (ST.hasPkMovB32()) {
1065 const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
1066 if (RI.isSGPRClass(RC)) {
1067 if (!RI.isSGPRClass(SrcRC)) {
1071 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1077 unsigned EltSize = 4;
1078 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1079 if (RI.isAGPRClass(RC)) {
1080 if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
1081 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1082 else if (RI.hasVGPRs(SrcRC) ||
1083 (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
1084 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1086 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1087 }
else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
1088 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1089 }
else if ((
Size % 64 == 0) && RI.hasVGPRs(RC) &&
1090 (RI.isProperlyAlignedRC(*RC) &&
1091 (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
1093 if (ST.hasMovB64()) {
1094 Opcode = AMDGPU::V_MOV_B64_e32;
1096 }
else if (ST.hasPkMovB32()) {
1097 Opcode = AMDGPU::V_PK_MOV_B32;
1107 std::unique_ptr<RegScavenger> RS;
1108 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1109 RS = std::make_unique<RegScavenger>();
1115 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1116 const bool CanKillSuperReg = KillSrc && !Overlap;
1118 for (
unsigned Idx = 0; Idx < SubIndices.
size(); ++Idx) {
1121 SubIdx = SubIndices[Idx];
1123 SubIdx = SubIndices[SubIndices.
size() - Idx - 1];
1124 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1125 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1126 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1128 bool IsFirstSubreg = Idx == 0;
1129 bool UseKill = CanKillSuperReg && Idx == SubIndices.
size() - 1;
1131 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1135 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1136 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1182 return &AMDGPU::VGPR_32RegClass;
1195 "Not a VGPR32 reg");
1197 if (
Cond.size() == 1) {
1207 }
else if (
Cond.size() == 2) {
1208 assert(
Cond[0].isImm() &&
"Cond[0] is not an immediate");
1210 case SIInstrInfo::SCC_TRUE: {
1221 case SIInstrInfo::SCC_FALSE: {
1232 case SIInstrInfo::VCCNZ: {
1246 case SIInstrInfo::VCCZ: {
1260 case SIInstrInfo::EXECNZ: {
1273 case SIInstrInfo::EXECZ: {
1323 int64_t &ImmVal)
const {
1324 switch (
MI.getOpcode()) {
1325 case AMDGPU::V_MOV_B32_e32:
1326 case AMDGPU::S_MOV_B32:
1327 case AMDGPU::S_MOVK_I32:
1328 case AMDGPU::S_MOV_B64:
1329 case AMDGPU::V_MOV_B64_e32:
1330 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1331 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1332 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1333 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1334 case AMDGPU::V_MOV_B64_PSEUDO:
1335 case AMDGPU::V_MOV_B16_t16_e32: {
1339 return MI.getOperand(0).getReg() == Reg;
1344 case AMDGPU::V_MOV_B16_t16_e64: {
1346 if (Src0.
isImm() && !
MI.getOperand(1).getImm()) {
1348 return MI.getOperand(0).getReg() == Reg;
1353 case AMDGPU::S_BREV_B32:
1354 case AMDGPU::V_BFREV_B32_e32:
1355 case AMDGPU::V_BFREV_B32_e64: {
1359 return MI.getOperand(0).getReg() == Reg;
1364 case AMDGPU::S_NOT_B32:
1365 case AMDGPU::V_NOT_B32_e32:
1366 case AMDGPU::V_NOT_B32_e64: {
1369 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1370 return MI.getOperand(0).getReg() == Reg;
1380std::optional<int64_t>
1385 if (!
Op.isReg() || !
Op.getReg().isVirtual())
1386 return std::nullopt;
1389 if (Def && Def->isMoveImmediate()) {
1395 return std::nullopt;
1400 if (RI.isAGPRClass(DstRC))
1401 return AMDGPU::COPY;
1402 if (RI.getRegSizeInBits(*DstRC) == 16) {
1405 return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1407 if (RI.getRegSizeInBits(*DstRC) == 32)
1408 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1409 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
1410 return AMDGPU::S_MOV_B64;
1411 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
1412 return AMDGPU::V_MOV_B64_PSEUDO;
1413 return AMDGPU::COPY;
1418 bool IsIndirectSrc)
const {
1419 if (IsIndirectSrc) {
1421 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1423 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1425 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1427 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1429 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1431 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
1433 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
1435 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1437 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1439 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1441 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1443 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1445 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1446 if (VecSize <= 1024)
1447 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1453 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1455 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1457 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1459 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1461 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1463 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
1465 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
1467 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1469 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1471 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1473 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1475 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1477 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1478 if (VecSize <= 1024)
1479 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1486 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1488 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1490 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1492 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1494 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1496 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1498 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1500 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1502 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1504 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1506 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1508 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1510 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1511 if (VecSize <= 1024)
1512 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1519 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1521 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1523 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1525 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1527 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1529 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1531 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1533 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1535 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1537 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1539 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1541 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1543 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1544 if (VecSize <= 1024)
1545 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1552 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1554 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1556 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1558 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1559 if (VecSize <= 1024)
1560 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1567 bool IsSGPR)
const {
1579 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1586 return AMDGPU::SI_SPILL_S32_SAVE;
1588 return AMDGPU::SI_SPILL_S64_SAVE;
1590 return AMDGPU::SI_SPILL_S96_SAVE;
1592 return AMDGPU::SI_SPILL_S128_SAVE;
1594 return AMDGPU::SI_SPILL_S160_SAVE;
1596 return AMDGPU::SI_SPILL_S192_SAVE;
1598 return AMDGPU::SI_SPILL_S224_SAVE;
1600 return AMDGPU::SI_SPILL_S256_SAVE;
1602 return AMDGPU::SI_SPILL_S288_SAVE;
1604 return AMDGPU::SI_SPILL_S320_SAVE;
1606 return AMDGPU::SI_SPILL_S352_SAVE;
1608 return AMDGPU::SI_SPILL_S384_SAVE;
1610 return AMDGPU::SI_SPILL_S512_SAVE;
1612 return AMDGPU::SI_SPILL_S1024_SAVE;
1621 return AMDGPU::SI_SPILL_V16_SAVE;
1623 return AMDGPU::SI_SPILL_V32_SAVE;
1625 return AMDGPU::SI_SPILL_V64_SAVE;
1627 return AMDGPU::SI_SPILL_V96_SAVE;
1629 return AMDGPU::SI_SPILL_V128_SAVE;
1631 return AMDGPU::SI_SPILL_V160_SAVE;
1633 return AMDGPU::SI_SPILL_V192_SAVE;
1635 return AMDGPU::SI_SPILL_V224_SAVE;
1637 return AMDGPU::SI_SPILL_V256_SAVE;
1639 return AMDGPU::SI_SPILL_V288_SAVE;
1641 return AMDGPU::SI_SPILL_V320_SAVE;
1643 return AMDGPU::SI_SPILL_V352_SAVE;
1645 return AMDGPU::SI_SPILL_V384_SAVE;
1647 return AMDGPU::SI_SPILL_V512_SAVE;
1649 return AMDGPU::SI_SPILL_V1024_SAVE;
1658 return AMDGPU::SI_SPILL_AV32_SAVE;
1660 return AMDGPU::SI_SPILL_AV64_SAVE;
1662 return AMDGPU::SI_SPILL_AV96_SAVE;
1664 return AMDGPU::SI_SPILL_AV128_SAVE;
1666 return AMDGPU::SI_SPILL_AV160_SAVE;
1668 return AMDGPU::SI_SPILL_AV192_SAVE;
1670 return AMDGPU::SI_SPILL_AV224_SAVE;
1672 return AMDGPU::SI_SPILL_AV256_SAVE;
1674 return AMDGPU::SI_SPILL_AV288_SAVE;
1676 return AMDGPU::SI_SPILL_AV320_SAVE;
1678 return AMDGPU::SI_SPILL_AV352_SAVE;
1680 return AMDGPU::SI_SPILL_AV384_SAVE;
1682 return AMDGPU::SI_SPILL_AV512_SAVE;
1684 return AMDGPU::SI_SPILL_AV1024_SAVE;
1691 bool IsVectorSuperClass) {
1696 if (IsVectorSuperClass)
1697 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1699 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1705 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1712 if (ST.hasMAIInsts())
1731 FrameInfo.getObjectAlign(FrameIndex));
1732 unsigned SpillSize = RI.getSpillSize(*RC);
1735 if (RI.isSGPRClass(RC)) {
1737 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1738 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1739 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1747 if (SrcReg.
isVirtual() && SpillSize == 4) {
1757 if (RI.spillSGPRToVGPR())
1777 return AMDGPU::SI_SPILL_S32_RESTORE;
1779 return AMDGPU::SI_SPILL_S64_RESTORE;
1781 return AMDGPU::SI_SPILL_S96_RESTORE;
1783 return AMDGPU::SI_SPILL_S128_RESTORE;
1785 return AMDGPU::SI_SPILL_S160_RESTORE;
1787 return AMDGPU::SI_SPILL_S192_RESTORE;
1789 return AMDGPU::SI_SPILL_S224_RESTORE;
1791 return AMDGPU::SI_SPILL_S256_RESTORE;
1793 return AMDGPU::SI_SPILL_S288_RESTORE;
1795 return AMDGPU::SI_SPILL_S320_RESTORE;
1797 return AMDGPU::SI_SPILL_S352_RESTORE;
1799 return AMDGPU::SI_SPILL_S384_RESTORE;
1801 return AMDGPU::SI_SPILL_S512_RESTORE;
1803 return AMDGPU::SI_SPILL_S1024_RESTORE;
1812 return AMDGPU::SI_SPILL_V16_RESTORE;
1814 return AMDGPU::SI_SPILL_V32_RESTORE;
1816 return AMDGPU::SI_SPILL_V64_RESTORE;
1818 return AMDGPU::SI_SPILL_V96_RESTORE;
1820 return AMDGPU::SI_SPILL_V128_RESTORE;
1822 return AMDGPU::SI_SPILL_V160_RESTORE;
1824 return AMDGPU::SI_SPILL_V192_RESTORE;
1826 return AMDGPU::SI_SPILL_V224_RESTORE;
1828 return AMDGPU::SI_SPILL_V256_RESTORE;
1830 return AMDGPU::SI_SPILL_V288_RESTORE;
1832 return AMDGPU::SI_SPILL_V320_RESTORE;
1834 return AMDGPU::SI_SPILL_V352_RESTORE;
1836 return AMDGPU::SI_SPILL_V384_RESTORE;
1838 return AMDGPU::SI_SPILL_V512_RESTORE;
1840 return AMDGPU::SI_SPILL_V1024_RESTORE;
1849 return AMDGPU::SI_SPILL_AV32_RESTORE;
1851 return AMDGPU::SI_SPILL_AV64_RESTORE;
1853 return AMDGPU::SI_SPILL_AV96_RESTORE;
1855 return AMDGPU::SI_SPILL_AV128_RESTORE;
1857 return AMDGPU::SI_SPILL_AV160_RESTORE;
1859 return AMDGPU::SI_SPILL_AV192_RESTORE;
1861 return AMDGPU::SI_SPILL_AV224_RESTORE;
1863 return AMDGPU::SI_SPILL_AV256_RESTORE;
1865 return AMDGPU::SI_SPILL_AV288_RESTORE;
1867 return AMDGPU::SI_SPILL_AV320_RESTORE;
1869 return AMDGPU::SI_SPILL_AV352_RESTORE;
1871 return AMDGPU::SI_SPILL_AV384_RESTORE;
1873 return AMDGPU::SI_SPILL_AV512_RESTORE;
1875 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1882 bool IsVectorSuperClass) {
1887 if (IsVectorSuperClass)
1888 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1890 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1896 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1903 if (ST.hasMAIInsts())
1906 assert(!RI.isAGPRClass(RC));
1920 unsigned SpillSize = RI.getSpillSize(*RC);
1927 FrameInfo.getObjectAlign(FrameIndex));
1929 if (RI.isSGPRClass(RC)) {
1931 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1932 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1933 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1938 if (DestReg.
isVirtual() && SpillSize == 4) {
1943 if (RI.spillSGPRToVGPR())
1969 unsigned Quantity)
const {
1971 unsigned MaxSNopCount = 1u << ST.getSNopBits();
1972 while (Quantity > 0) {
1973 unsigned Arg = std::min(Quantity, MaxSNopCount);
1980 auto *MF =
MBB.getParent();
1983 assert(Info->isEntryFunction());
1985 if (
MBB.succ_empty()) {
1986 bool HasNoTerminator =
MBB.getFirstTerminator() ==
MBB.end();
1987 if (HasNoTerminator) {
1988 if (Info->returnsVoid()) {
2002 constexpr unsigned DoorbellIDMask = 0x3ff;
2003 constexpr unsigned ECQueueWaveAbort = 0x400;
2008 if (!
MBB.succ_empty() || std::next(
MI.getIterator()) !=
MBB.end()) {
2009 MBB.splitAt(
MI,
false);
2013 MBB.addSuccessor(TrapBB);
2023 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
2027 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_AND_B32), DoorbellRegMasked)
2032 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_OR_B32), SetWaveAbortBit)
2033 .
addUse(DoorbellRegMasked)
2034 .
addImm(ECQueueWaveAbort);
2035 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2036 .
addUse(SetWaveAbortBit);
2039 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2050 return MBB.getNextNode();
2054 switch (
MI.getOpcode()) {
2056 if (
MI.isMetaInstruction())
2061 return MI.getOperand(0).getImm() + 1;
2071 switch (
MI.getOpcode()) {
2073 case AMDGPU::S_MOV_B64_term:
2076 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2079 case AMDGPU::S_MOV_B32_term:
2082 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2085 case AMDGPU::S_XOR_B64_term:
2088 MI.setDesc(
get(AMDGPU::S_XOR_B64));
2091 case AMDGPU::S_XOR_B32_term:
2094 MI.setDesc(
get(AMDGPU::S_XOR_B32));
2096 case AMDGPU::S_OR_B64_term:
2099 MI.setDesc(
get(AMDGPU::S_OR_B64));
2101 case AMDGPU::S_OR_B32_term:
2104 MI.setDesc(
get(AMDGPU::S_OR_B32));
2107 case AMDGPU::S_ANDN2_B64_term:
2110 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
2113 case AMDGPU::S_ANDN2_B32_term:
2116 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
2119 case AMDGPU::S_AND_B64_term:
2122 MI.setDesc(
get(AMDGPU::S_AND_B64));
2125 case AMDGPU::S_AND_B32_term:
2128 MI.setDesc(
get(AMDGPU::S_AND_B32));
2131 case AMDGPU::S_AND_SAVEEXEC_B64_term:
2134 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
2137 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2140 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2143 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2144 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2147 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2148 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2150 case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2154 get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2157 case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
2160 int64_t Imm =
MI.getOperand(1).getImm();
2162 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2163 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2168 MI.eraseFromParent();
2174 case AMDGPU::V_MOV_B64_PSEUDO: {
2176 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2177 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2185 if (ST.hasMovB64() && Mov64RC->
contains(Dst)) {
2186 MI.setDesc(Mov64Desc);
2191 if (
SrcOp.isImm()) {
2193 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2194 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2218 if (ST.hasPkMovB32() &&
2237 MI.eraseFromParent();
2240 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2244 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2248 if (ST.has64BitLiterals()) {
2249 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2255 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2260 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2261 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2263 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2264 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2269 MI.eraseFromParent();
2272 case AMDGPU::V_SET_INACTIVE_B32: {
2276 .
add(
MI.getOperand(3))
2277 .
add(
MI.getOperand(4))
2278 .
add(
MI.getOperand(1))
2279 .
add(
MI.getOperand(2))
2280 .
add(
MI.getOperand(5));
2281 MI.eraseFromParent();
2284 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2285 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2286 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2287 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2288 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2289 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2290 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2291 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2292 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2293 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2294 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2295 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2296 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2297 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2298 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2299 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2300 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2301 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2302 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2303 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2304 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2305 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2306 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2307 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2308 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2309 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2310 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2311 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2312 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2313 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2314 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2315 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2316 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2320 if (RI.hasVGPRs(EltRC)) {
2321 Opc = AMDGPU::V_MOVRELD_B32_e32;
2323 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2324 : AMDGPU::S_MOVRELD_B32;
2329 bool IsUndef =
MI.getOperand(1).isUndef();
2330 unsigned SubReg =
MI.getOperand(3).getImm();
2331 assert(VecReg ==
MI.getOperand(1).getReg());
2336 .
add(
MI.getOperand(2))
2340 const int ImpDefIdx =
2342 const int ImpUseIdx = ImpDefIdx + 1;
2344 MI.eraseFromParent();
2347 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2348 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2349 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2350 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2351 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2352 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
2353 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
2354 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2355 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2356 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2357 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2358 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2359 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2360 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2361 assert(ST.useVGPRIndexMode());
2363 bool IsUndef =
MI.getOperand(1).isUndef();
2372 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2376 .
add(
MI.getOperand(2))
2380 const int ImpDefIdx =
2382 const int ImpUseIdx = ImpDefIdx + 1;
2389 MI.eraseFromParent();
2392 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2393 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2394 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2395 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2396 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2397 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
2398 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
2399 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2400 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2401 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2402 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2403 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2404 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2405 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2406 assert(ST.useVGPRIndexMode());
2409 bool IsUndef =
MI.getOperand(1).isUndef();
2413 .
add(
MI.getOperand(2))
2426 MI.eraseFromParent();
2429 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2432 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2433 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2452 if (ST.hasGetPCZeroExtension()) {
2456 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2463 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2473 MI.eraseFromParent();
2476 case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
2486 Op.setOffset(
Op.getOffset() + 4);
2488 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(
Op));
2492 MI.eraseFromParent();
2495 case AMDGPU::ENTER_STRICT_WWM: {
2501 case AMDGPU::ENTER_STRICT_WQM: {
2508 MI.eraseFromParent();
2511 case AMDGPU::EXIT_STRICT_WWM:
2512 case AMDGPU::EXIT_STRICT_WQM: {
2518 case AMDGPU::SI_RETURN: {
2532 MI.eraseFromParent();
2536 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2537 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2538 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2541 case AMDGPU::S_GETPC_B64_pseudo:
2542 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2543 if (ST.hasGetPCZeroExtension()) {
2545 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2554 case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
2555 assert(ST.hasBF16PackedInsts());
2556 MI.setDesc(
get(AMDGPU::V_PK_MAX_NUM_BF16));
2567 case AMDGPU::GET_STACK_BASE:
2570 if (ST.getFrameLowering()->mayReserveScratchForCWSR(*
MBB.getParent())) {
2577 Register DestReg =
MI.getOperand(0).getReg();
2587 MI.getOperand(
MI.getNumExplicitOperands()).setIsDead(
false);
2588 MI.getOperand(
MI.getNumExplicitOperands()).setIsUse();
2589 MI.setDesc(
get(AMDGPU::S_CMOVK_I32));
2592 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2595 MI.getNumExplicitOperands());
2613 case AMDGPU::S_MOV_B64:
2614 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2623 if (UsedLanes.
all())
2628 unsigned LoSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub0);
2629 unsigned HiSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub1);
2631 bool NeedLo = (UsedLanes & RI.getSubRegIndexLaneMask(LoSubReg)).any();
2632 bool NeedHi = (UsedLanes & RI.getSubRegIndexLaneMask(HiSubReg)).any();
2634 if (NeedLo && NeedHi)
2638 int32_t Imm32 = NeedLo ?
Lo_32(Imm64) :
Hi_32(Imm64);
2640 unsigned UseSubReg = NeedLo ? LoSubReg : HiSubReg;
2649 case AMDGPU::S_LOAD_DWORDX16_IMM:
2650 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2663 for (
auto &CandMO :
I->operands()) {
2664 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2672 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2676 unsigned SubregSize = RI.getSubRegIdxSize(UseMO->
getSubReg());
2682 unsigned NewOpcode = -1;
2683 if (SubregSize == 256)
2684 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2685 else if (SubregSize == 128)
2686 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2696 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2701 MI->getOperand(0).setReg(DestReg);
2702 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2706 OffsetMO->
setImm(FinalOffset);
2712 MI->setMemRefs(*MF, NewMMOs);
2725std::pair<MachineInstr*, MachineInstr*>
2727 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2729 if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2732 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2733 return std::pair(&
MI,
nullptr);
2744 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2746 if (Dst.isPhysical()) {
2747 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2754 for (
unsigned I = 1;
I <= 2; ++
I) {
2757 if (
SrcOp.isImm()) {
2759 Imm.ashrInPlace(Part * 32);
2760 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2764 if (Src.isPhysical())
2765 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2772 MovDPP.addImm(MO.getImm());
2774 Split[Part] = MovDPP;
2778 if (Dst.isVirtual())
2785 MI.eraseFromParent();
2786 return std::pair(Split[0], Split[1]);
2789std::optional<DestSourcePair>
2791 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2794 return std::nullopt;
2798 AMDGPU::OpName Src0OpName,
2800 AMDGPU::OpName Src1OpName)
const {
2807 "All commutable instructions have both src0 and src1 modifiers");
2809 int Src0ModsVal = Src0Mods->
getImm();
2810 int Src1ModsVal = Src1Mods->
getImm();
2812 Src1Mods->
setImm(Src0ModsVal);
2813 Src0Mods->
setImm(Src1ModsVal);
2822 bool IsKill = RegOp.
isKill();
2824 bool IsUndef = RegOp.
isUndef();
2825 bool IsDebug = RegOp.
isDebug();
2827 if (NonRegOp.
isImm())
2829 else if (NonRegOp.
isFI())
2850 int64_t NonRegVal = NonRegOp1.
getImm();
2853 NonRegOp2.
setImm(NonRegVal);
2860 unsigned OpIdx1)
const {
2865 unsigned Opc =
MI.getOpcode();
2866 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2876 if ((
int)OpIdx0 == Src0Idx && !MO0.
isReg() &&
2879 if ((
int)OpIdx1 == Src0Idx && !MO1.
isReg() &&
2884 if ((
int)OpIdx1 != Src0Idx && MO0.
isReg()) {
2890 if ((
int)OpIdx0 != Src0Idx && MO1.
isReg()) {
2905 unsigned Src1Idx)
const {
2906 assert(!NewMI &&
"this should never be used");
2908 unsigned Opc =
MI.getOpcode();
2910 if (CommutedOpcode == -1)
2913 if (Src0Idx > Src1Idx)
2916 assert(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) ==
2917 static_cast<int>(Src0Idx) &&
2918 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1) ==
2919 static_cast<int>(Src1Idx) &&
2920 "inconsistency with findCommutedOpIndices");
2945 Src1, AMDGPU::OpName::src1_modifiers);
2948 AMDGPU::OpName::src1_sel);
2960 unsigned &SrcOpIdx0,
2961 unsigned &SrcOpIdx1)
const {
2966 unsigned &SrcOpIdx0,
2967 unsigned &SrcOpIdx1)
const {
2968 if (!
Desc.isCommutable())
2971 unsigned Opc =
Desc.getOpcode();
2972 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2976 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2980 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2984 int64_t BrOffset)
const {
3001 return MI.getOperand(0).getMBB();
3006 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
3007 MI.getOpcode() == AMDGPU::SI_LOOP)
3019 "new block should be inserted for expanding unconditional branch");
3022 "restore block should be inserted for restoring clobbered registers");
3030 if (ST.useAddPC64Inst()) {
3032 MCCtx.createTempSymbol(
"offset",
true);
3036 MCCtx.createTempSymbol(
"post_addpc",
true);
3037 AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
3041 Offset->setVariableValue(OffsetExpr);
3045 assert(RS &&
"RegScavenger required for long branching");
3053 const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
3054 ST.hasVALUReadSGPRHazard();
3055 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
3056 if (FlushSGPRWrites)
3064 ApplyHazardWorkarounds();
3067 MCCtx.createTempSymbol(
"post_getpc",
true);
3071 MCCtx.createTempSymbol(
"offset_lo",
true);
3073 MCCtx.createTempSymbol(
"offset_hi",
true);
3076 .
addReg(PCReg, {}, AMDGPU::sub0)
3080 .
addReg(PCReg, {}, AMDGPU::sub1)
3082 ApplyHazardWorkarounds();
3123 if (LongBranchReservedReg) {
3124 RS->enterBasicBlock(
MBB);
3125 Scav = LongBranchReservedReg;
3127 RS->enterBasicBlockEnd(
MBB);
3128 Scav = RS->scavengeRegisterBackwards(
3133 RS->setRegUsed(Scav);
3141 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3158unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
3160 case SIInstrInfo::SCC_TRUE:
3161 return AMDGPU::S_CBRANCH_SCC1;
3162 case SIInstrInfo::SCC_FALSE:
3163 return AMDGPU::S_CBRANCH_SCC0;
3164 case SIInstrInfo::VCCNZ:
3165 return AMDGPU::S_CBRANCH_VCCNZ;
3166 case SIInstrInfo::VCCZ:
3167 return AMDGPU::S_CBRANCH_VCCZ;
3168 case SIInstrInfo::EXECNZ:
3169 return AMDGPU::S_CBRANCH_EXECNZ;
3170 case SIInstrInfo::EXECZ:
3171 return AMDGPU::S_CBRANCH_EXECZ;
3177SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3179 case AMDGPU::S_CBRANCH_SCC0:
3181 case AMDGPU::S_CBRANCH_SCC1:
3183 case AMDGPU::S_CBRANCH_VCCNZ:
3185 case AMDGPU::S_CBRANCH_VCCZ:
3187 case AMDGPU::S_CBRANCH_EXECNZ:
3189 case AMDGPU::S_CBRANCH_EXECZ:
3201 bool AllowModify)
const {
3202 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3204 TBB =
I->getOperand(0).getMBB();
3208 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3209 if (Pred == INVALID_BR)
3214 Cond.push_back(
I->getOperand(1));
3218 if (
I ==
MBB.end()) {
3224 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3226 FBB =
I->getOperand(0).getMBB();
3236 bool AllowModify)
const {
3244 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3245 switch (
I->getOpcode()) {
3246 case AMDGPU::S_MOV_B64_term:
3247 case AMDGPU::S_XOR_B64_term:
3248 case AMDGPU::S_OR_B64_term:
3249 case AMDGPU::S_ANDN2_B64_term:
3250 case AMDGPU::S_AND_B64_term:
3251 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3252 case AMDGPU::S_MOV_B32_term:
3253 case AMDGPU::S_XOR_B32_term:
3254 case AMDGPU::S_OR_B32_term:
3255 case AMDGPU::S_ANDN2_B32_term:
3256 case AMDGPU::S_AND_B32_term:
3257 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3260 case AMDGPU::SI_ELSE:
3261 case AMDGPU::SI_KILL_I1_TERMINATOR:
3262 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3279 int *BytesRemoved)
const {
3281 unsigned RemovedSize = 0;
3284 if (
MI.isBranch() ||
MI.isReturn()) {
3286 MI.eraseFromParent();
3292 *BytesRemoved = RemovedSize;
3309 int *BytesAdded)
const {
3310 if (!FBB &&
Cond.empty()) {
3314 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3321 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].
getImm()));
3333 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3351 *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
3358 if (
Cond.size() != 2) {
3362 if (
Cond[0].isImm()) {
3373 Register FalseReg,
int &CondCycles,
3374 int &TrueCycles,
int &FalseCycles)
const {
3384 CondCycles = TrueCycles = FalseCycles = NumInsts;
3387 return RI.hasVGPRs(RC) && NumInsts <= 6;
3401 if (NumInsts % 2 == 0)
3404 CondCycles = TrueCycles = FalseCycles = NumInsts;
3405 return RI.isSGPRClass(RC);
3416 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3417 if (Pred == VCCZ || Pred == SCC_FALSE) {
3418 Pred =
static_cast<BranchPredicate
>(-Pred);
3424 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3426 if (DstSize == 32) {
3428 if (Pred == SCC_TRUE) {
3443 if (DstSize == 64 && Pred == SCC_TRUE) {
3453 static const int16_t Sub0_15[] = {
3454 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3455 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3456 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3457 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3460 static const int16_t Sub0_15_64[] = {
3461 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3462 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3463 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3464 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3467 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3469 const int16_t *SubIndices = Sub0_15;
3470 int NElts = DstSize / 32;
3474 if (Pred == SCC_TRUE) {
3476 SelOp = AMDGPU::S_CSELECT_B32;
3477 EltRC = &AMDGPU::SGPR_32RegClass;
3479 SelOp = AMDGPU::S_CSELECT_B64;
3480 EltRC = &AMDGPU::SGPR_64RegClass;
3481 SubIndices = Sub0_15_64;
3487 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3492 for (
int Idx = 0; Idx != NElts; ++Idx) {
3496 unsigned SubIdx = SubIndices[Idx];
3499 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3501 .
addReg(FalseReg, {}, SubIdx)
3502 .addReg(TrueReg, {}, SubIdx);
3505 .
addReg(TrueReg, {}, SubIdx)
3506 .addReg(FalseReg, {}, SubIdx);
3518 switch (
MI.getOpcode()) {
3519 case AMDGPU::V_MOV_B16_t16_e32:
3520 case AMDGPU::V_MOV_B16_t16_e64:
3521 case AMDGPU::V_MOV_B32_e32:
3522 case AMDGPU::V_MOV_B32_e64:
3523 case AMDGPU::V_MOV_B64_PSEUDO:
3524 case AMDGPU::V_MOV_B64_e32:
3525 case AMDGPU::V_MOV_B64_e64:
3526 case AMDGPU::S_MOV_B32:
3527 case AMDGPU::S_MOV_B64:
3528 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3530 case AMDGPU::WWM_COPY:
3531 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3532 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3533 case AMDGPU::V_ACCVGPR_MOV_B32:
3534 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3535 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3543 switch (
MI.getOpcode()) {
3544 case AMDGPU::V_MOV_B16_t16_e32:
3545 case AMDGPU::V_MOV_B16_t16_e64:
3547 case AMDGPU::V_MOV_B32_e32:
3548 case AMDGPU::V_MOV_B32_e64:
3549 case AMDGPU::V_MOV_B64_PSEUDO:
3550 case AMDGPU::V_MOV_B64_e32:
3551 case AMDGPU::V_MOV_B64_e64:
3552 case AMDGPU::S_MOV_B32:
3553 case AMDGPU::S_MOV_B64:
3554 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3556 case AMDGPU::WWM_COPY:
3557 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3558 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3559 case AMDGPU::V_ACCVGPR_MOV_B32:
3560 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3561 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3569 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3570 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3571 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3574 unsigned Opc =
MI.getOpcode();
3576 int Idx = AMDGPU::getNamedOperandIdx(
Opc, Name);
3578 MI.removeOperand(Idx);
3584 MI.setDesc(NewDesc);
3590 unsigned NumOps =
Desc.getNumOperands() +
Desc.implicit_uses().size() +
3591 Desc.implicit_defs().size();
3593 for (
unsigned I =
MI.getNumOperands() - 1;
I >=
NumOps; --
I)
3594 MI.removeOperand(
I);
3598 unsigned SubRegIndex) {
3599 switch (SubRegIndex) {
3600 case AMDGPU::NoSubRegister:
3610 case AMDGPU::sub1_lo16:
3612 case AMDGPU::sub1_hi16:
3615 return std::nullopt;
3623 case AMDGPU::V_MAC_F16_e32:
3624 case AMDGPU::V_MAC_F16_e64:
3625 case AMDGPU::V_MAD_F16_e64:
3626 return AMDGPU::V_MADAK_F16;
3627 case AMDGPU::V_MAC_F32_e32:
3628 case AMDGPU::V_MAC_F32_e64:
3629 case AMDGPU::V_MAD_F32_e64:
3630 return AMDGPU::V_MADAK_F32;
3631 case AMDGPU::V_FMAC_F32_e32:
3632 case AMDGPU::V_FMAC_F32_e64:
3633 case AMDGPU::V_FMA_F32_e64:
3634 return AMDGPU::V_FMAAK_F32;
3635 case AMDGPU::V_FMAC_F16_e32:
3636 case AMDGPU::V_FMAC_F16_e64:
3637 case AMDGPU::V_FMAC_F16_t16_e64:
3638 case AMDGPU::V_FMAC_F16_fake16_e64:
3639 case AMDGPU::V_FMAC_F16_t16_e32:
3640 case AMDGPU::V_FMAC_F16_fake16_e32:
3641 case AMDGPU::V_FMA_F16_e64:
3642 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3643 ? AMDGPU::V_FMAAK_F16_t16
3644 : AMDGPU::V_FMAAK_F16_fake16
3645 : AMDGPU::V_FMAAK_F16;
3646 case AMDGPU::V_FMAC_F64_e32:
3647 case AMDGPU::V_FMAC_F64_e64:
3648 case AMDGPU::V_FMA_F64_e64:
3649 return AMDGPU::V_FMAAK_F64;
3657 case AMDGPU::V_MAC_F16_e32:
3658 case AMDGPU::V_MAC_F16_e64:
3659 case AMDGPU::V_MAD_F16_e64:
3660 return AMDGPU::V_MADMK_F16;
3661 case AMDGPU::V_MAC_F32_e32:
3662 case AMDGPU::V_MAC_F32_e64:
3663 case AMDGPU::V_MAD_F32_e64:
3664 return AMDGPU::V_MADMK_F32;
3665 case AMDGPU::V_FMAC_F32_e32:
3666 case AMDGPU::V_FMAC_F32_e64:
3667 case AMDGPU::V_FMA_F32_e64:
3668 return AMDGPU::V_FMAMK_F32;
3669 case AMDGPU::V_FMAC_F16_e32:
3670 case AMDGPU::V_FMAC_F16_e64:
3671 case AMDGPU::V_FMAC_F16_t16_e64:
3672 case AMDGPU::V_FMAC_F16_fake16_e64:
3673 case AMDGPU::V_FMAC_F16_t16_e32:
3674 case AMDGPU::V_FMAC_F16_fake16_e32:
3675 case AMDGPU::V_FMA_F16_e64:
3676 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3677 ? AMDGPU::V_FMAMK_F16_t16
3678 : AMDGPU::V_FMAMK_F16_fake16
3679 : AMDGPU::V_FMAMK_F16;
3680 case AMDGPU::V_FMAC_F64_e32:
3681 case AMDGPU::V_FMAC_F64_e64:
3682 case AMDGPU::V_FMA_F64_e64:
3683 return AMDGPU::V_FMAMK_F64;
3697 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3700 if (
Opc == AMDGPU::COPY) {
3701 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3708 if (HasMultipleUses) {
3711 unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->
getRegClass(Reg));
3714 if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
3722 if (ImmDefSize == 32 &&
3727 bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
3728 RI.getSubRegIdxSize(UseSubReg) == 16;
3731 if (RI.hasVGPRs(DstRC))
3734 if (DstReg.
isVirtual() && UseSubReg != AMDGPU::lo16)
3740 unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
3747 for (
unsigned MovOp :
3748 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
3749 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
3757 MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
3761 if (MovDstPhysReg) {
3765 RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
3772 if (MovDstPhysReg) {
3773 if (!MovDstRC->
contains(MovDstPhysReg))
3789 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
3797 if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
3801 UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
3803 UseMI.getOperand(0).setReg(MovDstPhysReg);
3808 UseMI.setDesc(NewMCID);
3809 UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
3810 UseMI.addImplicitDefUseOperands(*MF);
3814 if (HasMultipleUses)
3817 if (
Opc == AMDGPU::V_MAD_F32_e64 ||
Opc == AMDGPU::V_MAC_F32_e64 ||
3818 Opc == AMDGPU::V_MAD_F16_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3819 Opc == AMDGPU::V_FMA_F32_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 ||
3820 Opc == AMDGPU::V_FMA_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 ||
3821 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3822 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMA_F64_e64 ||
3823 Opc == AMDGPU::V_FMAC_F64_e64) {
3832 int Src0Idx = getNamedOperandIdx(
UseMI.getOpcode(), AMDGPU::OpName::src0);
3843 auto CopyRegOperandToNarrowerRC =
3846 if (!
MI.getOperand(OpNo).isReg())
3850 if (RI.getCommonSubClass(RC, NewRC) != NewRC)
3853 BuildMI(*
MI.getParent(),
MI.getIterator(),
MI.getDebugLoc(),
3854 get(AMDGPU::COPY), Tmp)
3856 MI.getOperand(OpNo).setReg(Tmp);
3857 MI.getOperand(OpNo).setIsKill();
3864 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3865 if (!RegSrc->
isReg())
3868 ST.getConstantBusLimit(
Opc) < 2)
3883 if (Def && Def->isMoveImmediate() &&
3898 unsigned SrcSubReg = RegSrc->
getSubReg();
3903 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3904 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3905 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3906 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3907 UseMI.untieRegOperand(
3908 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3915 if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3916 NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
3920 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3921 UseMI.getOperand(0).getReg())
3923 UseMI.getOperand(0).setReg(Tmp);
3924 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3925 CopyRegOperandToNarrowerRC(
UseMI, 3, NewRC);
3930 DefMI.eraseFromParent();
3937 if (ST.getConstantBusLimit(
Opc) < 2) {
3940 bool Src0Inlined =
false;
3941 if (Src0->
isReg()) {
3946 if (Def && Def->isMoveImmediate() &&
3951 }
else if (ST.getConstantBusLimit(
Opc) <= 1 &&
3952 RI.isSGPRReg(*MRI, Src0->
getReg())) {
3958 if (Src1->
isReg() && !Src0Inlined) {
3961 if (Def && Def->isMoveImmediate() &&
3965 else if (RI.isSGPRReg(*MRI, Src1->
getReg()))
3978 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3979 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3980 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3981 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3982 UseMI.untieRegOperand(
3983 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3985 const std::optional<int64_t> SubRegImm =
3995 if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
3996 NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
4000 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
4001 UseMI.getOperand(0).getReg())
4003 UseMI.getOperand(0).setReg(Tmp);
4004 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
4005 CopyRegOperandToNarrowerRC(
UseMI, 2, NewRC);
4015 DefMI.eraseFromParent();
4027 if (BaseOps1.
size() != BaseOps2.
size())
4029 for (
size_t I = 0,
E = BaseOps1.
size();
I <
E; ++
I) {
4030 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
4038 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
4039 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
4040 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
4042 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
4045bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
4048 int64_t Offset0, Offset1;
4051 bool Offset0IsScalable, Offset1IsScalable;
4065 LocationSize Width0 = MIa.
memoperands().front()->getSize();
4066 LocationSize Width1 = MIb.
memoperands().front()->getSize();
4073 "MIa must load from or modify a memory location");
4075 "MIb must load from or modify a memory location");
4097 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4104 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4114 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4128 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4139 if (
Reg.isPhysical())
4143 Imm = Def->getOperand(1).getImm();
4163 unsigned NumOps =
MI.getNumOperands();
4166 if (
Op.isReg() &&
Op.isKill())
4174 case AMDGPU::V_MAC_F16_e32:
4175 case AMDGPU::V_MAC_F16_e64:
4176 return AMDGPU::V_MAD_F16_e64;
4177 case AMDGPU::V_MAC_F32_e32:
4178 case AMDGPU::V_MAC_F32_e64:
4179 return AMDGPU::V_MAD_F32_e64;
4180 case AMDGPU::V_MAC_LEGACY_F32_e32:
4181 case AMDGPU::V_MAC_LEGACY_F32_e64:
4182 return AMDGPU::V_MAD_LEGACY_F32_e64;
4183 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4184 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4185 return AMDGPU::V_FMA_LEGACY_F32_e64;
4186 case AMDGPU::V_FMAC_F16_e32:
4187 case AMDGPU::V_FMAC_F16_e64:
4188 case AMDGPU::V_FMAC_F16_t16_e64:
4189 case AMDGPU::V_FMAC_F16_fake16_e64:
4190 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4191 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4192 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4193 : AMDGPU::V_FMA_F16_gfx9_e64;
4194 case AMDGPU::V_FMAC_F32_e32:
4195 case AMDGPU::V_FMAC_F32_e64:
4196 return AMDGPU::V_FMA_F32_e64;
4197 case AMDGPU::V_FMAC_F64_e32:
4198 case AMDGPU::V_FMAC_F64_e64:
4199 return AMDGPU::V_FMA_F64_e64;
4219 if (
MI.isBundle()) {
4222 if (
MI.getBundleSize() != 1)
4224 CandidateMI =
MI.getNextNode();
4228 MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4232 if (
MI.isBundle()) {
4237 MI.untieRegOperand(MO.getOperandNo());
4245 if (Def.isEarlyClobber() && Def.isReg() &&
4250 auto UpdateDefIndex = [&](
LiveRange &LR) {
4251 auto *S = LR.find(OldIndex);
4252 if (S != LR.end() && S->start == OldIndex) {
4253 assert(S->valno && S->valno->def == OldIndex);
4254 S->start = NewIndex;
4255 S->valno->def = NewIndex;
4259 for (
auto &SR : LI.subranges())
4265 if (U.RemoveMIUse) {
4268 Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
4272 U.RemoveMIUse->setDesc(
get(AMDGPU::IMPLICIT_DEF));
4273 U.RemoveMIUse->getOperand(0).setIsDead(
true);
4274 for (
unsigned I = U.RemoveMIUse->getNumOperands() - 1;
I != 0; --
I)
4275 U.RemoveMIUse->removeOperand(
I);
4280 if (
MI.isBundle()) {
4284 if (MO.isReg() && MO.getReg() == DefReg) {
4285 assert(MO.getSubReg() == 0 &&
4286 "tied sub-registers in bundles currently not supported");
4287 MI.removeOperand(MO.getOperandNo());
4304 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4305 MIOp.setIsUndef(
true);
4306 MIOp.setReg(DummyReg);
4310 if (
MI.isBundle()) {
4314 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4315 MIOp.setIsUndef(
true);
4316 MIOp.setReg(DummyReg);
4329 return MI.isBundle() ? &
MI : NewMI;
4334 ThreeAddressUpdates &U)
const {
4336 unsigned Opc =
MI.getOpcode();
4340 if (NewMFMAOpc != -1) {
4343 for (
unsigned I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I)
4344 MIB.
add(
MI.getOperand(
I));
4352 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I)
4357 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4358 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4359 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4363 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4364 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4365 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4366 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4367 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4368 bool Src0Literal =
false;
4373 case AMDGPU::V_MAC_F16_e64:
4374 case AMDGPU::V_FMAC_F16_e64:
4375 case AMDGPU::V_FMAC_F16_t16_e64:
4376 case AMDGPU::V_FMAC_F16_fake16_e64:
4377 case AMDGPU::V_MAC_F32_e64:
4378 case AMDGPU::V_MAC_LEGACY_F32_e64:
4379 case AMDGPU::V_FMAC_F32_e64:
4380 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4381 case AMDGPU::V_FMAC_F64_e64:
4383 case AMDGPU::V_MAC_F16_e32:
4384 case AMDGPU::V_FMAC_F16_e32:
4385 case AMDGPU::V_MAC_F32_e32:
4386 case AMDGPU::V_MAC_LEGACY_F32_e32:
4387 case AMDGPU::V_FMAC_F32_e32:
4388 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4389 case AMDGPU::V_FMAC_F64_e32: {
4390 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4391 AMDGPU::OpName::src0);
4392 const MachineOperand *Src0 = &
MI.getOperand(Src0Idx);
4403 MachineInstrBuilder MIB;
4406 const MachineOperand *Src0Mods =
4409 const MachineOperand *Src1Mods =
4412 const MachineOperand *Src2Mods =
4418 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4419 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4421 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4423 MachineInstr *
DefMI;
4459 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4475 if (Src0Literal && !ST.hasVOP3Literal())
4503 switch (
MI.getOpcode()) {
4504 case AMDGPU::S_SET_GPR_IDX_ON:
4505 case AMDGPU::S_SET_GPR_IDX_MODE:
4506 case AMDGPU::S_SET_GPR_IDX_OFF:
4524 if (
MI.isTerminator() ||
MI.isPosition())
4528 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4531 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4537 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4538 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4539 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4540 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4541 MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4546 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4547 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4548 Opcode == AMDGPU::DS_SUB_GS_REG_RTN ||
isGWS(Opcode);
4562 if (
MI.getMF()->getFunction().hasFnAttribute(
"amdgpu-no-flat-scratch-init"))
4567 if (
MI.memoperands_empty())
4572 unsigned AS = Memop->getAddrSpace();
4573 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4574 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4575 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4576 *MD, AMDGPUAS::PRIVATE_ADDRESS);
4591 if (
MI.memoperands_empty())
4600 unsigned AS = Memop->getAddrSpace();
4617 if (ST.isTgSplitEnabled())
4622 if (
MI.memoperands_empty())
4627 unsigned AS = Memop->getAddrSpace();
4643 unsigned Opcode =
MI.getOpcode();
4658 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4659 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4660 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT ||
4661 Opcode == AMDGPU::S_SETHALT)
4664 if (
MI.isCall() ||
MI.isInlineAsm())
4680 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4681 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4682 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4683 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4691 if (
MI.isMetaInstruction())
4695 if (
MI.isCopyLike()) {
4696 if (!RI.isSGPRReg(MRI,
MI.getOperand(0).getReg()))
4700 return MI.readsRegister(AMDGPU::EXEC, &RI);
4711 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4715 switch (Imm.getBitWidth()) {
4721 ST.hasInv2PiInlineImm());
4724 ST.hasInv2PiInlineImm());
4726 return ST.has16BitInsts() &&
4728 ST.hasInv2PiInlineImm());
4735 APInt IntImm = Imm.bitcastToAPInt();
4737 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4745 return ST.has16BitInsts() &&
4748 return ST.has16BitInsts() &&
4758 switch (OperandType) {
4768 int32_t Trunc =
static_cast<int32_t
>(Imm);
4810 int16_t Trunc =
static_cast<int16_t
>(Imm);
4811 return ST.has16BitInsts() &&
4820 int16_t Trunc =
static_cast<int16_t
>(Imm);
4821 return ST.has16BitInsts() &&
4872 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4878 return ST.hasVOP3Literal();
4882 int64_t ImmVal)
const {
4885 if (
isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4886 OpNo == (
unsigned)AMDGPU::getNamedOperandIdx(InstDesc.
getOpcode(),
4887 AMDGPU::OpName::src2))
4889 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4901 "unexpected imm-like operand kind");
4914 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4932 AMDGPU::OpName
OpName)
const {
4934 return Mods && Mods->
getImm();
4947 switch (
MI.getOpcode()) {
4948 default:
return false;
4950 case AMDGPU::V_ADDC_U32_e64:
4951 case AMDGPU::V_SUBB_U32_e64:
4952 case AMDGPU::V_SUBBREV_U32_e64: {
4955 if (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()))
4960 case AMDGPU::V_MAC_F16_e64:
4961 case AMDGPU::V_MAC_F32_e64:
4962 case AMDGPU::V_MAC_LEGACY_F32_e64:
4963 case AMDGPU::V_FMAC_F16_e64:
4964 case AMDGPU::V_FMAC_F16_t16_e64:
4965 case AMDGPU::V_FMAC_F16_fake16_e64:
4966 case AMDGPU::V_FMAC_F32_e64:
4967 case AMDGPU::V_FMAC_F64_e64:
4968 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4969 if (!Src2->
isReg() || !RI.isVGPR(MRI, Src2->
getReg()) ||
4974 case AMDGPU::V_CNDMASK_B32_e64:
4980 if (Src1 && (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()) ||
5010 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
5019 unsigned Op32)
const {
5033 Inst32.
add(
MI.getOperand(
I));
5037 int Idx =
MI.getNumExplicitDefs();
5039 int OpTy =
MI.getDesc().operands()[Idx++].OperandType;
5044 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
5066 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
5074 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
5077 return AMDGPU::SReg_32RegClass.contains(Reg) ||
5078 AMDGPU::SReg_64RegClass.contains(Reg);
5106 switch (MO.getReg()) {
5108 case AMDGPU::VCC_LO:
5109 case AMDGPU::VCC_HI:
5111 case AMDGPU::FLAT_SCR:
5124 switch (
MI.getOpcode()) {
5125 case AMDGPU::V_READLANE_B32:
5126 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
5127 case AMDGPU::V_WRITELANE_B32:
5128 case AMDGPU::SI_SPILL_S32_TO_VGPR:
5135 if (
MI.isPreISelOpcode() ||
5136 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
5154 return SubReg.
getSubReg() != AMDGPU::NoSubRegister &&
5165 if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5166 ErrInfo =
"illegal copy from vector register to SGPR";
5184 if (!MRI.
isSSA() &&
MI.isCopy())
5185 return verifyCopy(
MI, MRI, ErrInfo);
5187 if (SIInstrInfo::isGenericOpcode(Opcode))
5190 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5191 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5192 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5194 if (Src0Idx == -1) {
5196 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5197 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5198 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5199 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5204 if (!
Desc.isVariadic() &&
5205 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
5206 ErrInfo =
"Instruction has wrong number of operands.";
5210 if (
MI.isInlineAsm()) {
5223 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
5224 ErrInfo =
"inlineasm operand has incorrect register class.";
5232 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
5233 ErrInfo =
"missing memory operand from image instruction.";
5238 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5241 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5242 "all fp values to integers.";
5247 int16_t RegClass = getOpRegClassID(OpInfo);
5249 switch (OpInfo.OperandType) {
5251 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5252 ErrInfo =
"Illegal immediate value for operand.";
5287 ErrInfo =
"Illegal immediate value for operand.";
5295 ErrInfo =
"Expected inline constant for operand.";
5309 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5310 ErrInfo =
"Expected immediate, but got non-immediate";
5319 if (OpInfo.isGenericType())
5334 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
5335 Opcode != AMDGPU::V_MOV_B64_PSEUDO && !
isSpill(
MI)) {
5337 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5339 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5340 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5347 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5348 ErrInfo =
"Subtarget requires even aligned vector registers";
5353 if (RegClass != -1) {
5354 if (Reg.isVirtual())
5359 ErrInfo =
"Operand has incorrect register class.";
5367 if (!ST.hasSDWA()) {
5368 ErrInfo =
"SDWA is not supported on this target";
5372 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5373 AMDGPU::OpName::dst_sel}) {
5377 int64_t Imm = MO->
getImm();
5379 ErrInfo =
"Invalid SDWA selection";
5384 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5386 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5391 if (!ST.hasSDWAScalar()) {
5393 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.
getReg()))) {
5394 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5401 "Only reg allowed as operands in SDWA instructions on GFX9+";
5407 if (!ST.hasSDWAOmod()) {
5410 if (OMod !=
nullptr &&
5412 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5417 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5418 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5419 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5420 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5423 unsigned Mods = Src0ModsMO->
getImm();
5426 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5432 if (
isVOPC(BasicOpcode)) {
5433 if (!ST.hasSDWASdst() && DstIdx != -1) {
5436 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5437 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5440 }
else if (!ST.hasSDWAOutModsVOPC()) {
5443 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5444 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5450 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5451 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5458 if (DstUnused && DstUnused->isImm() &&
5461 if (!Dst.isReg() || !Dst.isTied()) {
5462 ErrInfo =
"Dst register should have tied register";
5467 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5470 "Dst register should be tied to implicit use of preserved register";
5474 ErrInfo =
"Dst register should use same physical register as preserved";
5481 if (
isImage(Opcode) && !
MI.mayStore()) {
5493 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5501 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5505 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5506 if (RegCount > DstSize) {
5507 ErrInfo =
"Image instruction returns too many registers for dst "
5516 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5517 unsigned ConstantBusCount = 0;
5518 bool UsesLiteral =
false;
5521 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5525 LiteralVal = &
MI.getOperand(ImmIdx);
5534 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5545 }
else if (!MO.
isFI()) {
5552 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5562 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5563 return !RI.regsOverlap(SGPRUsed, SGPR);
5572 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5573 Opcode != AMDGPU::V_WRITELANE_B32) {
5574 ErrInfo =
"VOP* instruction violates constant bus restriction";
5578 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5579 ErrInfo =
"VOP3 instruction uses literal";
5586 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5587 unsigned SGPRCount = 0;
5590 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5598 if (MO.
getReg() != SGPRUsed)
5603 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5604 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5611 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5612 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5619 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5629 ErrInfo =
"ABS not allowed in VOP3B instructions";
5642 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5649 if (
Desc.isBranch()) {
5651 ErrInfo =
"invalid branch target for SOPK instruction";
5658 ErrInfo =
"invalid immediate for SOPK instruction";
5663 ErrInfo =
"invalid immediate for SOPK instruction";
5670 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5671 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5672 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5673 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5674 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5675 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5677 const unsigned StaticNumOps =
5678 Desc.getNumOperands() +
Desc.implicit_uses().size();
5679 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5685 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5686 ErrInfo =
"missing implicit register operands";
5692 if (!Dst->isUse()) {
5693 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5698 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5699 UseOpIdx != StaticNumOps + 1) {
5700 ErrInfo =
"movrel implicit operands should be tied";
5707 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5709 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5710 ErrInfo =
"src0 should be subreg of implicit vector use";
5718 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5719 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5725 if (
MI.mayStore() &&
5730 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5731 ErrInfo =
"scalar stores must use m0 as offset register";
5737 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5739 if (
Offset->getImm() != 0) {
5740 ErrInfo =
"subtarget does not support offsets in flat instructions";
5745 if (
isDS(
MI) && !ST.hasGDS()) {
5747 if (GDSOp && GDSOp->
getImm() != 0) {
5748 ErrInfo =
"GDS is not supported on this subtarget";
5756 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5757 AMDGPU::OpName::vaddr0);
5758 AMDGPU::OpName RSrcOpName =
5759 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5760 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5768 ErrInfo =
"dim is out of range";
5773 if (ST.hasR128A16()) {
5775 IsA16 = R128A16->
getImm() != 0;
5776 }
else if (ST.hasA16()) {
5778 IsA16 = A16->
getImm() != 0;
5781 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5783 unsigned AddrWords =
5786 unsigned VAddrWords;
5788 VAddrWords = RsrcIdx - VAddr0Idx;
5789 if (ST.hasPartialNSAEncoding() &&
5791 unsigned LastVAddrIdx = RsrcIdx - 1;
5792 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5800 if (VAddrWords != AddrWords) {
5802 <<
" but got " << VAddrWords <<
"\n");
5803 ErrInfo =
"bad vaddr size";
5813 unsigned DC = DppCt->
getImm();
5814 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5815 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5816 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5817 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5818 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5819 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5820 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5821 ErrInfo =
"Invalid dpp_ctrl value";
5824 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5825 !ST.hasDPPWavefrontShifts()) {
5826 ErrInfo =
"Invalid dpp_ctrl value: "
5827 "wavefront shifts are not supported on GFX10+";
5830 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5831 !ST.hasDPPBroadcasts()) {
5832 ErrInfo =
"Invalid dpp_ctrl value: "
5833 "broadcasts are not supported on GFX10+";
5836 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5838 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5839 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5840 !ST.hasGFX90AInsts()) {
5841 ErrInfo =
"Invalid dpp_ctrl value: "
5842 "row_newbroadcast/row_share is not supported before "
5846 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5847 ErrInfo =
"Invalid dpp_ctrl value: "
5848 "row_share and row_xmask are not supported before GFX10";
5853 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5856 ErrInfo =
"Invalid dpp_ctrl value: "
5857 "DP ALU dpp only support row_newbcast";
5864 AMDGPU::OpName DataName =
5865 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5871 if (ST.hasGFX90AInsts()) {
5872 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5873 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI,
Data->getReg()))) {
5874 ErrInfo =
"Invalid register class: "
5875 "vdata and vdst should be both VGPR or AGPR";
5878 if (
Data && Data2 &&
5879 (RI.isAGPR(MRI,
Data->getReg()) != RI.isAGPR(MRI, Data2->
getReg()))) {
5880 ErrInfo =
"Invalid register class: "
5881 "both data operands should be VGPR or AGPR";
5885 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5886 (
Data && RI.isAGPR(MRI,
Data->getReg())) ||
5887 (Data2 && RI.isAGPR(MRI, Data2->
getReg()))) {
5888 ErrInfo =
"Invalid register class: "
5889 "agpr loads and stores not supported on this GPU";
5895 if (ST.needsAlignedVGPRs()) {
5896 const auto isAlignedReg = [&
MI, &MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5901 if (Reg.isPhysical())
5902 return !(RI.getHWRegIndex(Reg) & 1);
5904 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5905 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5908 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5909 Opcode == AMDGPU::DS_GWS_BARRIER) {
5911 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5912 ErrInfo =
"Subtarget requires even aligned vector registers "
5913 "for DS_GWS instructions";
5919 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5920 ErrInfo =
"Subtarget requires even aligned vector registers "
5921 "for vaddr operand of image instructions";
5927 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5929 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5930 ErrInfo =
"Invalid register class: "
5931 "v_accvgpr_write with an SGPR is not supported on this GPU";
5936 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5939 ErrInfo =
"pseudo expects only physical SGPRs";
5946 if (!ST.hasScaleOffset()) {
5947 ErrInfo =
"Subtarget does not support offset scaling";
5951 ErrInfo =
"Instruction does not support offset scaling";
5960 for (
unsigned I = 0;
I < 3; ++
I) {
5966 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
5967 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
5969 if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
5970 &AMDGPU::SReg_64RegClass) ||
5971 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5972 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
5981 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
5983 return MI.getOperand(1).isReg() || RI.isAGPR(MRI,
MI.getOperand(0).getReg())
5985 : AMDGPU::V_MOV_B32_e32;
5995 default:
return AMDGPU::INSTRUCTION_LIST_END;
5996 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5997 case AMDGPU::COPY:
return AMDGPU::COPY;
5998 case AMDGPU::PHI:
return AMDGPU::PHI;
5999 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
6000 case AMDGPU::WQM:
return AMDGPU::WQM;
6001 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
6002 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
6003 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
6004 case AMDGPU::S_ADD_I32:
6005 return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
6006 case AMDGPU::S_ADDC_U32:
6007 return AMDGPU::V_ADDC_U32_e32;
6008 case AMDGPU::S_SUB_I32:
6009 return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
6012 case AMDGPU::S_ADD_U32:
6013 return AMDGPU::V_ADD_CO_U32_e32;
6014 case AMDGPU::S_SUB_U32:
6015 return AMDGPU::V_SUB_CO_U32_e32;
6016 case AMDGPU::S_ADD_U64_PSEUDO:
6017 return AMDGPU::V_ADD_U64_PSEUDO;
6018 case AMDGPU::S_SUB_U64_PSEUDO:
6019 return AMDGPU::V_SUB_U64_PSEUDO;
6020 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
6021 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
6022 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
6023 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
6024 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
6025 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
6026 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
6027 case AMDGPU::S_XNOR_B32:
6028 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
6029 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
6030 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
6031 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
6032 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
6033 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
6034 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
6035 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
6036 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
6037 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
6038 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
6039 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
6040 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
6041 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
6042 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
6043 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
6044 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
6045 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
6046 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
6047 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
6048 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
6049 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
6050 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
6051 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
6052 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
6053 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
6054 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
6055 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
6056 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
6057 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
6058 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
6059 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
6060 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
6061 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
6062 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
6063 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
6064 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
6065 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
6066 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
6067 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
6068 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
6069 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
6070 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
6071 case AMDGPU::S_CVT_F32_F16:
6072 case AMDGPU::S_CVT_HI_F32_F16:
6073 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
6074 : AMDGPU::V_CVT_F32_F16_fake16_e64;
6075 case AMDGPU::S_CVT_F16_F32:
6076 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
6077 : AMDGPU::V_CVT_F16_F32_fake16_e64;
6078 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
6079 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
6080 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
6081 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
6082 case AMDGPU::S_CEIL_F16:
6083 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
6084 : AMDGPU::V_CEIL_F16_fake16_e64;
6085 case AMDGPU::S_FLOOR_F16:
6086 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6087 : AMDGPU::V_FLOOR_F16_fake16_e64;
6088 case AMDGPU::S_TRUNC_F16:
6089 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6090 : AMDGPU::V_TRUNC_F16_fake16_e64;
6091 case AMDGPU::S_RNDNE_F16:
6092 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6093 : AMDGPU::V_RNDNE_F16_fake16_e64;
6094 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6095 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6096 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6097 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6098 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6099 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6100 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6101 case AMDGPU::S_ADD_F16:
6102 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6103 : AMDGPU::V_ADD_F16_fake16_e64;
6104 case AMDGPU::S_SUB_F16:
6105 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6106 : AMDGPU::V_SUB_F16_fake16_e64;
6107 case AMDGPU::S_MIN_F16:
6108 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6109 : AMDGPU::V_MIN_F16_fake16_e64;
6110 case AMDGPU::S_MAX_F16:
6111 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6112 : AMDGPU::V_MAX_F16_fake16_e64;
6113 case AMDGPU::S_MINIMUM_F16:
6114 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6115 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6116 case AMDGPU::S_MAXIMUM_F16:
6117 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6118 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6119 case AMDGPU::S_MUL_F16:
6120 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6121 : AMDGPU::V_MUL_F16_fake16_e64;
6122 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6123 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6124 case AMDGPU::S_FMAC_F16:
6125 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6126 : AMDGPU::V_FMAC_F16_fake16_e64;
6127 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6128 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6129 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6130 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6131 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6132 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6133 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6134 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6135 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6136 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6137 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6138 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6139 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6140 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6141 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6142 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6143 case AMDGPU::S_CMP_LT_F16:
6144 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6145 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6146 case AMDGPU::S_CMP_EQ_F16:
6147 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6148 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6149 case AMDGPU::S_CMP_LE_F16:
6150 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6151 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6152 case AMDGPU::S_CMP_GT_F16:
6153 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6154 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6155 case AMDGPU::S_CMP_LG_F16:
6156 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6157 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6158 case AMDGPU::S_CMP_GE_F16:
6159 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6160 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6161 case AMDGPU::S_CMP_O_F16:
6162 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6163 : AMDGPU::V_CMP_O_F16_fake16_e64;
6164 case AMDGPU::S_CMP_U_F16:
6165 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6166 : AMDGPU::V_CMP_U_F16_fake16_e64;
6167 case AMDGPU::S_CMP_NGE_F16:
6168 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6169 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6170 case AMDGPU::S_CMP_NLG_F16:
6171 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6172 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6173 case AMDGPU::S_CMP_NGT_F16:
6174 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6175 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6176 case AMDGPU::S_CMP_NLE_F16:
6177 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6178 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6179 case AMDGPU::S_CMP_NEQ_F16:
6180 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6181 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6182 case AMDGPU::S_CMP_NLT_F16:
6183 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6184 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6185 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6186 case AMDGPU::V_S_EXP_F16_e64:
6187 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6188 : AMDGPU::V_EXP_F16_fake16_e64;
6189 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6190 case AMDGPU::V_S_LOG_F16_e64:
6191 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6192 : AMDGPU::V_LOG_F16_fake16_e64;
6193 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6194 case AMDGPU::V_S_RCP_F16_e64:
6195 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6196 : AMDGPU::V_RCP_F16_fake16_e64;
6197 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6198 case AMDGPU::V_S_RSQ_F16_e64:
6199 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6200 : AMDGPU::V_RSQ_F16_fake16_e64;
6201 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6202 case AMDGPU::V_S_SQRT_F16_e64:
6203 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6204 : AMDGPU::V_SQRT_F16_fake16_e64;
6207 "Unexpected scalar opcode without corresponding vector one!");
6256 "Not a whole wave func");
6259 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6260 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6267 unsigned OpNo)
const {
6269 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6270 Desc.operands()[OpNo].RegClass == -1) {
6273 if (Reg.isVirtual()) {
6277 return RI.getPhysRegBaseClass(Reg);
6280 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6281 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6289 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6291 unsigned Size = RI.getRegSizeInBits(*RC);
6292 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6293 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6294 : AMDGPU::V_MOV_B32_e32;
6296 Opcode = AMDGPU::COPY;
6297 else if (RI.isSGPRClass(RC))
6298 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6312 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6318 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6329 if (SubIdx == AMDGPU::sub0)
6331 if (SubIdx == AMDGPU::sub1)
6343void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6359 if (Reg.isPhysical())
6369 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6372 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6379 unsigned Opc =
MI.getOpcode();
6385 constexpr AMDGPU::OpName OpNames[] = {
6386 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6389 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6390 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6400 bool IsAGPR = RI.isAGPR(MRI, MO.
getReg());
6401 if (IsAGPR && !ST.hasMAIInsts())
6407 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6408 const int DataIdx = AMDGPU::getNamedOperandIdx(
6409 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6410 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6411 MI.getOperand(DataIdx).isReg() &&
6412 RI.isAGPR(MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6414 if ((
int)
OpIdx == DataIdx) {
6415 if (VDstIdx != -1 &&
6416 RI.isAGPR(MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6419 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6420 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6421 RI.isAGPR(MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6426 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6427 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6428 RI.isSGPRReg(MRI, MO.
getReg()))
6431 if (ST.hasFlatScratchHiInB64InstHazard() &&
6438 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6459 constexpr unsigned NumOps = 3;
6460 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6461 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6462 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6463 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6468 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6471 MO = &
MI.getOperand(SrcIdx);
6474 if (!MO->
isReg() || !RI.isSGPRReg(MRI, MO->
getReg()))
6478 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6482 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6486 return !OpSel && !OpSelHi;
6495 int64_t RegClass = getOpRegClassID(OpInfo);
6497 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6506 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6507 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6511 if (!LiteralLimit--)
6521 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6529 if (--ConstantBusLimit <= 0)
6541 if (!LiteralLimit--)
6543 if (--ConstantBusLimit <= 0)
6549 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6553 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6555 !
Op.isIdenticalTo(*MO))
6565 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6579 bool Is64BitOp = Is64BitFPOp ||
6586 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6595 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6613 bool IsGFX950Only = ST.hasGFX950Insts();
6614 bool IsGFX940Only = ST.hasGFX940Insts();
6616 if (!IsGFX950Only && !IsGFX940Only)
6634 unsigned Opcode =
MI.getOpcode();
6636 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6637 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6638 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6639 case AMDGPU::V_MQSAD_U32_U8_e64:
6640 case AMDGPU::V_PK_ADD_F16:
6641 case AMDGPU::V_PK_ADD_F32:
6642 case AMDGPU::V_PK_ADD_I16:
6643 case AMDGPU::V_PK_ADD_U16:
6644 case AMDGPU::V_PK_ASHRREV_I16:
6645 case AMDGPU::V_PK_FMA_F16:
6646 case AMDGPU::V_PK_FMA_F32:
6647 case AMDGPU::V_PK_FMAC_F16_e32:
6648 case AMDGPU::V_PK_FMAC_F16_e64:
6649 case AMDGPU::V_PK_LSHLREV_B16:
6650 case AMDGPU::V_PK_LSHRREV_B16:
6651 case AMDGPU::V_PK_MAD_I16:
6652 case AMDGPU::V_PK_MAD_U16:
6653 case AMDGPU::V_PK_MAX_F16:
6654 case AMDGPU::V_PK_MAX_I16:
6655 case AMDGPU::V_PK_MAX_U16:
6656 case AMDGPU::V_PK_MIN_F16:
6657 case AMDGPU::V_PK_MIN_I16:
6658 case AMDGPU::V_PK_MIN_U16:
6659 case AMDGPU::V_PK_MOV_B32:
6660 case AMDGPU::V_PK_MUL_F16:
6661 case AMDGPU::V_PK_MUL_F32:
6662 case AMDGPU::V_PK_MUL_LO_U16:
6663 case AMDGPU::V_PK_SUB_I16:
6664 case AMDGPU::V_PK_SUB_U16:
6665 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6674 unsigned Opc =
MI.getOpcode();
6677 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6680 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6686 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6687 RI.isSGPRReg(MRI, Src0.
getReg()))
6693 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6695 if (Src0.
isReg() && RI.isVGPR(MRI, Src0.
getReg())) {
6701 if (Src1.
isReg() && RI.isVGPR(MRI, Src1.
getReg())) {
6712 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6713 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6714 if (!RI.isVGPR(MRI,
MI.getOperand(Src2Idx).getReg()))
6726 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6727 RI.isVGPR(MRI, Src1.
getReg())) {
6740 if (HasImplicitSGPR || !
MI.isCommutable()) {
6757 if (CommutedOpc == -1) {
6762 MI.setDesc(
get(CommutedOpc));
6766 bool Src0Kill = Src0.
isKill();
6770 else if (Src1.
isReg()) {
6785 unsigned Opc =
MI.getOpcode();
6788 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6789 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6790 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6793 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6794 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6795 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6796 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6797 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6798 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6799 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6809 if (VOP3Idx[2] != -1) {
6821 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6822 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6824 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6826 SGPRsUsed.
insert(SGPRReg);
6830 for (
int Idx : VOP3Idx) {
6839 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6851 if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.
getReg())))
6858 if (ConstantBusLimit > 0) {
6870 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6871 !RI.isVGPR(MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6877 for (
unsigned I = 0;
I < 3; ++
I) {
6890 SRC = RI.getCommonSubClass(SRC, DstRC);
6893 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6895 if (RI.hasAGPRs(VRC)) {
6896 VRC = RI.getEquivalentVGPRClass(VRC);
6899 get(TargetOpcode::COPY), NewSrcReg)
6906 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6912 for (
unsigned i = 0; i < SubRegs; ++i) {
6915 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6916 .
addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
6922 get(AMDGPU::REG_SEQUENCE), DstReg);
6923 for (
unsigned i = 0; i < SubRegs; ++i) {
6925 MIB.
addImm(RI.getSubRegFromChannel(i));
6938 if (SBase && !RI.isSGPRClass(MRI.
getRegClass(SBase->getReg()))) {
6940 SBase->setReg(SGPR);
6943 if (SOff && !RI.isSGPRReg(MRI, SOff->
getReg())) {
6951 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6952 if (OldSAddrIdx < 0)
6965 if (RI.isSGPRReg(MRI, SAddr.
getReg()))
6968 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6969 if (NewVAddrIdx < 0)
6972 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6976 if (OldVAddrIdx >= 0) {
6990 if (OldVAddrIdx == NewVAddrIdx) {
7001 assert(OldSAddrIdx == NewVAddrIdx);
7003 if (OldVAddrIdx >= 0) {
7004 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
7005 AMDGPU::OpName::vdst_in);
7009 if (NewVDstIn != -1) {
7010 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
7016 if (NewVDstIn != -1) {
7017 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
7058 unsigned OpSubReg =
Op.getSubReg();
7061 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
7077 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7080 bool ImpDef = Def->isImplicitDef();
7081 while (!ImpDef && Def && Def->isCopy()) {
7082 if (Def->getOperand(1).getReg().isPhysical())
7085 ImpDef = Def && Def->isImplicitDef();
7087 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7106 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7112 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
7113 unsigned NumSubRegs =
RegSize / 32;
7114 Register VScalarOp = ScalarOp->getReg();
7116 if (NumSubRegs == 1) {
7119 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7124 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7130 CondReg = NewCondReg;
7140 ScalarOp->setReg(CurReg);
7141 ScalarOp->setIsKill();
7145 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7146 "Unhandled register size");
7148 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7155 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7156 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
7159 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7160 .
addReg(VScalarOp, VScalarOpUndef,
7161 TRI->getSubRegFromChannel(Idx + 1));
7168 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7175 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
7178 if (NumSubRegs <= 2)
7179 Cmp.addReg(VScalarOp);
7181 Cmp.addReg(VScalarOp, VScalarOpUndef,
7182 TRI->getSubRegFromChannel(Idx, 2));
7186 CondReg = NewCondReg;
7196 const auto *SScalarOpRC =
7202 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7203 unsigned Channel = 0;
7204 for (
Register Piece : ReadlanePieces) {
7205 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
7209 ScalarOp->setReg(SScalarOp);
7210 ScalarOp->setIsKill();
7246 if (!Begin.isValid())
7248 if (!End.isValid()) {
7254 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7262 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
7263 std::numeric_limits<unsigned>::max()) !=
7281 for (
auto I = Begin;
I != AfterMI;
I++) {
7282 for (
auto &MO :
I->all_uses())
7308 MBB.addSuccessor(LoopBB);
7318 for (
auto &Succ : RemainderBB->
successors()) {
7342static std::tuple<unsigned, unsigned>
7350 TII.buildExtractSubReg(
MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7351 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7358 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7375 .
addImm(AMDGPU::sub0_sub1)
7381 return std::tuple(RsrcPtr, NewSRsrc);
7418 if (
MI.getOpcode() == AMDGPU::PHI) {
7420 assert(!RI.isSGPRClass(VRC));
7423 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7425 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7441 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7444 if (RI.hasVGPRs(DstRC)) {
7448 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7450 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7468 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7473 if (DstRC != Src0RC) {
7482 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7484 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7490 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7491 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7492 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7493 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7494 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7495 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7496 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7498 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7511 ? AMDGPU::OpName::rsrc
7512 : AMDGPU::OpName::srsrc;
7517 AMDGPU::OpName SampOpName =
7518 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7527 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7533 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7534 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7539 while (Start->getOpcode() != FrameSetupOpcode)
7542 while (End->getOpcode() != FrameDestroyOpcode)
7546 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7547 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7555 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7559 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7569 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d2 ||
7570 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d4 ||
7571 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d2 ||
7572 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d4) {
7574 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7581 bool isSoffsetLegal =
true;
7583 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7584 if (SoffsetIdx != -1) {
7588 isSoffsetLegal =
false;
7592 bool isRsrcLegal =
true;
7594 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7595 if (RsrcIdx != -1) {
7597 if (Rsrc->
isReg() && !RI.isSGPRReg(MRI, Rsrc->
getReg()))
7598 isRsrcLegal =
false;
7602 if (isRsrcLegal && isSoffsetLegal)
7630 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7634 unsigned RsrcPtr, NewSRsrc;
7641 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7642 .addReg(VAddr->
getReg(), {}, AMDGPU::sub0)
7648 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7649 .addReg(VAddr->
getReg(), {}, AMDGPU::sub1)
7662 }
else if (!VAddr && ST.hasAddr64()) {
7666 "FIXME: Need to emit flat atomics here");
7668 unsigned RsrcPtr, NewSRsrc;
7694 MIB.
addImm(CPol->getImm());
7699 MIB.
addImm(TFE->getImm());
7719 MI.removeFromParent();
7724 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7725 .addImm(AMDGPU::sub0)
7726 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7727 .addImm(AMDGPU::sub1);
7730 if (!isSoffsetLegal) {
7741 if (!isSoffsetLegal) {
7753 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7754 if (RsrcIdx != -1) {
7755 DeferredList.insert(
MI);
7760 return DeferredList.contains(
MI);
7770 if (!ST.useRealTrue16Insts())
7773 unsigned Opcode =
MI.getOpcode();
7777 OpIdx >=
get(Opcode).getNumOperands() ||
7778 get(Opcode).operands()[
OpIdx].RegClass == -1)
7782 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7786 if (!RI.isVGPRClass(CurrRC))
7789 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7791 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7792 Op.setSubReg(AMDGPU::lo16);
7793 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7803 Op.setReg(NewDstReg);
7815 while (!Worklist.
empty()) {
7829 "Deferred MachineInstr are not supposed to re-populate worklist");
7849 case AMDGPU::S_ADD_I32:
7850 case AMDGPU::S_SUB_I32: {
7854 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7862 case AMDGPU::S_MUL_U64:
7863 if (ST.hasVectorMulU64()) {
7864 NewOpcode = AMDGPU::V_MUL_U64_e64;
7868 splitScalarSMulU64(Worklist, Inst, MDT);
7872 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7873 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7876 splitScalarSMulPseudo(Worklist, Inst, MDT);
7880 case AMDGPU::S_AND_B64:
7881 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7885 case AMDGPU::S_OR_B64:
7886 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7890 case AMDGPU::S_XOR_B64:
7891 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7895 case AMDGPU::S_NAND_B64:
7896 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7900 case AMDGPU::S_NOR_B64:
7901 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7905 case AMDGPU::S_XNOR_B64:
7906 if (ST.hasDLInsts())
7907 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7909 splitScalar64BitXnor(Worklist, Inst, MDT);
7913 case AMDGPU::S_ANDN2_B64:
7914 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7918 case AMDGPU::S_ORN2_B64:
7919 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7923 case AMDGPU::S_BREV_B64:
7924 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7928 case AMDGPU::S_NOT_B64:
7929 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7933 case AMDGPU::S_BCNT1_I32_B64:
7934 splitScalar64BitBCNT(Worklist, Inst);
7938 case AMDGPU::S_BFE_I64:
7939 splitScalar64BitBFE(Worklist, Inst);
7943 case AMDGPU::S_FLBIT_I32_B64:
7944 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7947 case AMDGPU::S_FF1_I32_B64:
7948 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7952 case AMDGPU::S_LSHL_B32:
7953 if (ST.hasOnlyRevVALUShifts()) {
7954 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7958 case AMDGPU::S_ASHR_I32:
7959 if (ST.hasOnlyRevVALUShifts()) {
7960 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7964 case AMDGPU::S_LSHR_B32:
7965 if (ST.hasOnlyRevVALUShifts()) {
7966 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7970 case AMDGPU::S_LSHL_B64:
7971 if (ST.hasOnlyRevVALUShifts()) {
7973 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7974 : AMDGPU::V_LSHLREV_B64_e64;
7978 case AMDGPU::S_ASHR_I64:
7979 if (ST.hasOnlyRevVALUShifts()) {
7980 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7984 case AMDGPU::S_LSHR_B64:
7985 if (ST.hasOnlyRevVALUShifts()) {
7986 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7991 case AMDGPU::S_ABS_I32:
7992 lowerScalarAbs(Worklist, Inst);
7996 case AMDGPU::S_ABSDIFF_I32:
7997 lowerScalarAbsDiff(Worklist, Inst);
8001 case AMDGPU::S_CBRANCH_SCC0:
8002 case AMDGPU::S_CBRANCH_SCC1: {
8005 bool IsSCC = CondReg == AMDGPU::SCC;
8013 case AMDGPU::S_BFE_U64:
8014 case AMDGPU::S_BFM_B64:
8017 case AMDGPU::S_PACK_LL_B32_B16:
8018 case AMDGPU::S_PACK_LH_B32_B16:
8019 case AMDGPU::S_PACK_HL_B32_B16:
8020 case AMDGPU::S_PACK_HH_B32_B16:
8021 movePackToVALU(Worklist, MRI, Inst);
8025 case AMDGPU::S_XNOR_B32:
8026 lowerScalarXnor(Worklist, Inst);
8030 case AMDGPU::S_NAND_B32:
8031 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
8035 case AMDGPU::S_NOR_B32:
8036 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
8040 case AMDGPU::S_ANDN2_B32:
8041 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
8045 case AMDGPU::S_ORN2_B32:
8046 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
8054 case AMDGPU::S_ADD_CO_PSEUDO:
8055 case AMDGPU::S_SUB_CO_PSEUDO: {
8056 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
8057 ? AMDGPU::V_ADDC_U32_e64
8058 : AMDGPU::V_SUBB_U32_e64;
8059 const auto *CarryRC = RI.getWaveMaskRegClass();
8081 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8085 case AMDGPU::S_UADDO_PSEUDO:
8086 case AMDGPU::S_USUBO_PSEUDO: {
8092 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8093 ? AMDGPU::V_ADD_CO_U32_e64
8094 : AMDGPU::V_SUB_CO_U32_e64;
8106 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8110 case AMDGPU::S_LSHL1_ADD_U32:
8111 case AMDGPU::S_LSHL2_ADD_U32:
8112 case AMDGPU::S_LSHL3_ADD_U32:
8113 case AMDGPU::S_LSHL4_ADD_U32: {
8117 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8118 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8119 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8133 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8137 case AMDGPU::S_CSELECT_B32:
8138 case AMDGPU::S_CSELECT_B64:
8139 lowerSelect(Worklist, Inst, MDT);
8142 case AMDGPU::S_CMP_EQ_I32:
8143 case AMDGPU::S_CMP_LG_I32:
8144 case AMDGPU::S_CMP_GT_I32:
8145 case AMDGPU::S_CMP_GE_I32:
8146 case AMDGPU::S_CMP_LT_I32:
8147 case AMDGPU::S_CMP_LE_I32:
8148 case AMDGPU::S_CMP_EQ_U32:
8149 case AMDGPU::S_CMP_LG_U32:
8150 case AMDGPU::S_CMP_GT_U32:
8151 case AMDGPU::S_CMP_GE_U32:
8152 case AMDGPU::S_CMP_LT_U32:
8153 case AMDGPU::S_CMP_LE_U32:
8154 case AMDGPU::S_CMP_EQ_U64:
8155 case AMDGPU::S_CMP_LG_U64:
8156 case AMDGPU::S_CMP_LT_F32:
8157 case AMDGPU::S_CMP_EQ_F32:
8158 case AMDGPU::S_CMP_LE_F32:
8159 case AMDGPU::S_CMP_GT_F32:
8160 case AMDGPU::S_CMP_LG_F32:
8161 case AMDGPU::S_CMP_GE_F32:
8162 case AMDGPU::S_CMP_O_F32:
8163 case AMDGPU::S_CMP_U_F32:
8164 case AMDGPU::S_CMP_NGE_F32:
8165 case AMDGPU::S_CMP_NLG_F32:
8166 case AMDGPU::S_CMP_NGT_F32:
8167 case AMDGPU::S_CMP_NLE_F32:
8168 case AMDGPU::S_CMP_NEQ_F32:
8169 case AMDGPU::S_CMP_NLT_F32: {
8174 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8188 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8192 case AMDGPU::S_CMP_LT_F16:
8193 case AMDGPU::S_CMP_EQ_F16:
8194 case AMDGPU::S_CMP_LE_F16:
8195 case AMDGPU::S_CMP_GT_F16:
8196 case AMDGPU::S_CMP_LG_F16:
8197 case AMDGPU::S_CMP_GE_F16:
8198 case AMDGPU::S_CMP_O_F16:
8199 case AMDGPU::S_CMP_U_F16:
8200 case AMDGPU::S_CMP_NGE_F16:
8201 case AMDGPU::S_CMP_NLG_F16:
8202 case AMDGPU::S_CMP_NGT_F16:
8203 case AMDGPU::S_CMP_NLE_F16:
8204 case AMDGPU::S_CMP_NEQ_F16:
8205 case AMDGPU::S_CMP_NLT_F16: {
8228 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8232 case AMDGPU::S_CVT_HI_F32_F16: {
8235 if (ST.useRealTrue16Insts()) {
8240 .
addReg(TmpReg, {}, AMDGPU::hi16)
8256 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8260 case AMDGPU::S_MINIMUM_F32:
8261 case AMDGPU::S_MAXIMUM_F32: {
8273 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8277 case AMDGPU::S_MINIMUM_F16:
8278 case AMDGPU::S_MAXIMUM_F16: {
8280 ? &AMDGPU::VGPR_16RegClass
8281 : &AMDGPU::VGPR_32RegClass);
8293 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8297 case AMDGPU::V_S_EXP_F16_e64:
8298 case AMDGPU::V_S_LOG_F16_e64:
8299 case AMDGPU::V_S_RCP_F16_e64:
8300 case AMDGPU::V_S_RSQ_F16_e64:
8301 case AMDGPU::V_S_SQRT_F16_e64: {
8303 ? &AMDGPU::VGPR_16RegClass
8304 : &AMDGPU::VGPR_32RegClass);
8316 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8322 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8330 if (NewOpcode == Opcode) {
8340 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8354 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8361 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8392 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8396 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8402 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8409 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8411 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8416 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8424 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8434 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8435 AMDGPU::OpName::src0_modifiers) >= 0)
8439 NewInstr->addOperand(Src);
8442 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8445 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8447 NewInstr.addImm(
Size);
8448 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8452 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8457 "Scalar BFE is only implemented for constant width and offset");
8465 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8466 AMDGPU::OpName::src1_modifiers) >= 0)
8468 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8470 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8471 AMDGPU::OpName::src2_modifiers) >= 0)
8473 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8475 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8477 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8479 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8485 NewInstr->addOperand(
Op);
8492 if (
Op.getReg() == AMDGPU::SCC) {
8494 if (
Op.isDef() && !
Op.isDead())
8495 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8497 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8502 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8503 Register DstReg = NewInstr->getOperand(0).getReg();
8518 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8522std::pair<bool, MachineBasicBlock *>
8525 if (ST.hasAddNoCarryInsts()) {
8537 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8539 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8540 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8551 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8552 return std::pair(
true, NewBB);
8555 return std::pair(
false,
nullptr);
8572 bool IsSCC = (CondReg == AMDGPU::SCC);
8586 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8591 bool CopyFound =
false;
8592 for (MachineInstr &CandI :
8595 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8597 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8599 .
addReg(CandI.getOperand(1).getReg());
8611 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8620 MachineInstr *NewInst;
8621 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8622 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8637 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8652 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8653 : AMDGPU::V_SUB_CO_U32_e32;
8664 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8681 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8682 : AMDGPU::V_SUB_CO_U32_e32;
8695 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8709 if (ST.hasDLInsts()) {
8719 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8725 bool Src0IsSGPR = Src0.
isReg() &&
8727 bool Src1IsSGPR = Src1.
isReg() &&
8741 }
else if (Src1IsSGPR) {
8759 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8765 unsigned Opcode)
const {
8789 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8794 unsigned Opcode)
const {
8818 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8833 const MCInstrDesc &InstDesc =
get(Opcode);
8834 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
8836 &AMDGPU::SGPR_32RegClass;
8838 const TargetRegisterClass *Src0SubRC =
8839 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8842 AMDGPU::sub0, Src0SubRC);
8845 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8846 const TargetRegisterClass *NewDestSubRC =
8847 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8850 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0).
add(SrcReg0Sub0);
8853 AMDGPU::sub1, Src0SubRC);
8856 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1).
add(SrcReg0Sub1);
8870 Worklist.
insert(&LoHalf);
8871 Worklist.
insert(&HiHalf);
8877 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8900 const TargetRegisterClass *Src0SubRC =
8901 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8902 if (RI.isSGPRClass(Src0SubRC))
8903 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8904 const TargetRegisterClass *Src1SubRC =
8905 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8906 if (RI.isSGPRClass(Src1SubRC))
8907 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8911 MachineOperand Op0L =
8913 MachineOperand Op1L =
8915 MachineOperand Op0H =
8917 MachineOperand Op1H =
8936 MachineInstr *Op1L_Op0H =
8942 MachineInstr *Op1H_Op0L =
8948 MachineInstr *Carry =
8953 MachineInstr *LoHalf =
8963 MachineInstr *HiHalf =
8986 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9009 const TargetRegisterClass *Src0SubRC =
9010 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9011 if (RI.isSGPRClass(Src0SubRC))
9012 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9013 const TargetRegisterClass *Src1SubRC =
9014 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9015 if (RI.isSGPRClass(Src1SubRC))
9016 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9020 MachineOperand Op0L =
9022 MachineOperand Op1L =
9026 unsigned NewOpc =
Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
9027 ? AMDGPU::V_MUL_HI_U32_e64
9028 : AMDGPU::V_MUL_HI_I32_e64;
9029 MachineInstr *HiHalf =
9032 MachineInstr *LoHalf =
9051 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9067 const MCInstrDesc &InstDesc =
get(Opcode);
9068 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
9070 &AMDGPU::SGPR_32RegClass;
9072 const TargetRegisterClass *Src0SubRC =
9073 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9074 const TargetRegisterClass *Src1RC = Src1.
isReg() ?
9076 &AMDGPU::SGPR_32RegClass;
9078 const TargetRegisterClass *Src1SubRC =
9079 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9082 AMDGPU::sub0, Src0SubRC);
9084 AMDGPU::sub0, Src1SubRC);
9086 AMDGPU::sub1, Src0SubRC);
9088 AMDGPU::sub1, Src1SubRC);
9091 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9092 const TargetRegisterClass *NewDestSubRC =
9093 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9096 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0)
9101 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1)
9114 Worklist.
insert(&LoHalf);
9115 Worklist.
insert(&HiHalf);
9118 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9138 MachineOperand* Op0;
9139 MachineOperand* Op1;
9141 if (Src0.
isReg() && RI.isSGPRReg(MRI, Src0.
getReg())) {
9174 const MCInstrDesc &InstDesc =
get(AMDGPU::V_BCNT_U32_B32_e64);
9175 const TargetRegisterClass *SrcRC = Src.isReg() ?
9177 &AMDGPU::SGPR_32RegClass;
9182 const TargetRegisterClass *SrcSubRC =
9183 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9186 AMDGPU::sub0, SrcSubRC);
9188 AMDGPU::sub1, SrcSubRC);
9198 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9217 Offset == 0 &&
"Not implemented");
9240 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9250 .
addReg(Src.getReg(), {}, AMDGPU::sub0);
9253 .
addReg(Src.getReg(), {}, AMDGPU::sub0)
9259 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9278 const MCInstrDesc &InstDesc =
get(Opcode);
9280 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9281 unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9282 : AMDGPU::V_ADD_CO_U32_e32;
9284 const TargetRegisterClass *SrcRC =
9285 Src.isReg() ? MRI.
getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9286 const TargetRegisterClass *SrcSubRC =
9287 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9289 MachineOperand SrcRegSub0 =
9291 MachineOperand SrcRegSub1 =
9304 .
addReg(IsCtlz ? MidReg1 : MidReg2)
9310 .
addReg(IsCtlz ? MidReg2 : MidReg1);
9314 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
9317void SIInstrInfo::addUsersToMoveToVALUWorklist(
9321 MachineInstr &
UseMI = *MO.getParent();
9325 switch (
UseMI.getOpcode()) {
9328 case AMDGPU::SOFT_WQM:
9329 case AMDGPU::STRICT_WWM:
9330 case AMDGPU::STRICT_WQM:
9331 case AMDGPU::REG_SEQUENCE:
9333 case AMDGPU::INSERT_SUBREG:
9336 OpNo = MO.getOperandNo();
9343 if (!RI.hasVectorRegisters(OpRC))
9360 if (ST.useRealTrue16Insts()) {
9362 if (!Src0.
isReg() || !RI.isVGPR(MRI, Src0.
getReg())) {
9365 get(Src0.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg0)
9371 if (!Src1.
isReg() || !RI.isVGPR(MRI, Src1.
getReg())) {
9374 get(Src1.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg1)
9383 auto NewMI =
BuildMI(*
MBB, Inst,
DL,
get(AMDGPU::REG_SEQUENCE), ResultReg);
9385 case AMDGPU::S_PACK_LL_B32_B16:
9387 .addReg(SrcReg0, {},
9388 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9389 .addImm(AMDGPU::lo16)
9390 .addReg(SrcReg1, {},
9391 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9392 .addImm(AMDGPU::hi16);
9394 case AMDGPU::S_PACK_LH_B32_B16:
9396 .addReg(SrcReg0, {},
9397 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9398 .addImm(AMDGPU::lo16)
9399 .addReg(SrcReg1, {}, AMDGPU::hi16)
9400 .addImm(AMDGPU::hi16);
9402 case AMDGPU::S_PACK_HL_B32_B16:
9403 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9404 .addImm(AMDGPU::lo16)
9405 .addReg(SrcReg1, {},
9406 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9407 .addImm(AMDGPU::hi16);
9409 case AMDGPU::S_PACK_HH_B32_B16:
9410 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9411 .addImm(AMDGPU::lo16)
9412 .addReg(SrcReg1, {}, AMDGPU::hi16)
9413 .addImm(AMDGPU::hi16);
9421 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9426 case AMDGPU::S_PACK_LL_B32_B16: {
9445 case AMDGPU::S_PACK_LH_B32_B16: {
9455 case AMDGPU::S_PACK_HL_B32_B16: {
9466 case AMDGPU::S_PACK_HH_B32_B16: {
9486 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9495 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
9496 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
9497 SmallVector<MachineInstr *, 4> CopyToDelete;
9500 for (MachineInstr &
MI :
9504 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
9507 MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9508 Register DestReg =
MI.getOperand(0).getReg();
9515 MI.getOperand(SCCIdx).setReg(NewCond);
9521 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
9524 for (
auto &Copy : CopyToDelete)
9525 Copy->eraseFromParent();
9533void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
9539 for (MachineInstr &
MI :
9542 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
9544 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
9553 const TargetRegisterClass *NewDstRC =
getOpRegClass(Inst, 0);
9561 case AMDGPU::REG_SEQUENCE:
9562 case AMDGPU::INSERT_SUBREG:
9564 case AMDGPU::SOFT_WQM:
9565 case AMDGPU::STRICT_WWM:
9566 case AMDGPU::STRICT_WQM: {
9568 if (RI.isAGPRClass(SrcRC)) {
9569 if (RI.isAGPRClass(NewDstRC))
9574 case AMDGPU::REG_SEQUENCE:
9575 case AMDGPU::INSERT_SUBREG:
9576 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9579 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9585 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9588 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9602 int OpIndices[3])
const {
9603 const MCInstrDesc &
Desc =
MI.getDesc();
9619 const MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9621 for (
unsigned i = 0; i < 3; ++i) {
9622 int Idx = OpIndices[i];
9626 const MachineOperand &MO =
MI.getOperand(Idx);
9632 const TargetRegisterClass *OpRC =
9633 RI.getRegClass(getOpRegClassID(
Desc.operands()[Idx]));
9634 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9641 if (RI.isSGPRClass(RegRC))
9659 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9660 SGPRReg = UsedSGPRs[0];
9663 if (!SGPRReg && UsedSGPRs[1]) {
9664 if (UsedSGPRs[1] == UsedSGPRs[2])
9665 SGPRReg = UsedSGPRs[1];
9672 AMDGPU::OpName OperandName)
const {
9673 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9676 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9680 return &
MI.getOperand(Idx);
9694 if (ST.isAmdHsaOS()) {
9697 RsrcDataFormat |= (1ULL << 56);
9702 RsrcDataFormat |= (2ULL << 59);
9705 return RsrcDataFormat;
9715 uint64_t EltSizeValue =
Log2_32(ST.getMaxPrivateElementSize(
true)) - 1;
9720 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9727 Rsrc23 &=
~AMDGPU::RSRC_DATA_FORMAT;
9733 unsigned Opc =
MI.getOpcode();
9739 return get(
Opc).mayLoad() &&
9746 if (!Addr || !Addr->
isFI())
9755 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdata);
9757 return MI.getOperand(VDataIdx).getReg();
9767 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::data);
9769 return MI.getOperand(DataIdx).getReg();
9806 while (++
I != E &&
I->isInsideBundle()) {
9807 assert(!
I->isBundle() &&
"No nested bundle!");
9815 unsigned Opc =
MI.getOpcode();
9817 unsigned DescSize =
Desc.getSize();
9822 unsigned Size = DescSize;
9826 if (
MI.isBranch() && ST.hasOffset3fBug())
9837 bool HasLiteral =
false;
9838 unsigned LiteralSize = 4;
9839 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
9844 if (ST.has64BitLiterals()) {
9845 switch (OpInfo.OperandType) {
9868 return HasLiteral ? DescSize + LiteralSize : DescSize;
9873 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
9877 int RSrcIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::srsrc);
9878 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9882 case TargetOpcode::BUNDLE:
9884 case TargetOpcode::INLINEASM:
9885 case TargetOpcode::INLINEASM_BR: {
9887 const char *AsmStr =
MI.getOperand(0).getSymbolName();
9891 if (
MI.isMetaInstruction())
9895 const auto *D16Info = AMDGPU::getT16D16Helper(
Opc);
9898 unsigned LoInstOpcode = D16Info->LoOp;
9900 DescSize =
Desc.getSize();
9904 if (
Opc == AMDGPU::V_FMA_MIX_F16_t16 ||
Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9907 DescSize =
Desc.getSize();
9918 if (
MI.memoperands_empty())
9930 static const std::pair<int, const char *> TargetIndices[] = {
9969std::pair<unsigned, unsigned>
9976 static const std::pair<unsigned, const char *> TargetFlags[] = {
9994 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
10010 return AMDGPU::WWM_COPY;
10012 return AMDGPU::COPY;
10029 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
10033 if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
10034 return IsLRSplitInst;
10047 bool IsNullOrVectorRegister =
true;
10051 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
10054 return IsNullOrVectorRegister &&
10056 (!
MI.isTerminator() &&
MI.getOpcode() != AMDGPU::COPY &&
10057 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
10065 if (ST.hasAddNoCarryInsts())
10081 if (ST.hasAddNoCarryInsts())
10085 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
10087 : RS.scavengeRegisterBackwards(
10088 *RI.getBoolRC(),
I,
false,
10101 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10102 case AMDGPU::SI_KILL_I1_TERMINATOR:
10111 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10112 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10113 case AMDGPU::SI_KILL_I1_PSEUDO:
10114 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10126 const unsigned OffsetBits =
10128 return (1 << OffsetBits) - 1;
10132 if (!ST.isWave32())
10135 if (
MI.isInlineAsm())
10138 if (
MI.getNumOperands() <
MI.getNumExplicitOperands())
10141 for (
auto &
Op :
MI.implicit_operands()) {
10142 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
10143 Op.setReg(AMDGPU::VCC_LO);
10152 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sbase);
10156 const int16_t RCID = getOpRegClassID(
MI.getDesc().operands()[Idx]);
10157 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10173 if (Imm > MaxImm) {
10174 if (Imm <= MaxImm + 64) {
10176 Overflow = Imm - MaxImm;
10195 if (Overflow > 0) {
10203 if (ST.hasRestrictedSOffset())
10208 SOffset = Overflow;
10246 if (!ST.hasFlatInstOffsets())
10254 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10266std::pair<int64_t, int64_t>
10269 int64_t RemainderOffset = COffsetVal;
10270 int64_t ImmField = 0;
10275 if (AllowNegative) {
10277 int64_t
D = 1LL << NumBits;
10278 RemainderOffset = (COffsetVal /
D) *
D;
10279 ImmField = COffsetVal - RemainderOffset;
10281 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10283 (ImmField % 4) != 0) {
10285 RemainderOffset += ImmField % 4;
10286 ImmField -= ImmField % 4;
10288 }
else if (COffsetVal >= 0) {
10290 RemainderOffset = COffsetVal - ImmField;
10294 assert(RemainderOffset + ImmField == COffsetVal);
10295 return {ImmField, RemainderOffset};
10299 if (ST.hasNegativeScratchOffsetBug() &&
10307 switch (ST.getGeneration()) {
10336 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10337 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10338 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10339 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10340 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10341 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10342 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10343 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10350#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10351 case OPCODE##_dpp: \
10352 case OPCODE##_e32: \
10353 case OPCODE##_e64: \
10354 case OPCODE##_e64_dpp: \
10355 case OPCODE##_sdwa:
10369 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10370 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10371 case AMDGPU::V_FMA_F16_gfx9_e64:
10372 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10373 case AMDGPU::V_INTERP_P2_F16:
10374 case AMDGPU::V_MAD_F16_e64:
10375 case AMDGPU::V_MAD_U16_e64:
10376 case AMDGPU::V_MAD_I16_e64:
10385 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10399 switch (ST.getGeneration()) {
10412 if (
isMAI(Opcode)) {
10420 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX11_7Insts())
10423 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX1250Insts())
10430 if (ST.hasGFX90AInsts()) {
10431 uint32_t NMCOp = AMDGPU::INSTRUCTION_LIST_END;
10432 if (ST.hasGFX940Insts())
10434 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10436 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10438 if (NMCOp != AMDGPU::INSTRUCTION_LIST_END)
10444 if (MCOp == AMDGPU::INSTRUCTION_LIST_END)
10463 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
10464 if (
MI.getOperand(1 + 2 *
I + 1).getImm() == SubReg) {
10465 auto &RegOp =
MI.getOperand(1 + 2 *
I);
10477 switch (
MI.getOpcode()) {
10479 case AMDGPU::REG_SEQUENCE:
10483 case AMDGPU::INSERT_SUBREG:
10484 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
10501 if (!
P.Reg.isVirtual())
10506 while (
auto *
MI = DefInst) {
10508 switch (
MI->getOpcode()) {
10510 case AMDGPU::V_MOV_B32_e32: {
10511 auto &Op1 =
MI->getOperand(1);
10540 auto *DefBB =
DefMI.getParent();
10544 if (
UseMI.getParent() != DefBB)
10547 const int MaxInstScan = 20;
10551 auto E =
UseMI.getIterator();
10552 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
10553 if (
I->isDebugInstr())
10556 if (++NumInst > MaxInstScan)
10559 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
10572 auto *DefBB =
DefMI.getParent();
10574 const int MaxUseScan = 10;
10578 auto &UseInst = *
Use.getParent();
10581 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10584 if (++NumUse > MaxUseScan)
10591 const int MaxInstScan = 20;
10595 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
10598 if (
I->isDebugInstr())
10601 if (++NumInst > MaxInstScan)
10614 if (Reg == VReg && --NumUse == 0)
10616 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
10625 auto Cur =
MBB.begin();
10626 if (Cur !=
MBB.end())
10628 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
10631 }
while (Cur !=
MBB.end() && Cur != LastPHIIt);
10640 if (InsPt !=
MBB.end() &&
10641 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10642 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10643 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10644 InsPt->definesRegister(Src,
nullptr)) {
10648 .
addReg(Src, {}, SrcSubReg)
10673 if (isFullCopyInstr(
MI)) {
10674 Register DstReg =
MI.getOperand(0).getReg();
10675 Register SrcReg =
MI.getOperand(1).getReg();
10697 unsigned *PredCost)
const {
10698 if (
MI.isBundle()) {
10701 unsigned Lat = 0,
Count = 0;
10702 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
10704 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
10706 return Lat +
Count - 1;
10709 return SchedModel.computeInstrLatency(&
MI);
10716 return *CallAddrOp;
10723 unsigned Opcode =
MI.getOpcode();
10725 auto HandleAddrSpaceCast = [
this, &MRI](
const MachineInstr &
MI) {
10728 :
MI.getOperand(1).getReg();
10732 unsigned SrcAS = SrcTy.getAddressSpace();
10735 ST.hasGloballyAddressableScratch()
10743 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10744 return HandleAddrSpaceCast(
MI);
10747 auto IID = GI->getIntrinsicID();
10754 case Intrinsic::amdgcn_addrspacecast_nonnull:
10755 return HandleAddrSpaceCast(
MI);
10756 case Intrinsic::amdgcn_if:
10757 case Intrinsic::amdgcn_else:
10771 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10772 Opcode == AMDGPU::G_SEXTLOAD) {
10773 if (
MI.memoperands_empty())
10777 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10778 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10786 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10787 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10788 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10797 Formatter = std::make_unique<AMDGPUMIRFormatter>(ST);
10798 return Formatter.get();
10806 unsigned opcode =
MI.getOpcode();
10807 if (opcode == AMDGPU::V_READLANE_B32 ||
10808 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10809 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10812 if (isCopyInstr(
MI)) {
10816 RI.getPhysRegBaseClass(srcOp.
getReg());
10824 if (
MI.isPreISelOpcode())
10839 if (
MI.memoperands_empty())
10843 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10844 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10859 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
10861 if (!
SrcOp.isReg())
10865 if (!Reg || !
SrcOp.readsReg())
10871 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
10898 F,
"ds_ordered_count unsupported for this calling conv"));
10912 Register &SrcReg2, int64_t &CmpMask,
10913 int64_t &CmpValue)
const {
10914 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
10917 switch (
MI.getOpcode()) {
10920 case AMDGPU::S_CMP_EQ_U32:
10921 case AMDGPU::S_CMP_EQ_I32:
10922 case AMDGPU::S_CMP_LG_U32:
10923 case AMDGPU::S_CMP_LG_I32:
10924 case AMDGPU::S_CMP_LT_U32:
10925 case AMDGPU::S_CMP_LT_I32:
10926 case AMDGPU::S_CMP_GT_U32:
10927 case AMDGPU::S_CMP_GT_I32:
10928 case AMDGPU::S_CMP_LE_U32:
10929 case AMDGPU::S_CMP_LE_I32:
10930 case AMDGPU::S_CMP_GE_U32:
10931 case AMDGPU::S_CMP_GE_I32:
10932 case AMDGPU::S_CMP_EQ_U64:
10933 case AMDGPU::S_CMP_LG_U64:
10934 SrcReg =
MI.getOperand(0).getReg();
10935 if (
MI.getOperand(1).isReg()) {
10936 if (
MI.getOperand(1).getSubReg())
10938 SrcReg2 =
MI.getOperand(1).getReg();
10940 }
else if (
MI.getOperand(1).isImm()) {
10942 CmpValue =
MI.getOperand(1).getImm();
10948 case AMDGPU::S_CMPK_EQ_U32:
10949 case AMDGPU::S_CMPK_EQ_I32:
10950 case AMDGPU::S_CMPK_LG_U32:
10951 case AMDGPU::S_CMPK_LG_I32:
10952 case AMDGPU::S_CMPK_LT_U32:
10953 case AMDGPU::S_CMPK_LT_I32:
10954 case AMDGPU::S_CMPK_GT_U32:
10955 case AMDGPU::S_CMPK_GT_I32:
10956 case AMDGPU::S_CMPK_LE_U32:
10957 case AMDGPU::S_CMPK_LE_I32:
10958 case AMDGPU::S_CMPK_GE_U32:
10959 case AMDGPU::S_CMPK_GE_I32:
10960 SrcReg =
MI.getOperand(0).getReg();
10962 CmpValue =
MI.getOperand(1).getImm();
10972 if (S->isLiveIn(AMDGPU::SCC))
10981bool SIInstrInfo::invertSCCUse(
MachineInstr *SCCDef)
const {
10984 bool SCCIsDead =
false;
10987 constexpr unsigned ScanLimit = 12;
10988 unsigned Count = 0;
10989 for (MachineInstr &
MI :
10991 if (++
Count > ScanLimit)
10993 if (
MI.readsRegister(AMDGPU::SCC, &RI)) {
10994 if (
MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
10995 MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
10996 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10997 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
11002 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
11015 for (MachineInstr *
MI : InvertInstr) {
11016 if (
MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
11017 MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
11019 }
else if (
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11020 MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
11021 MI->setDesc(
get(
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
11022 ? AMDGPU::S_CBRANCH_SCC1
11023 : AMDGPU::S_CBRANCH_SCC0));
11036 bool NeedInversion)
const {
11037 MachineInstr *KillsSCC =
nullptr;
11042 if (
MI.modifiesRegister(AMDGPU::SCC, &RI))
11044 if (
MI.killsRegister(AMDGPU::SCC, &RI))
11047 if (NeedInversion && !invertSCCUse(SCCRedefine))
11049 if (MachineOperand *SccDef =
11051 SccDef->setIsDead(
false);
11059 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
11060 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
11062 bool Op1IsNonZeroImm =
11063 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
11064 bool Op2IsZeroImm =
11065 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
11066 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
11072 unsigned &NewDefOpc) {
11075 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
11076 Def.getOpcode() != AMDGPU::S_ADD_U32)
11082 if ((!AddSrc1.
isImm() || AddSrc1.
getImm() != 1) &&
11088 if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
11090 Def.findRegisterDefOperand(AMDGPU::SCC,
nullptr);
11093 NewDefOpc = AMDGPU::S_ADD_U32;
11095 NeedInversion = !NeedInversion;
11100 Register SrcReg2, int64_t CmpMask,
11109 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
11110 this](
bool NeedInversion) ->
bool {
11134 unsigned NewDefOpc = Def->getOpcode();
11140 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11143 if (NewDefOpc != Def->getOpcode())
11144 Def->setDesc(
get(NewDefOpc));
11153 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11160 if (Def1 && Def1->
getOpcode() == AMDGPU::COPY && Def2 &&
11168 optimizeSCC(
Select, Def,
false);
11175 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11176 this](int64_t ExpectedValue,
unsigned SrcSize,
11177 bool IsReversible,
bool IsSigned) ->
bool {
11205 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11206 Def->getOpcode() != AMDGPU::S_AND_B64)
11210 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
11221 SrcOp = &Def->getOperand(2);
11222 else if (isMask(&Def->getOperand(2)))
11223 SrcOp = &Def->getOperand(1);
11231 if (IsSigned && BitNo == SrcSize - 1)
11234 ExpectedValue <<= BitNo;
11236 bool IsReversedCC =
false;
11237 if (CmpValue != ExpectedValue) {
11240 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11245 Register DefReg = Def->getOperand(0).getReg();
11249 if (!optimizeSCC(Def, &CmpInstr,
false))
11260 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11261 : AMDGPU::S_BITCMP1_B32
11262 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11263 : AMDGPU::S_BITCMP1_B64;
11268 Def->eraseFromParent();
11276 case AMDGPU::S_CMP_EQ_U32:
11277 case AMDGPU::S_CMP_EQ_I32:
11278 case AMDGPU::S_CMPK_EQ_U32:
11279 case AMDGPU::S_CMPK_EQ_I32:
11280 return optimizeCmpAnd(1, 32,
true,
false) ||
11281 optimizeCmpSelect(
true);
11282 case AMDGPU::S_CMP_GE_U32:
11283 case AMDGPU::S_CMPK_GE_U32:
11284 return optimizeCmpAnd(1, 32,
false,
false);
11285 case AMDGPU::S_CMP_GE_I32:
11286 case AMDGPU::S_CMPK_GE_I32:
11287 return optimizeCmpAnd(1, 32,
false,
true);
11288 case AMDGPU::S_CMP_EQ_U64:
11289 return optimizeCmpAnd(1, 64,
true,
false);
11290 case AMDGPU::S_CMP_LG_U32:
11291 case AMDGPU::S_CMP_LG_I32:
11292 case AMDGPU::S_CMPK_LG_U32:
11293 case AMDGPU::S_CMPK_LG_I32:
11294 return optimizeCmpAnd(0, 32,
true,
false) ||
11295 optimizeCmpSelect(
false);
11296 case AMDGPU::S_CMP_GT_U32:
11297 case AMDGPU::S_CMPK_GT_U32:
11298 return optimizeCmpAnd(0, 32,
false,
false);
11299 case AMDGPU::S_CMP_GT_I32:
11300 case AMDGPU::S_CMPK_GT_I32:
11301 return optimizeCmpAnd(0, 32,
false,
true);
11302 case AMDGPU::S_CMP_LG_U64:
11303 return optimizeCmpAnd(0, 64,
true,
false) ||
11304 optimizeCmpSelect(
false);
11311 AMDGPU::OpName
OpName)
const {
11312 if (!ST.needsAlignedVGPRs())
11315 int OpNo = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
OpName);
11327 bool IsAGPR = RI.isAGPR(MRI, DataReg);
11329 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11333 : &AMDGPU::VReg_64_Align2RegClass);
11335 .
addReg(DataReg, {},
Op.getSubReg())
11340 Op.setSubReg(AMDGPU::sub0);
11355 if (ST.hasGFX1250Insts())
11362 unsigned Opcode =
MI.getOpcode();
11368 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11369 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11372 if (!ST.hasGFX940Insts())
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static MachineBasicBlock * loadScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool setsSCCIfResultIsZero(const MachineInstr &Def, bool &NeedInversion, unsigned &NewDefOpc)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps)
Move NumOps operands from Src to Dst, updating use-def lists as needed.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
LLVM_ABI void clearVirtRegs()
clearVirtRegs - Remove all virtual registers (after physreg assignment).
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
void setSimpleHint(Register VReg, Register PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI void removeRegOperandFromUseList(MachineOperand *MO)
Remove MO from its use-def list.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI void addRegOperandToUseList(MachineOperand *MO)
Add MO to the linked list of operands for its register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
bool isXDLWMMA(const MachineInstr &MI) const
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
bool isSpill(uint32_t Opcode) const
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
const MIRFormatter * getMIRFormatter() const override
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
Register isStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
ValueUniformity getGenericValueUniformity(const MachineInstr &MI) const
static bool isMAI(const MCInstrDesc &Desc)
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const override
static bool usesLGKM_CNT(const MachineInstr &MI)
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst to fix 16bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
bool isAlwaysGDS(uint32_t Opcode) const
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminate with divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool isWWMRegSpillOpcode(uint32_t Opcode)
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
ValueUniformity getValueUniformity(const MachineInstr &MI) const final
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
unsigned getScratchReservedForDynamicVGPRs() const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination lets target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination lets target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int32_t getCommuteRev(uint32_t Opcode)
LLVM_READONLY int32_t getCommuteOrig(uint32_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READONLY int32_t getGlobalVaddrOp(uint32_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getIfAddr64Inst(uint32_t Opcode)
Check if Opcode is an Addr64 opcode.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
LLVM_READONLY int32_t getAddr64Inst(uint32_t Opcode)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int32_t getBasicFromSDWAOp(uint32_t Opcode)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
LLVM_READONLY int32_t getFlatScratchInstSVfromSS(uint32_t Opcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions starting from FirstMI to LastMI (exclusive).
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iterators.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the start of the kernel to the load.
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subreg-to-reg instructions.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa&lt;X&gt; - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address from the specified value V, returning the original object being addressed.
constexpr RegState getUndefRegState(bool B)
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
@ AlwaysUniform
The result value is always uniform.
@ NeverUniform
The result value can never be assumed to be uniform.
@ Default
The result value is uniform if and only if all operands are uniform.
MachineCycleInfo::CycleT MachineCycle
static const MachineMemOperand::Flags MOThreadPrivate
Mark the MMO of accesses to memory locations that are never written to by other threads.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructions.
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
constexpr bool all() const
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.