34#include "llvm/IR/IntrinsicsAMDGPU.h"
41#define DEBUG_TYPE "si-instr-info"
43#define GET_INSTRINFO_CTOR_DTOR
44#include "AMDGPUGenInstrInfo.inc"
47#define GET_D16ImageDimIntrinsics_IMPL
48#define GET_ImageDimIntrinsicTable_IMPL
49#define GET_RsrcIntrinsics_IMPL
50#include "AMDGPUGenSearchableTables.inc"
58 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
61 "amdgpu-fix-16-bit-physreg-copies",
62 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
78 unsigned N =
Node->getNumOperands();
79 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
91 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0,
OpName);
92 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1,
OpName);
94 if (Op0Idx == -1 && Op1Idx == -1)
98 if ((Op0Idx == -1 && Op1Idx != -1) ||
99 (Op1Idx == -1 && Op0Idx != -1))
120 return !
MI.memoperands_empty() &&
122 return MMO->isLoad() && MMO->isInvariant();
144 if (!
MI.hasImplicitDef() &&
145 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
146 !
MI.mayRaiseFPException())
154bool SIInstrInfo::resultDependsOnExec(
const MachineInstr &
MI)
const {
157 if (
MI.isCompare()) {
168 switch (
Use.getOpcode()) {
169 case AMDGPU::S_AND_SAVEEXEC_B32:
170 case AMDGPU::S_AND_SAVEEXEC_B64:
172 case AMDGPU::S_AND_B32:
173 case AMDGPU::S_AND_B64:
174 if (!
Use.readsRegister(AMDGPU::EXEC,
nullptr))
185 if (!
MI.isConvergent())
188 switch (
MI.getOpcode()) {
191 case AMDGPU::V_READFIRSTLANE_B32:
208 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
213 for (
auto Op :
MI.uses()) {
214 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
220 if (FromCycle ==
nullptr)
226 while (FromCycle && !FromCycle->
contains(ToCycle)) {
246 int64_t &Offset1)
const {
254 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
258 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
274 int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
275 int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
276 if (Offset0Idx == -1 || Offset1Idx == -1)
283 Offset0Idx -=
get(Opc0).NumDefs;
284 Offset1Idx -=
get(Opc1).NumDefs;
314 if (!Load0Offset || !Load1Offset)
331 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
332 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
334 if (OffIdx0 == -1 || OffIdx1 == -1)
340 OffIdx0 -=
get(Opc0).NumDefs;
341 OffIdx1 -=
get(Opc1).NumDefs;
360 case AMDGPU::DS_READ2ST64_B32:
361 case AMDGPU::DS_READ2ST64_B64:
362 case AMDGPU::DS_WRITE2ST64_B32:
363 case AMDGPU::DS_WRITE2ST64_B64:
378 OffsetIsScalable =
false;
395 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
397 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
398 if (
Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
411 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
412 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
413 if (Offset0 + 1 != Offset1)
424 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
432 Offset = EltSize * Offset0;
434 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
435 if (DataOpIdx == -1) {
436 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
438 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
454 if (BaseOp && !BaseOp->
isFI())
462 if (SOffset->
isReg())
468 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
470 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
479 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
480 int SRsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RsrcOpName);
482 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
483 if (VAddr0Idx >= 0) {
485 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
492 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
507 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
524 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
526 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
543 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
551 if (MO1->getAddrSpace() != MO2->getAddrSpace())
554 const auto *Base1 = MO1->getValue();
555 const auto *Base2 = MO2->getValue();
556 if (!Base1 || !Base2)
564 return Base1 == Base2;
568 int64_t Offset1,
bool OffsetIsScalable1,
570 int64_t Offset2,
bool OffsetIsScalable2,
571 unsigned ClusterSize,
572 unsigned NumBytes)
const {
585 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
604 const unsigned LoadSize = NumBytes / ClusterSize;
605 const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
606 return NumDWords <= MaxMemoryClusterDWords;
620 int64_t Offset0, int64_t Offset1,
621 unsigned NumLoads)
const {
622 assert(Offset1 > Offset0 &&
623 "Second offset should be larger than first offset!");
628 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
635 const char *Msg =
"illegal VGPR to SGPR copy") {
656 assert((
TII.getSubtarget().hasMAIInsts() &&
657 !
TII.getSubtarget().hasGFX90AInsts()) &&
658 "Expected GFX908 subtarget.");
661 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
662 "Source register of the copy should be either an SGPR or an AGPR.");
665 "Destination register of the copy should be an AGPR.");
674 for (
auto Def =
MI,
E =
MBB.begin(); Def !=
E; ) {
677 if (!Def->modifiesRegister(SrcReg, &RI))
680 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
681 Def->getOperand(0).getReg() != SrcReg)
688 bool SafeToPropagate =
true;
691 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
692 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
693 SafeToPropagate =
false;
695 if (!SafeToPropagate)
698 for (
auto I = Def;
I !=
MI; ++
I)
699 I->clearRegisterKills(DefOp.
getReg(), &RI);
708 if (ImpUseSuperReg) {
709 Builder.addReg(ImpUseSuperReg,
717 RS.enterBasicBlockEnd(
MBB);
718 RS.backward(std::next(
MI));
727 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
730 assert(
MBB.getParent()->getRegInfo().isReserved(Tmp) &&
731 "VGPR used for an intermediate copy should have been reserved.");
736 Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
746 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
747 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
748 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
755 if (ImpUseSuperReg) {
756 UseBuilder.
addReg(ImpUseSuperReg,
777 for (
unsigned Idx = 0; Idx < BaseIndices.
size(); ++Idx) {
778 int16_t SubIdx = BaseIndices[Idx];
779 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
780 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
781 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
782 unsigned Opcode = AMDGPU::S_MOV_B32;
785 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
786 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
787 if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.
size())) {
791 DestSubReg = RI.getSubReg(DestReg, SubIdx);
792 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
793 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
794 Opcode = AMDGPU::S_MOV_B64;
809 assert(FirstMI && LastMI);
817 LastMI->addRegisterKilled(SrcReg, &RI);
823 Register SrcReg,
bool KillSrc,
bool RenamableDest,
824 bool RenamableSrc)
const {
826 unsigned Size = RI.getRegSizeInBits(*RC);
828 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
834 if (((
Size == 16) != (SrcSize == 16))) {
836 assert(ST.useRealTrue16Insts());
838 MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
841 if (DestReg == SrcReg) {
847 RC = RI.getPhysRegBaseClass(DestReg);
848 Size = RI.getRegSizeInBits(*RC);
849 SrcRC = RI.getPhysRegBaseClass(SrcReg);
850 SrcSize = RI.getRegSizeInBits(*SrcRC);
854 if (RC == &AMDGPU::VGPR_32RegClass) {
856 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
857 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
858 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
859 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
865 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
866 RC == &AMDGPU::SReg_32RegClass) {
867 if (SrcReg == AMDGPU::SCC) {
874 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
875 if (DestReg == AMDGPU::VCC_LO) {
893 if (RC == &AMDGPU::SReg_64RegClass) {
894 if (SrcReg == AMDGPU::SCC) {
901 if (!AMDGPU::SReg_64_EncodableRegClass.
contains(SrcReg)) {
902 if (DestReg == AMDGPU::VCC) {
920 if (DestReg == AMDGPU::SCC) {
923 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
927 assert(ST.hasScalarCompareEq64());
941 if (RC == &AMDGPU::AGPR_32RegClass) {
942 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
943 (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
949 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg) && ST.hasGFX90AInsts()) {
958 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
965 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
966 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
968 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
969 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
970 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
971 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
974 MCRegister NewDestReg = RI.get32BitRegister(DestReg);
975 MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
988 if (IsAGPRDst || IsAGPRSrc) {
989 if (!DstLow || !SrcLow) {
991 "Cannot use hi16 subreg with an AGPR!");
998 if (ST.useRealTrue16Insts()) {
1004 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
1005 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
1017 if (IsSGPRSrc && !ST.hasSDWAScalar()) {
1018 if (!DstLow || !SrcLow) {
1020 "Cannot use hi16 subreg on VI!");
1043 if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
1044 if (ST.hasMovB64()) {
1049 if (ST.hasPkMovB32()) {
1065 const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
1066 if (RI.isSGPRClass(RC)) {
1067 if (!RI.isSGPRClass(SrcRC)) {
1071 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1077 unsigned EltSize = 4;
1078 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1079 if (RI.isAGPRClass(RC)) {
1080 if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
1081 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1082 else if (RI.hasVGPRs(SrcRC) ||
1083 (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
1084 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1086 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1087 }
else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
1088 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1089 }
else if ((
Size % 64 == 0) && RI.hasVGPRs(RC) &&
1090 (RI.isProperlyAlignedRC(*RC) &&
1091 (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
1093 if (ST.hasMovB64()) {
1094 Opcode = AMDGPU::V_MOV_B64_e32;
1096 }
else if (ST.hasPkMovB32()) {
1097 Opcode = AMDGPU::V_PK_MOV_B32;
1107 std::unique_ptr<RegScavenger> RS;
1108 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1109 RS = std::make_unique<RegScavenger>();
1115 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1116 const bool CanKillSuperReg = KillSrc && !Overlap;
1118 for (
unsigned Idx = 0; Idx < SubIndices.
size(); ++Idx) {
1121 SubIdx = SubIndices[Idx];
1123 SubIdx = SubIndices[SubIndices.
size() - Idx - 1];
1124 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1125 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1126 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1128 bool IsFirstSubreg = Idx == 0;
1129 bool UseKill = CanKillSuperReg && Idx == SubIndices.
size() - 1;
1131 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1135 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1136 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1182 return &AMDGPU::VGPR_32RegClass;
1195 "Not a VGPR32 reg");
1197 if (
Cond.size() == 1) {
1207 }
else if (
Cond.size() == 2) {
1208 assert(
Cond[0].isImm() &&
"Cond[0] is not an immediate");
1210 case SIInstrInfo::SCC_TRUE: {
1221 case SIInstrInfo::SCC_FALSE: {
1232 case SIInstrInfo::VCCNZ: {
1246 case SIInstrInfo::VCCZ: {
1260 case SIInstrInfo::EXECNZ: {
1273 case SIInstrInfo::EXECZ: {
1323 int64_t &ImmVal)
const {
1324 switch (
MI.getOpcode()) {
1325 case AMDGPU::V_MOV_B32_e32:
1326 case AMDGPU::S_MOV_B32:
1327 case AMDGPU::S_MOVK_I32:
1328 case AMDGPU::S_MOV_B64:
1329 case AMDGPU::V_MOV_B64_e32:
1330 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1331 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1332 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1333 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1334 case AMDGPU::V_MOV_B64_PSEUDO:
1335 case AMDGPU::V_MOV_B16_t16_e32: {
1339 return MI.getOperand(0).getReg() == Reg;
1344 case AMDGPU::V_MOV_B16_t16_e64: {
1346 if (Src0.
isImm() && !
MI.getOperand(1).getImm()) {
1348 return MI.getOperand(0).getReg() == Reg;
1353 case AMDGPU::S_BREV_B32:
1354 case AMDGPU::V_BFREV_B32_e32:
1355 case AMDGPU::V_BFREV_B32_e64: {
1359 return MI.getOperand(0).getReg() == Reg;
1364 case AMDGPU::S_NOT_B32:
1365 case AMDGPU::V_NOT_B32_e32:
1366 case AMDGPU::V_NOT_B32_e64: {
1369 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1370 return MI.getOperand(0).getReg() == Reg;
1380std::optional<int64_t>
1385 if (!
Op.isReg() || !
Op.getReg().isVirtual())
1386 return std::nullopt;
1389 if (Def && Def->isMoveImmediate()) {
1395 return std::nullopt;
1400 if (RI.isAGPRClass(DstRC))
1401 return AMDGPU::COPY;
1402 if (RI.getRegSizeInBits(*DstRC) == 16) {
1405 return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1407 if (RI.getRegSizeInBits(*DstRC) == 32)
1408 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1409 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
1410 return AMDGPU::S_MOV_B64;
1411 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
1412 return AMDGPU::V_MOV_B64_PSEUDO;
1413 return AMDGPU::COPY;
1418 bool IsIndirectSrc)
const {
1419 if (IsIndirectSrc) {
1421 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1423 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1425 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1427 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1429 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1431 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
1433 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
1435 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1437 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1439 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1441 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1443 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1445 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1446 if (VecSize <= 1024)
1447 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1453 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1455 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1457 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1459 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1461 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1463 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
1465 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
1467 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1469 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1471 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1473 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1475 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1477 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1478 if (VecSize <= 1024)
1479 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1486 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1488 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1490 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1492 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1494 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1496 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1498 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1500 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1502 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1504 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1506 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1508 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1510 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1511 if (VecSize <= 1024)
1512 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1519 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1521 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1523 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1525 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1527 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1529 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1531 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1533 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1535 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1537 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1539 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1541 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1543 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1544 if (VecSize <= 1024)
1545 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1552 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1554 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1556 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1558 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1559 if (VecSize <= 1024)
1560 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1567 bool IsSGPR)
const {
1579 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1586 return AMDGPU::SI_SPILL_S32_SAVE;
1588 return AMDGPU::SI_SPILL_S64_SAVE;
1590 return AMDGPU::SI_SPILL_S96_SAVE;
1592 return AMDGPU::SI_SPILL_S128_SAVE;
1594 return AMDGPU::SI_SPILL_S160_SAVE;
1596 return AMDGPU::SI_SPILL_S192_SAVE;
1598 return AMDGPU::SI_SPILL_S224_SAVE;
1600 return AMDGPU::SI_SPILL_S256_SAVE;
1602 return AMDGPU::SI_SPILL_S288_SAVE;
1604 return AMDGPU::SI_SPILL_S320_SAVE;
1606 return AMDGPU::SI_SPILL_S352_SAVE;
1608 return AMDGPU::SI_SPILL_S384_SAVE;
1610 return AMDGPU::SI_SPILL_S512_SAVE;
1612 return AMDGPU::SI_SPILL_S1024_SAVE;
1621 return AMDGPU::SI_SPILL_V16_SAVE;
1623 return AMDGPU::SI_SPILL_V32_SAVE;
1625 return AMDGPU::SI_SPILL_V64_SAVE;
1627 return AMDGPU::SI_SPILL_V96_SAVE;
1629 return AMDGPU::SI_SPILL_V128_SAVE;
1631 return AMDGPU::SI_SPILL_V160_SAVE;
1633 return AMDGPU::SI_SPILL_V192_SAVE;
1635 return AMDGPU::SI_SPILL_V224_SAVE;
1637 return AMDGPU::SI_SPILL_V256_SAVE;
1639 return AMDGPU::SI_SPILL_V288_SAVE;
1641 return AMDGPU::SI_SPILL_V320_SAVE;
1643 return AMDGPU::SI_SPILL_V352_SAVE;
1645 return AMDGPU::SI_SPILL_V384_SAVE;
1647 return AMDGPU::SI_SPILL_V512_SAVE;
1649 return AMDGPU::SI_SPILL_V1024_SAVE;
1658 return AMDGPU::SI_SPILL_AV32_SAVE;
1660 return AMDGPU::SI_SPILL_AV64_SAVE;
1662 return AMDGPU::SI_SPILL_AV96_SAVE;
1664 return AMDGPU::SI_SPILL_AV128_SAVE;
1666 return AMDGPU::SI_SPILL_AV160_SAVE;
1668 return AMDGPU::SI_SPILL_AV192_SAVE;
1670 return AMDGPU::SI_SPILL_AV224_SAVE;
1672 return AMDGPU::SI_SPILL_AV256_SAVE;
1674 return AMDGPU::SI_SPILL_AV288_SAVE;
1676 return AMDGPU::SI_SPILL_AV320_SAVE;
1678 return AMDGPU::SI_SPILL_AV352_SAVE;
1680 return AMDGPU::SI_SPILL_AV384_SAVE;
1682 return AMDGPU::SI_SPILL_AV512_SAVE;
1684 return AMDGPU::SI_SPILL_AV1024_SAVE;
1691 bool IsVectorSuperClass) {
1696 if (IsVectorSuperClass)
1697 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1699 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1705 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1712 if (ST.hasMAIInsts())
1731 FrameInfo.getObjectAlign(FrameIndex));
1732 unsigned SpillSize = RI.getSpillSize(*RC);
1735 if (RI.isSGPRClass(RC)) {
1737 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1738 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1739 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1747 if (SrcReg.
isVirtual() && SpillSize == 4) {
1757 if (RI.spillSGPRToVGPR())
1777 return AMDGPU::SI_SPILL_S32_RESTORE;
1779 return AMDGPU::SI_SPILL_S64_RESTORE;
1781 return AMDGPU::SI_SPILL_S96_RESTORE;
1783 return AMDGPU::SI_SPILL_S128_RESTORE;
1785 return AMDGPU::SI_SPILL_S160_RESTORE;
1787 return AMDGPU::SI_SPILL_S192_RESTORE;
1789 return AMDGPU::SI_SPILL_S224_RESTORE;
1791 return AMDGPU::SI_SPILL_S256_RESTORE;
1793 return AMDGPU::SI_SPILL_S288_RESTORE;
1795 return AMDGPU::SI_SPILL_S320_RESTORE;
1797 return AMDGPU::SI_SPILL_S352_RESTORE;
1799 return AMDGPU::SI_SPILL_S384_RESTORE;
1801 return AMDGPU::SI_SPILL_S512_RESTORE;
1803 return AMDGPU::SI_SPILL_S1024_RESTORE;
1812 return AMDGPU::SI_SPILL_V16_RESTORE;
1814 return AMDGPU::SI_SPILL_V32_RESTORE;
1816 return AMDGPU::SI_SPILL_V64_RESTORE;
1818 return AMDGPU::SI_SPILL_V96_RESTORE;
1820 return AMDGPU::SI_SPILL_V128_RESTORE;
1822 return AMDGPU::SI_SPILL_V160_RESTORE;
1824 return AMDGPU::SI_SPILL_V192_RESTORE;
1826 return AMDGPU::SI_SPILL_V224_RESTORE;
1828 return AMDGPU::SI_SPILL_V256_RESTORE;
1830 return AMDGPU::SI_SPILL_V288_RESTORE;
1832 return AMDGPU::SI_SPILL_V320_RESTORE;
1834 return AMDGPU::SI_SPILL_V352_RESTORE;
1836 return AMDGPU::SI_SPILL_V384_RESTORE;
1838 return AMDGPU::SI_SPILL_V512_RESTORE;
1840 return AMDGPU::SI_SPILL_V1024_RESTORE;
1849 return AMDGPU::SI_SPILL_AV32_RESTORE;
1851 return AMDGPU::SI_SPILL_AV64_RESTORE;
1853 return AMDGPU::SI_SPILL_AV96_RESTORE;
1855 return AMDGPU::SI_SPILL_AV128_RESTORE;
1857 return AMDGPU::SI_SPILL_AV160_RESTORE;
1859 return AMDGPU::SI_SPILL_AV192_RESTORE;
1861 return AMDGPU::SI_SPILL_AV224_RESTORE;
1863 return AMDGPU::SI_SPILL_AV256_RESTORE;
1865 return AMDGPU::SI_SPILL_AV288_RESTORE;
1867 return AMDGPU::SI_SPILL_AV320_RESTORE;
1869 return AMDGPU::SI_SPILL_AV352_RESTORE;
1871 return AMDGPU::SI_SPILL_AV384_RESTORE;
1873 return AMDGPU::SI_SPILL_AV512_RESTORE;
1875 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1882 bool IsVectorSuperClass) {
1887 if (IsVectorSuperClass)
1888 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1890 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1896 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1903 if (ST.hasMAIInsts())
1906 assert(!RI.isAGPRClass(RC));
1920 unsigned SpillSize = RI.getSpillSize(*RC);
1927 FrameInfo.getObjectAlign(FrameIndex));
1929 if (RI.isSGPRClass(RC)) {
1931 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1932 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1933 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1938 if (DestReg.
isVirtual() && SpillSize == 4) {
1943 if (RI.spillSGPRToVGPR())
1969 unsigned Quantity)
const {
1971 unsigned MaxSNopCount = 1u << ST.getSNopBits();
1972 while (Quantity > 0) {
1973 unsigned Arg = std::min(Quantity, MaxSNopCount);
1980 auto *MF =
MBB.getParent();
1983 assert(Info->isEntryFunction());
1985 if (
MBB.succ_empty()) {
1986 bool HasNoTerminator =
MBB.getFirstTerminator() ==
MBB.end();
1987 if (HasNoTerminator) {
1988 if (Info->returnsVoid()) {
2002 constexpr unsigned DoorbellIDMask = 0x3ff;
2003 constexpr unsigned ECQueueWaveAbort = 0x400;
2008 if (!
MBB.succ_empty() || std::next(
MI.getIterator()) !=
MBB.end()) {
2009 MBB.splitAt(
MI,
false);
2013 MBB.addSuccessor(TrapBB);
2023 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
2027 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_AND_B32), DoorbellRegMasked)
2032 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_OR_B32), SetWaveAbortBit)
2033 .
addUse(DoorbellRegMasked)
2034 .
addImm(ECQueueWaveAbort);
2035 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2036 .
addUse(SetWaveAbortBit);
2039 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2050 return MBB.getNextNode();
2054 switch (
MI.getOpcode()) {
2056 if (
MI.isMetaInstruction())
2061 return MI.getOperand(0).getImm() + 1;
2071 switch (
MI.getOpcode()) {
2073 case AMDGPU::S_MOV_B64_term:
2076 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2079 case AMDGPU::S_MOV_B32_term:
2082 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2085 case AMDGPU::S_XOR_B64_term:
2088 MI.setDesc(
get(AMDGPU::S_XOR_B64));
2091 case AMDGPU::S_XOR_B32_term:
2094 MI.setDesc(
get(AMDGPU::S_XOR_B32));
2096 case AMDGPU::S_OR_B64_term:
2099 MI.setDesc(
get(AMDGPU::S_OR_B64));
2101 case AMDGPU::S_OR_B32_term:
2104 MI.setDesc(
get(AMDGPU::S_OR_B32));
2107 case AMDGPU::S_ANDN2_B64_term:
2110 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
2113 case AMDGPU::S_ANDN2_B32_term:
2116 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
2119 case AMDGPU::S_AND_B64_term:
2122 MI.setDesc(
get(AMDGPU::S_AND_B64));
2125 case AMDGPU::S_AND_B32_term:
2128 MI.setDesc(
get(AMDGPU::S_AND_B32));
2131 case AMDGPU::S_AND_SAVEEXEC_B64_term:
2134 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
2137 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2140 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2143 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2144 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2147 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2148 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2150 case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2154 get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2157 case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
2160 int64_t Imm =
MI.getOperand(1).getImm();
2162 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2163 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2170 MI.eraseFromParent();
2176 case AMDGPU::V_MOV_B64_PSEUDO: {
2178 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2179 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2187 if (ST.hasMovB64() && Mov64RC->
contains(Dst)) {
2188 MI.setDesc(Mov64Desc);
2193 if (
SrcOp.isImm()) {
2195 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2196 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2222 if (ST.hasPkMovB32() &&
2243 MI.eraseFromParent();
2246 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2250 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2254 if (ST.has64BitLiterals()) {
2255 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2261 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2266 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2267 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2269 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2270 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2277 MI.eraseFromParent();
2280 case AMDGPU::V_SET_INACTIVE_B32: {
2284 .
add(
MI.getOperand(3))
2285 .
add(
MI.getOperand(4))
2286 .
add(
MI.getOperand(1))
2287 .
add(
MI.getOperand(2))
2288 .
add(
MI.getOperand(5));
2289 MI.eraseFromParent();
2292 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2293 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2294 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2295 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2296 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2297 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2298 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2299 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2300 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2301 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2302 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2303 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2304 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2305 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2306 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2307 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2308 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2309 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2310 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2311 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2312 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2313 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2314 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2315 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2316 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2317 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2318 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2319 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2320 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2321 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2322 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2323 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2324 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2328 if (RI.hasVGPRs(EltRC)) {
2329 Opc = AMDGPU::V_MOVRELD_B32_e32;
2331 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2332 : AMDGPU::S_MOVRELD_B32;
2337 bool IsUndef =
MI.getOperand(1).isUndef();
2338 unsigned SubReg =
MI.getOperand(3).getImm();
2339 assert(VecReg ==
MI.getOperand(1).getReg());
2344 .
add(
MI.getOperand(2))
2348 const int ImpDefIdx =
2350 const int ImpUseIdx = ImpDefIdx + 1;
2352 MI.eraseFromParent();
2355 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2356 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2357 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2358 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2359 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2360 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
2361 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
2362 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2363 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2364 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2365 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2366 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2367 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2368 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2369 assert(ST.useVGPRIndexMode());
2371 bool IsUndef =
MI.getOperand(1).isUndef();
2380 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2384 .
add(
MI.getOperand(2))
2388 const int ImpDefIdx =
2390 const int ImpUseIdx = ImpDefIdx + 1;
2397 MI.eraseFromParent();
2400 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2401 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2402 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2403 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2404 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2405 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
2406 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
2407 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2408 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2409 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2410 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2411 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2412 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2413 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2414 assert(ST.useVGPRIndexMode());
2417 bool IsUndef =
MI.getOperand(1).isUndef();
2421 .
add(
MI.getOperand(2))
2434 MI.eraseFromParent();
2437 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2440 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2441 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2460 if (ST.hasGetPCZeroExtension()) {
2464 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2471 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2481 MI.eraseFromParent();
2484 case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
2494 Op.setOffset(
Op.getOffset() + 4);
2496 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(
Op));
2500 MI.eraseFromParent();
2503 case AMDGPU::ENTER_STRICT_WWM: {
2509 case AMDGPU::ENTER_STRICT_WQM: {
2516 MI.eraseFromParent();
2519 case AMDGPU::EXIT_STRICT_WWM:
2520 case AMDGPU::EXIT_STRICT_WQM: {
2526 case AMDGPU::SI_RETURN: {
2540 MI.eraseFromParent();
2544 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2545 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2546 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2549 case AMDGPU::S_GETPC_B64_pseudo:
2550 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2551 if (ST.hasGetPCZeroExtension()) {
2553 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2562 case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
2563 assert(ST.hasBF16PackedInsts());
2564 MI.setDesc(
get(AMDGPU::V_PK_MAX_NUM_BF16));
2575 case AMDGPU::GET_STACK_BASE:
2578 if (ST.getFrameLowering()->mayReserveScratchForCWSR(*
MBB.getParent())) {
2585 Register DestReg =
MI.getOperand(0).getReg();
2595 MI.getOperand(
MI.getNumExplicitOperands()).setIsDead(
false);
2596 MI.getOperand(
MI.getNumExplicitOperands()).setIsUse();
2597 MI.setDesc(
get(AMDGPU::S_CMOVK_I32));
2600 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2603 MI.getNumExplicitOperands());
2621 case AMDGPU::S_MOV_B64:
2622 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2631 if (UsedLanes.
all())
2636 unsigned LoSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub0);
2637 unsigned HiSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub1);
2639 bool NeedLo = (UsedLanes & RI.getSubRegIndexLaneMask(LoSubReg)).any();
2640 bool NeedHi = (UsedLanes & RI.getSubRegIndexLaneMask(HiSubReg)).any();
2642 if (NeedLo && NeedHi)
2646 int32_t Imm32 = NeedLo ?
Lo_32(Imm64) :
Hi_32(Imm64);
2648 unsigned UseSubReg = NeedLo ? LoSubReg : HiSubReg;
2657 case AMDGPU::S_LOAD_DWORDX16_IMM:
2658 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2671 for (
auto &CandMO :
I->operands()) {
2672 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2680 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2684 unsigned SubregSize = RI.getSubRegIdxSize(UseMO->
getSubReg());
2690 unsigned NewOpcode = -1;
2691 if (SubregSize == 256)
2692 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2693 else if (SubregSize == 128)
2694 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2704 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2709 MI->getOperand(0).setReg(DestReg);
2710 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2714 OffsetMO->
setImm(FinalOffset);
2720 MI->setMemRefs(*MF, NewMMOs);
2733std::pair<MachineInstr*, MachineInstr*>
2735 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2737 if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2740 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2741 return std::pair(&
MI,
nullptr);
2752 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2754 if (Dst.isPhysical()) {
2755 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2762 for (
unsigned I = 1;
I <= 2; ++
I) {
2765 if (
SrcOp.isImm()) {
2767 Imm.ashrInPlace(Part * 32);
2768 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2772 if (Src.isPhysical())
2773 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2780 MovDPP.addImm(MO.getImm());
2782 Split[Part] = MovDPP;
2786 if (Dst.isVirtual())
2793 MI.eraseFromParent();
2794 return std::pair(Split[0], Split[1]);
2797std::optional<DestSourcePair>
2799 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2802 return std::nullopt;
2806 AMDGPU::OpName Src0OpName,
2808 AMDGPU::OpName Src1OpName)
const {
2815 "All commutable instructions have both src0 and src1 modifiers");
2817 int Src0ModsVal = Src0Mods->
getImm();
2818 int Src1ModsVal = Src1Mods->
getImm();
2820 Src1Mods->
setImm(Src0ModsVal);
2821 Src0Mods->
setImm(Src1ModsVal);
2830 bool IsKill = RegOp.
isKill();
2832 bool IsUndef = RegOp.
isUndef();
2833 bool IsDebug = RegOp.
isDebug();
2835 if (NonRegOp.
isImm())
2837 else if (NonRegOp.
isFI())
2858 int64_t NonRegVal = NonRegOp1.
getImm();
2861 NonRegOp2.
setImm(NonRegVal);
2868 unsigned OpIdx1)
const {
2873 unsigned Opc =
MI.getOpcode();
2874 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2884 if ((
int)OpIdx0 == Src0Idx && !MO0.
isReg() &&
2887 if ((
int)OpIdx1 == Src0Idx && !MO1.
isReg() &&
2892 if ((
int)OpIdx1 != Src0Idx && MO0.
isReg()) {
2898 if ((
int)OpIdx0 != Src0Idx && MO1.
isReg()) {
2913 unsigned Src1Idx)
const {
2914 assert(!NewMI &&
"this should never be used");
2916 unsigned Opc =
MI.getOpcode();
2918 if (CommutedOpcode == -1)
2921 if (Src0Idx > Src1Idx)
2924 assert(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) ==
2925 static_cast<int>(Src0Idx) &&
2926 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1) ==
2927 static_cast<int>(Src1Idx) &&
2928 "inconsistency with findCommutedOpIndices");
2953 Src1, AMDGPU::OpName::src1_modifiers);
2956 AMDGPU::OpName::src1_sel);
2968 unsigned &SrcOpIdx0,
2969 unsigned &SrcOpIdx1)
const {
2974 unsigned &SrcOpIdx0,
2975 unsigned &SrcOpIdx1)
const {
2976 if (!
Desc.isCommutable())
2979 unsigned Opc =
Desc.getOpcode();
2980 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2984 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2988 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2992 int64_t BrOffset)
const {
3009 return MI.getOperand(0).getMBB();
3014 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
3015 MI.getOpcode() == AMDGPU::SI_LOOP)
3027 "new block should be inserted for expanding unconditional branch");
3030 "restore block should be inserted for restoring clobbered registers");
3038 if (ST.useAddPC64Inst()) {
3040 MCCtx.createTempSymbol(
"offset",
true);
3044 MCCtx.createTempSymbol(
"post_addpc",
true);
3045 AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
3049 Offset->setVariableValue(OffsetExpr);
3053 assert(RS &&
"RegScavenger required for long branching");
3061 const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
3062 ST.hasVALUReadSGPRHazard();
3063 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
3064 if (FlushSGPRWrites)
3072 ApplyHazardWorkarounds();
3075 MCCtx.createTempSymbol(
"post_getpc",
true);
3079 MCCtx.createTempSymbol(
"offset_lo",
true);
3081 MCCtx.createTempSymbol(
"offset_hi",
true);
3084 .
addReg(PCReg, {}, AMDGPU::sub0)
3088 .
addReg(PCReg, {}, AMDGPU::sub1)
3090 ApplyHazardWorkarounds();
3131 if (LongBranchReservedReg) {
3132 RS->enterBasicBlock(
MBB);
3133 Scav = LongBranchReservedReg;
3135 RS->enterBasicBlockEnd(
MBB);
3136 Scav = RS->scavengeRegisterBackwards(
3141 RS->setRegUsed(Scav);
3149 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3166unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
3168 case SIInstrInfo::SCC_TRUE:
3169 return AMDGPU::S_CBRANCH_SCC1;
3170 case SIInstrInfo::SCC_FALSE:
3171 return AMDGPU::S_CBRANCH_SCC0;
3172 case SIInstrInfo::VCCNZ:
3173 return AMDGPU::S_CBRANCH_VCCNZ;
3174 case SIInstrInfo::VCCZ:
3175 return AMDGPU::S_CBRANCH_VCCZ;
3176 case SIInstrInfo::EXECNZ:
3177 return AMDGPU::S_CBRANCH_EXECNZ;
3178 case SIInstrInfo::EXECZ:
3179 return AMDGPU::S_CBRANCH_EXECZ;
3185SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3187 case AMDGPU::S_CBRANCH_SCC0:
3189 case AMDGPU::S_CBRANCH_SCC1:
3191 case AMDGPU::S_CBRANCH_VCCNZ:
3193 case AMDGPU::S_CBRANCH_VCCZ:
3195 case AMDGPU::S_CBRANCH_EXECNZ:
3197 case AMDGPU::S_CBRANCH_EXECZ:
3209 bool AllowModify)
const {
3210 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3212 TBB =
I->getOperand(0).getMBB();
3216 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3217 if (Pred == INVALID_BR)
3222 Cond.push_back(
I->getOperand(1));
3226 if (
I ==
MBB.end()) {
3232 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3234 FBB =
I->getOperand(0).getMBB();
3244 bool AllowModify)
const {
3252 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3253 switch (
I->getOpcode()) {
3254 case AMDGPU::S_MOV_B64_term:
3255 case AMDGPU::S_XOR_B64_term:
3256 case AMDGPU::S_OR_B64_term:
3257 case AMDGPU::S_ANDN2_B64_term:
3258 case AMDGPU::S_AND_B64_term:
3259 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3260 case AMDGPU::S_MOV_B32_term:
3261 case AMDGPU::S_XOR_B32_term:
3262 case AMDGPU::S_OR_B32_term:
3263 case AMDGPU::S_ANDN2_B32_term:
3264 case AMDGPU::S_AND_B32_term:
3265 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3268 case AMDGPU::SI_ELSE:
3269 case AMDGPU::SI_KILL_I1_TERMINATOR:
3270 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3287 int *BytesRemoved)
const {
3289 unsigned RemovedSize = 0;
3292 if (
MI.isBranch() ||
MI.isReturn()) {
3294 MI.eraseFromParent();
3300 *BytesRemoved = RemovedSize;
3317 int *BytesAdded)
const {
3318 if (!FBB &&
Cond.empty()) {
3322 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3329 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].
getImm()));
3341 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3359 *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
3366 if (
Cond.size() != 2) {
3370 if (
Cond[0].isImm()) {
3381 Register FalseReg,
int &CondCycles,
3382 int &TrueCycles,
int &FalseCycles)
const {
3392 CondCycles = TrueCycles = FalseCycles = NumInsts;
3395 return RI.hasVGPRs(RC) && NumInsts <= 6;
3409 if (NumInsts % 2 == 0)
3412 CondCycles = TrueCycles = FalseCycles = NumInsts;
3413 return RI.isSGPRClass(RC);
3424 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3425 if (Pred == VCCZ || Pred == SCC_FALSE) {
3426 Pred =
static_cast<BranchPredicate
>(-Pred);
3432 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3434 if (DstSize == 32) {
3436 if (Pred == SCC_TRUE) {
3451 if (DstSize == 64 && Pred == SCC_TRUE) {
3461 static const int16_t Sub0_15[] = {
3462 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3463 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3464 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3465 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3468 static const int16_t Sub0_15_64[] = {
3469 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3470 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3471 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3472 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3475 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3477 const int16_t *SubIndices = Sub0_15;
3478 int NElts = DstSize / 32;
3482 if (Pred == SCC_TRUE) {
3484 SelOp = AMDGPU::S_CSELECT_B32;
3485 EltRC = &AMDGPU::SGPR_32RegClass;
3487 SelOp = AMDGPU::S_CSELECT_B64;
3488 EltRC = &AMDGPU::SGPR_64RegClass;
3489 SubIndices = Sub0_15_64;
3495 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3500 for (
int Idx = 0; Idx != NElts; ++Idx) {
3504 unsigned SubIdx = SubIndices[Idx];
3507 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3509 .
addReg(FalseReg, {}, SubIdx)
3510 .addReg(TrueReg, {}, SubIdx);
3513 .
addReg(TrueReg, {}, SubIdx)
3514 .addReg(FalseReg, {}, SubIdx);
3526 switch (
MI.getOpcode()) {
3527 case AMDGPU::V_MOV_B16_t16_e32:
3528 case AMDGPU::V_MOV_B16_t16_e64:
3529 case AMDGPU::V_MOV_B32_e32:
3530 case AMDGPU::V_MOV_B32_e64:
3531 case AMDGPU::V_MOV_B64_PSEUDO:
3532 case AMDGPU::V_MOV_B64_e32:
3533 case AMDGPU::V_MOV_B64_e64:
3534 case AMDGPU::S_MOV_B32:
3535 case AMDGPU::S_MOV_B64:
3536 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3538 case AMDGPU::WWM_COPY:
3539 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3540 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3541 case AMDGPU::V_ACCVGPR_MOV_B32:
3542 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3543 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3551 switch (
MI.getOpcode()) {
3552 case AMDGPU::V_MOV_B16_t16_e32:
3553 case AMDGPU::V_MOV_B16_t16_e64:
3555 case AMDGPU::V_MOV_B32_e32:
3556 case AMDGPU::V_MOV_B32_e64:
3557 case AMDGPU::V_MOV_B64_PSEUDO:
3558 case AMDGPU::V_MOV_B64_e32:
3559 case AMDGPU::V_MOV_B64_e64:
3560 case AMDGPU::S_MOV_B32:
3561 case AMDGPU::S_MOV_B64:
3562 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3564 case AMDGPU::WWM_COPY:
3565 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3566 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3567 case AMDGPU::V_ACCVGPR_MOV_B32:
3568 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3569 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3577 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3578 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3579 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3582 unsigned Opc =
MI.getOpcode();
3584 int Idx = AMDGPU::getNamedOperandIdx(
Opc, Name);
3586 MI.removeOperand(Idx);
3592 MI.setDesc(NewDesc);
3598 unsigned NumOps =
Desc.getNumOperands() +
Desc.implicit_uses().size() +
3599 Desc.implicit_defs().size();
3601 for (
unsigned I =
MI.getNumOperands() - 1;
I >=
NumOps; --
I)
3602 MI.removeOperand(
I);
3606 unsigned SubRegIndex) {
3607 switch (SubRegIndex) {
3608 case AMDGPU::NoSubRegister:
3618 case AMDGPU::sub1_lo16:
3620 case AMDGPU::sub1_hi16:
3623 return std::nullopt;
3631 case AMDGPU::V_MAC_F16_e32:
3632 case AMDGPU::V_MAC_F16_e64:
3633 case AMDGPU::V_MAD_F16_e64:
3634 return AMDGPU::V_MADAK_F16;
3635 case AMDGPU::V_MAC_F32_e32:
3636 case AMDGPU::V_MAC_F32_e64:
3637 case AMDGPU::V_MAD_F32_e64:
3638 return AMDGPU::V_MADAK_F32;
3639 case AMDGPU::V_FMAC_F32_e32:
3640 case AMDGPU::V_FMAC_F32_e64:
3641 case AMDGPU::V_FMA_F32_e64:
3642 return AMDGPU::V_FMAAK_F32;
3643 case AMDGPU::V_FMAC_F16_e32:
3644 case AMDGPU::V_FMAC_F16_e64:
3645 case AMDGPU::V_FMAC_F16_t16_e64:
3646 case AMDGPU::V_FMAC_F16_fake16_e64:
3647 case AMDGPU::V_FMAC_F16_t16_e32:
3648 case AMDGPU::V_FMAC_F16_fake16_e32:
3649 case AMDGPU::V_FMA_F16_e64:
3650 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3651 ? AMDGPU::V_FMAAK_F16_t16
3652 : AMDGPU::V_FMAAK_F16_fake16
3653 : AMDGPU::V_FMAAK_F16;
3654 case AMDGPU::V_FMAC_F64_e32:
3655 case AMDGPU::V_FMAC_F64_e64:
3656 case AMDGPU::V_FMA_F64_e64:
3657 return AMDGPU::V_FMAAK_F64;
3665 case AMDGPU::V_MAC_F16_e32:
3666 case AMDGPU::V_MAC_F16_e64:
3667 case AMDGPU::V_MAD_F16_e64:
3668 return AMDGPU::V_MADMK_F16;
3669 case AMDGPU::V_MAC_F32_e32:
3670 case AMDGPU::V_MAC_F32_e64:
3671 case AMDGPU::V_MAD_F32_e64:
3672 return AMDGPU::V_MADMK_F32;
3673 case AMDGPU::V_FMAC_F32_e32:
3674 case AMDGPU::V_FMAC_F32_e64:
3675 case AMDGPU::V_FMA_F32_e64:
3676 return AMDGPU::V_FMAMK_F32;
3677 case AMDGPU::V_FMAC_F16_e32:
3678 case AMDGPU::V_FMAC_F16_e64:
3679 case AMDGPU::V_FMAC_F16_t16_e64:
3680 case AMDGPU::V_FMAC_F16_fake16_e64:
3681 case AMDGPU::V_FMAC_F16_t16_e32:
3682 case AMDGPU::V_FMAC_F16_fake16_e32:
3683 case AMDGPU::V_FMA_F16_e64:
3684 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3685 ? AMDGPU::V_FMAMK_F16_t16
3686 : AMDGPU::V_FMAMK_F16_fake16
3687 : AMDGPU::V_FMAMK_F16;
3688 case AMDGPU::V_FMAC_F64_e32:
3689 case AMDGPU::V_FMAC_F64_e64:
3690 case AMDGPU::V_FMA_F64_e64:
3691 return AMDGPU::V_FMAMK_F64;
3705 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3708 if (
Opc == AMDGPU::COPY) {
3709 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3716 if (HasMultipleUses) {
3719 unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->
getRegClass(Reg));
3722 if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
3730 if (ImmDefSize == 32 &&
3735 bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
3736 RI.getSubRegIdxSize(UseSubReg) == 16;
3739 if (RI.hasVGPRs(DstRC))
3742 if (DstReg.
isVirtual() && UseSubReg != AMDGPU::lo16)
3748 unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
3755 for (
unsigned MovOp :
3756 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
3757 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
3765 MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
3769 if (MovDstPhysReg) {
3773 RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
3780 if (MovDstPhysReg) {
3781 if (!MovDstRC->
contains(MovDstPhysReg))
3797 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
3805 if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
3809 UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
3811 UseMI.getOperand(0).setReg(MovDstPhysReg);
3816 UseMI.setDesc(NewMCID);
3817 UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
3818 UseMI.addImplicitDefUseOperands(*MF);
3822 if (HasMultipleUses)
3825 if (
Opc == AMDGPU::V_MAD_F32_e64 ||
Opc == AMDGPU::V_MAC_F32_e64 ||
3826 Opc == AMDGPU::V_MAD_F16_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3827 Opc == AMDGPU::V_FMA_F32_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 ||
3828 Opc == AMDGPU::V_FMA_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 ||
3829 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3830 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMA_F64_e64 ||
3831 Opc == AMDGPU::V_FMAC_F64_e64) {
3840 int Src0Idx = getNamedOperandIdx(
UseMI.getOpcode(), AMDGPU::OpName::src0);
3851 auto CopyRegOperandToNarrowerRC =
3854 if (!
MI.getOperand(OpNo).isReg())
3858 if (RI.getCommonSubClass(RC, NewRC) != NewRC)
3861 BuildMI(*
MI.getParent(),
MI.getIterator(),
MI.getDebugLoc(),
3862 get(AMDGPU::COPY), Tmp)
3864 MI.getOperand(OpNo).setReg(Tmp);
3865 MI.getOperand(OpNo).setIsKill();
3872 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3873 if (!RegSrc->
isReg())
3876 ST.getConstantBusLimit(
Opc) < 2)
3891 if (Def && Def->isMoveImmediate() &&
3906 unsigned SrcSubReg = RegSrc->
getSubReg();
3911 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3912 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3913 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3914 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3915 UseMI.untieRegOperand(
3916 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3923 if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3924 NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
3928 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3929 UseMI.getOperand(0).getReg())
3931 UseMI.getOperand(0).setReg(Tmp);
3932 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3933 CopyRegOperandToNarrowerRC(
UseMI, 3, NewRC);
3938 DefMI.eraseFromParent();
3945 if (ST.getConstantBusLimit(
Opc) < 2) {
3948 bool Src0Inlined =
false;
3949 if (Src0->
isReg()) {
3954 if (Def && Def->isMoveImmediate() &&
3959 }
else if (ST.getConstantBusLimit(
Opc) <= 1 &&
3960 RI.isSGPRReg(*MRI, Src0->
getReg())) {
3966 if (Src1->
isReg() && !Src0Inlined) {
3969 if (Def && Def->isMoveImmediate() &&
3973 else if (RI.isSGPRReg(*MRI, Src1->
getReg()))
3986 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3987 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3988 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3989 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3990 UseMI.untieRegOperand(
3991 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3993 const std::optional<int64_t> SubRegImm =
4003 if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
4004 NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
4008 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
4009 UseMI.getOperand(0).getReg())
4011 UseMI.getOperand(0).setReg(Tmp);
4012 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
4013 CopyRegOperandToNarrowerRC(
UseMI, 2, NewRC);
4023 DefMI.eraseFromParent();
4035 if (BaseOps1.
size() != BaseOps2.
size())
4037 for (
size_t I = 0,
E = BaseOps1.
size();
I <
E; ++
I) {
4038 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
4046 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
4047 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
4048 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
4050 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
4053bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
4056 int64_t Offset0, Offset1;
4059 bool Offset0IsScalable, Offset1IsScalable;
4073 LocationSize Width0 = MIa.
memoperands().front()->getSize();
4074 LocationSize Width1 = MIb.
memoperands().front()->getSize();
4081 "MIa must load from or modify a memory location");
4083 "MIb must load from or modify a memory location");
4105 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4112 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4122 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4136 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4147 if (
Reg.isPhysical())
4151 Imm = Def->getOperand(1).getImm();
4171 unsigned NumOps =
MI.getNumOperands();
4174 if (
Op.isReg() &&
Op.isKill())
4182 case AMDGPU::V_MAC_F16_e32:
4183 case AMDGPU::V_MAC_F16_e64:
4184 return AMDGPU::V_MAD_F16_e64;
4185 case AMDGPU::V_MAC_F32_e32:
4186 case AMDGPU::V_MAC_F32_e64:
4187 return AMDGPU::V_MAD_F32_e64;
4188 case AMDGPU::V_MAC_LEGACY_F32_e32:
4189 case AMDGPU::V_MAC_LEGACY_F32_e64:
4190 return AMDGPU::V_MAD_LEGACY_F32_e64;
4191 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4192 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4193 return AMDGPU::V_FMA_LEGACY_F32_e64;
4194 case AMDGPU::V_FMAC_F16_e32:
4195 case AMDGPU::V_FMAC_F16_e64:
4196 case AMDGPU::V_FMAC_F16_t16_e64:
4197 case AMDGPU::V_FMAC_F16_fake16_e64:
4198 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4199 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4200 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4201 : AMDGPU::V_FMA_F16_gfx9_e64;
4202 case AMDGPU::V_FMAC_F32_e32:
4203 case AMDGPU::V_FMAC_F32_e64:
4204 return AMDGPU::V_FMA_F32_e64;
4205 case AMDGPU::V_FMAC_F64_e32:
4206 case AMDGPU::V_FMAC_F64_e64:
4207 return AMDGPU::V_FMA_F64_e64;
4227 if (
MI.isBundle()) {
4230 if (
MI.getBundleSize() != 1)
4232 CandidateMI =
MI.getNextNode();
4236 MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4240 if (
MI.isBundle()) {
4245 MI.untieRegOperand(MO.getOperandNo());
4253 if (Def.isEarlyClobber() && Def.isReg() &&
4258 auto UpdateDefIndex = [&](
LiveRange &LR) {
4259 auto *S = LR.find(OldIndex);
4260 if (S != LR.end() && S->start == OldIndex) {
4261 assert(S->valno && S->valno->def == OldIndex);
4262 S->start = NewIndex;
4263 S->valno->def = NewIndex;
4267 for (
auto &SR : LI.subranges())
4273 if (U.RemoveMIUse) {
4276 Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
4280 U.RemoveMIUse->setDesc(
get(AMDGPU::IMPLICIT_DEF));
4281 U.RemoveMIUse->getOperand(0).setIsDead(
true);
4282 for (
unsigned I = U.RemoveMIUse->getNumOperands() - 1;
I != 0; --
I)
4283 U.RemoveMIUse->removeOperand(
I);
4288 if (
MI.isBundle()) {
4292 if (MO.isReg() && MO.getReg() == DefReg) {
4293 assert(MO.getSubReg() == 0 &&
4294 "tied sub-registers in bundles currently not supported");
4295 MI.removeOperand(MO.getOperandNo());
4312 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4313 MIOp.setIsUndef(
true);
4314 MIOp.setReg(DummyReg);
4318 if (
MI.isBundle()) {
4322 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4323 MIOp.setIsUndef(
true);
4324 MIOp.setReg(DummyReg);
4337 return MI.isBundle() ? &
MI : NewMI;
4342 ThreeAddressUpdates &U)
const {
4344 unsigned Opc =
MI.getOpcode();
4348 if (NewMFMAOpc != -1) {
4351 for (
unsigned I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I)
4352 MIB.
add(
MI.getOperand(
I));
4360 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I)
4365 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4366 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4367 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4371 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4372 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4373 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4374 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4375 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4376 bool Src0Literal =
false;
4381 case AMDGPU::V_MAC_F16_e64:
4382 case AMDGPU::V_FMAC_F16_e64:
4383 case AMDGPU::V_FMAC_F16_t16_e64:
4384 case AMDGPU::V_FMAC_F16_fake16_e64:
4385 case AMDGPU::V_MAC_F32_e64:
4386 case AMDGPU::V_MAC_LEGACY_F32_e64:
4387 case AMDGPU::V_FMAC_F32_e64:
4388 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4389 case AMDGPU::V_FMAC_F64_e64:
4391 case AMDGPU::V_MAC_F16_e32:
4392 case AMDGPU::V_FMAC_F16_e32:
4393 case AMDGPU::V_MAC_F32_e32:
4394 case AMDGPU::V_MAC_LEGACY_F32_e32:
4395 case AMDGPU::V_FMAC_F32_e32:
4396 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4397 case AMDGPU::V_FMAC_F64_e32: {
4398 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4399 AMDGPU::OpName::src0);
4400 const MachineOperand *Src0 = &
MI.getOperand(Src0Idx);
4411 MachineInstrBuilder MIB;
4414 const MachineOperand *Src0Mods =
4417 const MachineOperand *Src1Mods =
4420 const MachineOperand *Src2Mods =
4426 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4427 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4429 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4431 MachineInstr *
DefMI;
4467 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4483 if (Src0Literal && !ST.hasVOP3Literal())
4511 switch (
MI.getOpcode()) {
4512 case AMDGPU::S_SET_GPR_IDX_ON:
4513 case AMDGPU::S_SET_GPR_IDX_MODE:
4514 case AMDGPU::S_SET_GPR_IDX_OFF:
4532 if (
MI.isTerminator() ||
MI.isPosition())
4536 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4539 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4545 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4546 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4547 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4548 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4549 MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4554 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4555 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4556 Opcode == AMDGPU::DS_SUB_GS_REG_RTN ||
isGWS(Opcode);
4570 if (
MI.getMF()->getFunction().hasFnAttribute(
"amdgpu-no-flat-scratch-init"))
4575 if (
MI.memoperands_empty())
4580 unsigned AS = Memop->getAddrSpace();
4581 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4582 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4583 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4584 *MD, AMDGPUAS::PRIVATE_ADDRESS);
4599 if (
MI.memoperands_empty())
4608 unsigned AS = Memop->getAddrSpace();
4625 if (ST.isTgSplitEnabled())
4630 if (
MI.memoperands_empty())
4635 unsigned AS = Memop->getAddrSpace();
4651 unsigned Opcode =
MI.getOpcode();
4666 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4667 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4668 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT ||
4669 Opcode == AMDGPU::S_SETHALT)
4672 if (
MI.isCall() ||
MI.isInlineAsm())
4688 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4689 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4690 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4691 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4699 if (
MI.isMetaInstruction())
4703 if (
MI.isCopyLike()) {
4704 if (!RI.isSGPRReg(MRI,
MI.getOperand(0).getReg()))
4708 return MI.readsRegister(AMDGPU::EXEC, &RI);
4719 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4723 switch (Imm.getBitWidth()) {
4729 ST.hasInv2PiInlineImm());
4732 ST.hasInv2PiInlineImm());
4734 return ST.has16BitInsts() &&
4736 ST.hasInv2PiInlineImm());
4743 APInt IntImm = Imm.bitcastToAPInt();
4745 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4753 return ST.has16BitInsts() &&
4756 return ST.has16BitInsts() &&
4766 switch (OperandType) {
4776 int32_t Trunc =
static_cast<int32_t
>(Imm);
4818 int16_t Trunc =
static_cast<int16_t
>(Imm);
4819 return ST.has16BitInsts() &&
4828 int16_t Trunc =
static_cast<int16_t
>(Imm);
4829 return ST.has16BitInsts() &&
4880 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4886 return ST.hasVOP3Literal();
4890 int64_t ImmVal)
const {
4893 if (
isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4894 OpNo == (
unsigned)AMDGPU::getNamedOperandIdx(InstDesc.
getOpcode(),
4895 AMDGPU::OpName::src2))
4897 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4909 "unexpected imm-like operand kind");
4922 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4940 AMDGPU::OpName
OpName)
const {
4942 return Mods && Mods->
getImm();
4955 switch (
MI.getOpcode()) {
4956 default:
return false;
4958 case AMDGPU::V_ADDC_U32_e64:
4959 case AMDGPU::V_SUBB_U32_e64:
4960 case AMDGPU::V_SUBBREV_U32_e64: {
4963 if (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()))
4968 case AMDGPU::V_MAC_F16_e64:
4969 case AMDGPU::V_MAC_F32_e64:
4970 case AMDGPU::V_MAC_LEGACY_F32_e64:
4971 case AMDGPU::V_FMAC_F16_e64:
4972 case AMDGPU::V_FMAC_F16_t16_e64:
4973 case AMDGPU::V_FMAC_F16_fake16_e64:
4974 case AMDGPU::V_FMAC_F32_e64:
4975 case AMDGPU::V_FMAC_F64_e64:
4976 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4977 if (!Src2->
isReg() || !RI.isVGPR(MRI, Src2->
getReg()) ||
4982 case AMDGPU::V_CNDMASK_B32_e64:
4988 if (Src1 && (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()) ||
5018 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
5027 unsigned Op32)
const {
5041 Inst32.
add(
MI.getOperand(
I));
5045 int Idx =
MI.getNumExplicitDefs();
5047 int OpTy =
MI.getDesc().operands()[Idx++].OperandType;
5052 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
5074 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
5082 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
5085 return AMDGPU::SReg_32RegClass.contains(Reg) ||
5086 AMDGPU::SReg_64RegClass.contains(Reg);
5114 switch (MO.getReg()) {
5116 case AMDGPU::VCC_LO:
5117 case AMDGPU::VCC_HI:
5119 case AMDGPU::FLAT_SCR:
5132 switch (
MI.getOpcode()) {
5133 case AMDGPU::V_READLANE_B32:
5134 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
5135 case AMDGPU::V_WRITELANE_B32:
5136 case AMDGPU::SI_SPILL_S32_TO_VGPR:
5143 if (
MI.isPreISelOpcode() ||
5144 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
5162 return SubReg.
getSubReg() != AMDGPU::NoSubRegister &&
5173 if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5174 ErrInfo =
"illegal copy from vector register to SGPR";
5192 if (!MRI.
isSSA() &&
MI.isCopy())
5193 return verifyCopy(
MI, MRI, ErrInfo);
5195 if (SIInstrInfo::isGenericOpcode(Opcode))
5198 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5199 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5200 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5202 if (Src0Idx == -1) {
5204 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5205 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5206 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5207 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5212 if (!
Desc.isVariadic() &&
5213 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
5214 ErrInfo =
"Instruction has wrong number of operands.";
5218 if (
MI.isInlineAsm()) {
5231 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
5232 ErrInfo =
"inlineasm operand has incorrect register class.";
5240 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
5241 ErrInfo =
"missing memory operand from image instruction.";
5246 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5249 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5250 "all fp values to integers.";
5255 int16_t RegClass = getOpRegClassID(OpInfo);
5257 switch (OpInfo.OperandType) {
5259 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5260 ErrInfo =
"Illegal immediate value for operand.";
5295 ErrInfo =
"Illegal immediate value for operand.";
5303 ErrInfo =
"Expected inline constant for operand.";
5317 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5318 ErrInfo =
"Expected immediate, but got non-immediate";
5327 if (OpInfo.isGenericType())
5342 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
5343 Opcode != AMDGPU::V_MOV_B64_PSEUDO && !
isSpill(
MI)) {
5345 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5347 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5348 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5355 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5356 ErrInfo =
"Subtarget requires even aligned vector registers";
5361 if (RegClass != -1) {
5362 if (Reg.isVirtual())
5367 ErrInfo =
"Operand has incorrect register class.";
5375 if (!ST.hasSDWA()) {
5376 ErrInfo =
"SDWA is not supported on this target";
5380 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5381 AMDGPU::OpName::dst_sel}) {
5385 int64_t Imm = MO->
getImm();
5387 ErrInfo =
"Invalid SDWA selection";
5392 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5394 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5399 if (!ST.hasSDWAScalar()) {
5401 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.
getReg()))) {
5402 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5409 "Only reg allowed as operands in SDWA instructions on GFX9+";
5415 if (!ST.hasSDWAOmod()) {
5418 if (OMod !=
nullptr &&
5420 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5425 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5426 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5427 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5428 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5431 unsigned Mods = Src0ModsMO->
getImm();
5434 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5440 if (
isVOPC(BasicOpcode)) {
5441 if (!ST.hasSDWASdst() && DstIdx != -1) {
5444 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5445 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5448 }
else if (!ST.hasSDWAOutModsVOPC()) {
5451 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5452 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5458 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5459 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5466 if (DstUnused && DstUnused->isImm() &&
5469 if (!Dst.isReg() || !Dst.isTied()) {
5470 ErrInfo =
"Dst register should have tied register";
5475 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5478 "Dst register should be tied to implicit use of preserved register";
5482 ErrInfo =
"Dst register should use same physical register as preserved";
5489 if (
isImage(Opcode) && !
MI.mayStore()) {
5501 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5509 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5513 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5514 if (RegCount > DstSize) {
5515 ErrInfo =
"Image instruction returns too many registers for dst "
5524 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5525 unsigned ConstantBusCount = 0;
5526 bool UsesLiteral =
false;
5529 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5533 LiteralVal = &
MI.getOperand(ImmIdx);
5542 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5553 }
else if (!MO.
isFI()) {
5560 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5570 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5571 return !RI.regsOverlap(SGPRUsed, SGPR);
5580 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5581 Opcode != AMDGPU::V_WRITELANE_B32) {
5582 ErrInfo =
"VOP* instruction violates constant bus restriction";
5586 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5587 ErrInfo =
"VOP3 instruction uses literal";
5594 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5595 unsigned SGPRCount = 0;
5598 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5606 if (MO.
getReg() != SGPRUsed)
5611 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5612 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5619 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5620 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5627 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5637 ErrInfo =
"ABS not allowed in VOP3B instructions";
5650 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5657 if (
Desc.isBranch()) {
5659 ErrInfo =
"invalid branch target for SOPK instruction";
5666 ErrInfo =
"invalid immediate for SOPK instruction";
5671 ErrInfo =
"invalid immediate for SOPK instruction";
5678 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5679 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5680 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5681 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5682 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5683 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5685 const unsigned StaticNumOps =
5686 Desc.getNumOperands() +
Desc.implicit_uses().size();
5687 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5693 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5694 ErrInfo =
"missing implicit register operands";
5700 if (!Dst->isUse()) {
5701 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5706 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5707 UseOpIdx != StaticNumOps + 1) {
5708 ErrInfo =
"movrel implicit operands should be tied";
5715 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5717 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5718 ErrInfo =
"src0 should be subreg of implicit vector use";
5726 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5727 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5733 if (
MI.mayStore() &&
5738 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5739 ErrInfo =
"scalar stores must use m0 as offset register";
5745 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5747 if (
Offset->getImm() != 0) {
5748 ErrInfo =
"subtarget does not support offsets in flat instructions";
5753 if (
isDS(
MI) && !ST.hasGDS()) {
5755 if (GDSOp && GDSOp->
getImm() != 0) {
5756 ErrInfo =
"GDS is not supported on this subtarget";
5764 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5765 AMDGPU::OpName::vaddr0);
5766 AMDGPU::OpName RSrcOpName =
5767 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5768 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5776 ErrInfo =
"dim is out of range";
5781 if (ST.hasR128A16()) {
5783 IsA16 = R128A16->
getImm() != 0;
5784 }
else if (ST.hasA16()) {
5786 IsA16 = A16->
getImm() != 0;
5789 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5791 unsigned AddrWords =
5794 unsigned VAddrWords;
5796 VAddrWords = RsrcIdx - VAddr0Idx;
5797 if (ST.hasPartialNSAEncoding() &&
5799 unsigned LastVAddrIdx = RsrcIdx - 1;
5800 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5808 if (VAddrWords != AddrWords) {
5810 <<
" but got " << VAddrWords <<
"\n");
5811 ErrInfo =
"bad vaddr size";
5821 unsigned DC = DppCt->
getImm();
5822 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5823 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5824 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5825 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5826 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5827 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5828 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5829 ErrInfo =
"Invalid dpp_ctrl value";
5832 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5833 !ST.hasDPPWavefrontShifts()) {
5834 ErrInfo =
"Invalid dpp_ctrl value: "
5835 "wavefront shifts are not supported on GFX10+";
5838 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5839 !ST.hasDPPBroadcasts()) {
5840 ErrInfo =
"Invalid dpp_ctrl value: "
5841 "broadcasts are not supported on GFX10+";
5844 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5846 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5847 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5848 !ST.hasGFX90AInsts()) {
5849 ErrInfo =
"Invalid dpp_ctrl value: "
5850 "row_newbroadcast/row_share is not supported before "
5854 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5855 ErrInfo =
"Invalid dpp_ctrl value: "
5856 "row_share and row_xmask are not supported before GFX10";
5861 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5864 ErrInfo =
"Invalid dpp_ctrl value: "
5865 "DP ALU dpp only support row_newbcast";
5872 AMDGPU::OpName DataName =
5873 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5879 if (ST.hasGFX90AInsts()) {
5880 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5881 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI,
Data->getReg()))) {
5882 ErrInfo =
"Invalid register class: "
5883 "vdata and vdst should be both VGPR or AGPR";
5886 if (
Data && Data2 &&
5887 (RI.isAGPR(MRI,
Data->getReg()) != RI.isAGPR(MRI, Data2->
getReg()))) {
5888 ErrInfo =
"Invalid register class: "
5889 "both data operands should be VGPR or AGPR";
5893 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5894 (
Data && RI.isAGPR(MRI,
Data->getReg())) ||
5895 (Data2 && RI.isAGPR(MRI, Data2->
getReg()))) {
5896 ErrInfo =
"Invalid register class: "
5897 "agpr loads and stores not supported on this GPU";
5903 if (ST.needsAlignedVGPRs()) {
5904 const auto isAlignedReg = [&
MI, &MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5909 if (Reg.isPhysical())
5910 return !(RI.getHWRegIndex(Reg) & 1);
5912 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5913 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5916 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5917 Opcode == AMDGPU::DS_GWS_BARRIER) {
5919 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5920 ErrInfo =
"Subtarget requires even aligned vector registers "
5921 "for DS_GWS instructions";
5927 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5928 ErrInfo =
"Subtarget requires even aligned vector registers "
5929 "for vaddr operand of image instructions";
5935 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5937 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5938 ErrInfo =
"Invalid register class: "
5939 "v_accvgpr_write with an SGPR is not supported on this GPU";
5944 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5947 ErrInfo =
"pseudo expects only physical SGPRs";
5954 if (!ST.hasScaleOffset()) {
5955 ErrInfo =
"Subtarget does not support offset scaling";
5959 ErrInfo =
"Instruction does not support offset scaling";
5968 for (
unsigned I = 0;
I < 3; ++
I) {
5974 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
5975 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
5977 if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
5978 &AMDGPU::SReg_64RegClass) ||
5979 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5980 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
5989 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
5991 return MI.getOperand(1).isReg() || RI.isAGPR(MRI,
MI.getOperand(0).getReg())
5993 : AMDGPU::V_MOV_B32_e32;
6003 default:
return AMDGPU::INSTRUCTION_LIST_END;
6004 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
6005 case AMDGPU::COPY:
return AMDGPU::COPY;
6006 case AMDGPU::PHI:
return AMDGPU::PHI;
6007 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
6008 case AMDGPU::WQM:
return AMDGPU::WQM;
6009 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
6010 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
6011 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
6012 case AMDGPU::S_ADD_I32:
6013 return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
6014 case AMDGPU::S_ADDC_U32:
6015 return AMDGPU::V_ADDC_U32_e32;
6016 case AMDGPU::S_SUB_I32:
6017 return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
6020 case AMDGPU::S_ADD_U32:
6021 return AMDGPU::V_ADD_CO_U32_e32;
6022 case AMDGPU::S_SUB_U32:
6023 return AMDGPU::V_SUB_CO_U32_e32;
6024 case AMDGPU::S_ADD_U64_PSEUDO:
6025 return AMDGPU::V_ADD_U64_PSEUDO;
6026 case AMDGPU::S_SUB_U64_PSEUDO:
6027 return AMDGPU::V_SUB_U64_PSEUDO;
6028 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
6029 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
6030 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
6031 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
6032 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
6033 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
6034 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
6035 case AMDGPU::S_XNOR_B32:
6036 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
6037 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
6038 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
6039 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
6040 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
6041 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
6042 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
6043 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
6044 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
6045 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
6046 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
6047 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
6048 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
6049 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
6050 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
6051 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
6052 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
6053 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
6054 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
6055 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
6056 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
6057 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
6058 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
6059 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
6060 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
6061 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
6062 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
6063 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
6064 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
6065 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
6066 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
6067 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
6068 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
6069 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
6070 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
6071 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
6072 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
6073 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
6074 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
6075 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
6076 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
6077 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
6078 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
6079 case AMDGPU::S_CVT_F32_F16:
6080 case AMDGPU::S_CVT_HI_F32_F16:
6081 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
6082 : AMDGPU::V_CVT_F32_F16_fake16_e64;
6083 case AMDGPU::S_CVT_F16_F32:
6084 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
6085 : AMDGPU::V_CVT_F16_F32_fake16_e64;
6086 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
6087 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
6088 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
6089 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
6090 case AMDGPU::S_CEIL_F16:
6091 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
6092 : AMDGPU::V_CEIL_F16_fake16_e64;
6093 case AMDGPU::S_FLOOR_F16:
6094 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6095 : AMDGPU::V_FLOOR_F16_fake16_e64;
6096 case AMDGPU::S_TRUNC_F16:
6097 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6098 : AMDGPU::V_TRUNC_F16_fake16_e64;
6099 case AMDGPU::S_RNDNE_F16:
6100 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6101 : AMDGPU::V_RNDNE_F16_fake16_e64;
6102 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6103 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6104 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6105 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6106 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6107 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6108 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6109 case AMDGPU::S_ADD_F16:
6110 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6111 : AMDGPU::V_ADD_F16_fake16_e64;
6112 case AMDGPU::S_SUB_F16:
6113 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6114 : AMDGPU::V_SUB_F16_fake16_e64;
6115 case AMDGPU::S_MIN_F16:
6116 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6117 : AMDGPU::V_MIN_F16_fake16_e64;
6118 case AMDGPU::S_MAX_F16:
6119 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6120 : AMDGPU::V_MAX_F16_fake16_e64;
6121 case AMDGPU::S_MINIMUM_F16:
6122 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6123 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6124 case AMDGPU::S_MAXIMUM_F16:
6125 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6126 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6127 case AMDGPU::S_MUL_F16:
6128 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6129 : AMDGPU::V_MUL_F16_fake16_e64;
6130 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6131 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6132 case AMDGPU::S_FMAC_F16:
6133 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6134 : AMDGPU::V_FMAC_F16_fake16_e64;
6135 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6136 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6137 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6138 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6139 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6140 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6141 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6142 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6143 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6144 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6145 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6146 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6147 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6148 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6149 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6150 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6151 case AMDGPU::S_CMP_LT_F16:
6152 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6153 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6154 case AMDGPU::S_CMP_EQ_F16:
6155 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6156 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6157 case AMDGPU::S_CMP_LE_F16:
6158 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6159 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6160 case AMDGPU::S_CMP_GT_F16:
6161 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6162 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6163 case AMDGPU::S_CMP_LG_F16:
6164 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6165 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6166 case AMDGPU::S_CMP_GE_F16:
6167 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6168 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6169 case AMDGPU::S_CMP_O_F16:
6170 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6171 : AMDGPU::V_CMP_O_F16_fake16_e64;
6172 case AMDGPU::S_CMP_U_F16:
6173 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6174 : AMDGPU::V_CMP_U_F16_fake16_e64;
6175 case AMDGPU::S_CMP_NGE_F16:
6176 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6177 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6178 case AMDGPU::S_CMP_NLG_F16:
6179 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6180 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6181 case AMDGPU::S_CMP_NGT_F16:
6182 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6183 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6184 case AMDGPU::S_CMP_NLE_F16:
6185 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6186 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6187 case AMDGPU::S_CMP_NEQ_F16:
6188 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6189 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6190 case AMDGPU::S_CMP_NLT_F16:
6191 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6192 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6193 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6194 case AMDGPU::V_S_EXP_F16_e64:
6195 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6196 : AMDGPU::V_EXP_F16_fake16_e64;
6197 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6198 case AMDGPU::V_S_LOG_F16_e64:
6199 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6200 : AMDGPU::V_LOG_F16_fake16_e64;
6201 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6202 case AMDGPU::V_S_RCP_F16_e64:
6203 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6204 : AMDGPU::V_RCP_F16_fake16_e64;
6205 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6206 case AMDGPU::V_S_RSQ_F16_e64:
6207 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6208 : AMDGPU::V_RSQ_F16_fake16_e64;
6209 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6210 case AMDGPU::V_S_SQRT_F16_e64:
6211 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6212 : AMDGPU::V_SQRT_F16_fake16_e64;
6215 "Unexpected scalar opcode without corresponding vector one!");
6264 "Not a whole wave func");
6267 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6268 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6275 unsigned OpNo)
const {
6277 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6278 Desc.operands()[OpNo].RegClass == -1) {
6281 if (Reg.isVirtual()) {
6285 return RI.getPhysRegBaseClass(Reg);
6288 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6289 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6297 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6299 unsigned Size = RI.getRegSizeInBits(*RC);
6300 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6301 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6302 : AMDGPU::V_MOV_B32_e32;
6304 Opcode = AMDGPU::COPY;
6305 else if (RI.isSGPRClass(RC))
6306 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6320 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6326 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6337 if (SubIdx == AMDGPU::sub0)
6339 if (SubIdx == AMDGPU::sub1)
6351void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6367 if (Reg.isPhysical())
6377 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6380 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6387 unsigned Opc =
MI.getOpcode();
6393 constexpr AMDGPU::OpName OpNames[] = {
6394 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6397 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6398 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6408 bool IsAGPR = RI.isAGPR(MRI, MO.
getReg());
6409 if (IsAGPR && !ST.hasMAIInsts())
6415 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6416 const int DataIdx = AMDGPU::getNamedOperandIdx(
6417 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6418 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6419 MI.getOperand(DataIdx).isReg() &&
6420 RI.isAGPR(MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6422 if ((
int)
OpIdx == DataIdx) {
6423 if (VDstIdx != -1 &&
6424 RI.isAGPR(MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6427 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6428 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6429 RI.isAGPR(MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6434 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6435 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6436 RI.isSGPRReg(MRI, MO.
getReg()))
6439 if (ST.hasFlatScratchHiInB64InstHazard() &&
6446 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6467 constexpr unsigned NumOps = 3;
6468 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6469 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6470 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6471 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6476 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6479 MO = &
MI.getOperand(SrcIdx);
6482 if (!MO->
isReg() || !RI.isSGPRReg(MRI, MO->
getReg()))
6486 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6490 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6494 return !OpSel && !OpSelHi;
6503 int64_t RegClass = getOpRegClassID(OpInfo);
6505 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6514 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6515 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6519 if (!LiteralLimit--)
6529 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6537 if (--ConstantBusLimit <= 0)
6549 if (!LiteralLimit--)
6551 if (--ConstantBusLimit <= 0)
6557 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6561 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6563 !
Op.isIdenticalTo(*MO))
6573 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6587 bool Is64BitOp = Is64BitFPOp ||
6594 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6603 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6621 bool IsGFX950Only = ST.hasGFX950Insts();
6622 bool IsGFX940Only = ST.hasGFX940Insts();
6624 if (!IsGFX950Only && !IsGFX940Only)
6642 unsigned Opcode =
MI.getOpcode();
6644 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6645 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6646 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6647 case AMDGPU::V_MQSAD_U32_U8_e64:
6648 case AMDGPU::V_PK_ADD_F16:
6649 case AMDGPU::V_PK_ADD_F32:
6650 case AMDGPU::V_PK_ADD_I16:
6651 case AMDGPU::V_PK_ADD_U16:
6652 case AMDGPU::V_PK_ASHRREV_I16:
6653 case AMDGPU::V_PK_FMA_F16:
6654 case AMDGPU::V_PK_FMA_F32:
6655 case AMDGPU::V_PK_FMAC_F16_e32:
6656 case AMDGPU::V_PK_FMAC_F16_e64:
6657 case AMDGPU::V_PK_LSHLREV_B16:
6658 case AMDGPU::V_PK_LSHRREV_B16:
6659 case AMDGPU::V_PK_MAD_I16:
6660 case AMDGPU::V_PK_MAD_U16:
6661 case AMDGPU::V_PK_MAX_F16:
6662 case AMDGPU::V_PK_MAX_I16:
6663 case AMDGPU::V_PK_MAX_U16:
6664 case AMDGPU::V_PK_MIN_F16:
6665 case AMDGPU::V_PK_MIN_I16:
6666 case AMDGPU::V_PK_MIN_U16:
6667 case AMDGPU::V_PK_MOV_B32:
6668 case AMDGPU::V_PK_MUL_F16:
6669 case AMDGPU::V_PK_MUL_F32:
6670 case AMDGPU::V_PK_MUL_LO_U16:
6671 case AMDGPU::V_PK_SUB_I16:
6672 case AMDGPU::V_PK_SUB_U16:
6673 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6682 unsigned Opc =
MI.getOpcode();
6685 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6688 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6694 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6695 RI.isSGPRReg(MRI, Src0.
getReg()))
6701 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6703 if (Src0.
isReg() && RI.isVGPR(MRI, Src0.
getReg())) {
6709 if (Src1.
isReg() && RI.isVGPR(MRI, Src1.
getReg())) {
6720 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6721 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6722 if (!RI.isVGPR(MRI,
MI.getOperand(Src2Idx).getReg()))
6734 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6735 RI.isVGPR(MRI, Src1.
getReg())) {
6748 if (HasImplicitSGPR || !
MI.isCommutable()) {
6765 if (CommutedOpc == -1) {
6770 MI.setDesc(
get(CommutedOpc));
6774 bool Src0Kill = Src0.
isKill();
6778 else if (Src1.
isReg()) {
6793 unsigned Opc =
MI.getOpcode();
6796 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6797 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6798 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6801 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6802 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6803 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6804 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6805 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6806 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6807 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6817 if (VOP3Idx[2] != -1) {
6829 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6830 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6832 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6834 SGPRsUsed.
insert(SGPRReg);
6838 for (
int Idx : VOP3Idx) {
6847 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6859 if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.
getReg())))
6866 if (ConstantBusLimit > 0) {
6878 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6879 !RI.isVGPR(MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6885 for (
unsigned I = 0;
I < 3; ++
I) {
6898 SRC = RI.getCommonSubClass(SRC, DstRC);
6901 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6903 if (RI.hasAGPRs(VRC)) {
6904 VRC = RI.getEquivalentVGPRClass(VRC);
6907 get(TargetOpcode::COPY), NewSrcReg)
6914 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6920 for (
unsigned i = 0; i < SubRegs; ++i) {
6923 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6924 .
addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
6930 get(AMDGPU::REG_SEQUENCE), DstReg);
6931 for (
unsigned i = 0; i < SubRegs; ++i) {
6933 MIB.
addImm(RI.getSubRegFromChannel(i));
6946 if (SBase && !RI.isSGPRClass(MRI.
getRegClass(SBase->getReg()))) {
6948 SBase->setReg(SGPR);
6951 if (SOff && !RI.isSGPRReg(MRI, SOff->
getReg())) {
6959 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6960 if (OldSAddrIdx < 0)
6973 if (RI.isSGPRReg(MRI, SAddr.
getReg()))
6976 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6977 if (NewVAddrIdx < 0)
6980 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6984 if (OldVAddrIdx >= 0) {
6998 if (OldVAddrIdx == NewVAddrIdx) {
7009 assert(OldSAddrIdx == NewVAddrIdx);
7011 if (OldVAddrIdx >= 0) {
7012 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
7013 AMDGPU::OpName::vdst_in);
7017 if (NewVDstIn != -1) {
7018 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
7024 if (NewVDstIn != -1) {
7025 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
7066 unsigned OpSubReg =
Op.getSubReg();
7069 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
7085 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7088 bool ImpDef = Def->isImplicitDef();
7089 while (!ImpDef && Def && Def->isCopy()) {
7090 if (Def->getOperand(1).getReg().isPhysical())
7093 ImpDef = Def && Def->isImplicitDef();
7095 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7114 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7120 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
7121 unsigned NumSubRegs =
RegSize / 32;
7122 Register VScalarOp = ScalarOp->getReg();
7124 if (NumSubRegs == 1) {
7127 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7132 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7138 CondReg = NewCondReg;
7148 ScalarOp->setReg(CurReg);
7149 ScalarOp->setIsKill();
7153 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7154 "Unhandled register size");
7156 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7163 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7164 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
7167 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7168 .
addReg(VScalarOp, VScalarOpUndef,
7169 TRI->getSubRegFromChannel(Idx + 1));
7176 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7183 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
7186 if (NumSubRegs <= 2)
7187 Cmp.addReg(VScalarOp);
7189 Cmp.addReg(VScalarOp, VScalarOpUndef,
7190 TRI->getSubRegFromChannel(Idx, 2));
7194 CondReg = NewCondReg;
7204 const auto *SScalarOpRC =
7210 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7211 unsigned Channel = 0;
7212 for (
Register Piece : ReadlanePieces) {
7213 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
7217 ScalarOp->setReg(SScalarOp);
7218 ScalarOp->setIsKill();
7254 if (!Begin.isValid())
7256 if (!End.isValid()) {
7262 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7270 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
7271 std::numeric_limits<unsigned>::max()) !=
7289 for (
auto I = Begin;
I != AfterMI;
I++) {
7290 for (
auto &MO :
I->all_uses())
7316 MBB.addSuccessor(LoopBB);
7326 for (
auto &Succ : RemainderBB->
successors()) {
7350static std::tuple<unsigned, unsigned>
7358 TII.buildExtractSubReg(
MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7359 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7366 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7383 .
addImm(AMDGPU::sub0_sub1)
7389 return std::tuple(RsrcPtr, NewSRsrc);
7426 if (
MI.getOpcode() == AMDGPU::PHI) {
7428 assert(!RI.isSGPRClass(VRC));
7431 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7433 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7449 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7452 if (RI.hasVGPRs(DstRC)) {
7456 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7458 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7476 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7481 if (DstRC != Src0RC) {
7490 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7492 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7498 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7499 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7500 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7501 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7502 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7503 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7504 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7506 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7519 ? AMDGPU::OpName::rsrc
7520 : AMDGPU::OpName::srsrc;
7525 AMDGPU::OpName SampOpName =
7526 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7535 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7541 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7542 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7547 while (Start->getOpcode() != FrameSetupOpcode)
7550 while (End->getOpcode() != FrameDestroyOpcode)
7554 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7555 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7563 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7567 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7577 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d2 ||
7578 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d4 ||
7579 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d2 ||
7580 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d4) {
7582 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7589 bool isSoffsetLegal =
true;
7591 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7592 if (SoffsetIdx != -1) {
7596 isSoffsetLegal =
false;
7600 bool isRsrcLegal =
true;
7602 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7603 if (RsrcIdx != -1) {
7605 if (Rsrc->
isReg() && !RI.isSGPRReg(MRI, Rsrc->
getReg()))
7606 isRsrcLegal =
false;
7610 if (isRsrcLegal && isSoffsetLegal)
7638 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7642 unsigned RsrcPtr, NewSRsrc;
7649 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7650 .addReg(VAddr->
getReg(), {}, AMDGPU::sub0)
7656 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7657 .addReg(VAddr->
getReg(), {}, AMDGPU::sub1)
7670 }
else if (!VAddr && ST.hasAddr64()) {
7674 "FIXME: Need to emit flat atomics here");
7676 unsigned RsrcPtr, NewSRsrc;
7702 MIB.
addImm(CPol->getImm());
7707 MIB.
addImm(TFE->getImm());
7727 MI.removeFromParent();
7732 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7733 .addImm(AMDGPU::sub0)
7734 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7735 .addImm(AMDGPU::sub1);
7738 if (!isSoffsetLegal) {
7749 if (!isSoffsetLegal) {
7761 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7762 if (RsrcIdx != -1) {
7763 DeferredList.insert(
MI);
7768 return DeferredList.contains(
MI);
7778 if (!ST.useRealTrue16Insts())
7781 unsigned Opcode =
MI.getOpcode();
7785 OpIdx >=
get(Opcode).getNumOperands() ||
7786 get(Opcode).operands()[
OpIdx].RegClass == -1)
7790 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7794 if (!RI.isVGPRClass(CurrRC))
7797 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7799 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7800 Op.setSubReg(AMDGPU::lo16);
7801 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7811 Op.setReg(NewDstReg);
7823 while (!Worklist.
empty()) {
7837 "Deferred MachineInstr are not supposed to re-populate worklist");
7857 case AMDGPU::S_ADD_I32:
7858 case AMDGPU::S_SUB_I32: {
7862 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7870 case AMDGPU::S_MUL_U64:
7871 if (ST.hasVectorMulU64()) {
7872 NewOpcode = AMDGPU::V_MUL_U64_e64;
7876 splitScalarSMulU64(Worklist, Inst, MDT);
7880 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7881 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7884 splitScalarSMulPseudo(Worklist, Inst, MDT);
7888 case AMDGPU::S_AND_B64:
7889 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7893 case AMDGPU::S_OR_B64:
7894 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7898 case AMDGPU::S_XOR_B64:
7899 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7903 case AMDGPU::S_NAND_B64:
7904 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7908 case AMDGPU::S_NOR_B64:
7909 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7913 case AMDGPU::S_XNOR_B64:
7914 if (ST.hasDLInsts())
7915 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7917 splitScalar64BitXnor(Worklist, Inst, MDT);
7921 case AMDGPU::S_ANDN2_B64:
7922 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7926 case AMDGPU::S_ORN2_B64:
7927 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7931 case AMDGPU::S_BREV_B64:
7932 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7936 case AMDGPU::S_NOT_B64:
7937 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7941 case AMDGPU::S_BCNT1_I32_B64:
7942 splitScalar64BitBCNT(Worklist, Inst);
7946 case AMDGPU::S_BFE_I64:
7947 splitScalar64BitBFE(Worklist, Inst);
7951 case AMDGPU::S_FLBIT_I32_B64:
7952 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7955 case AMDGPU::S_FF1_I32_B64:
7956 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7960 case AMDGPU::S_LSHL_B32:
7961 if (ST.hasOnlyRevVALUShifts()) {
7962 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7966 case AMDGPU::S_ASHR_I32:
7967 if (ST.hasOnlyRevVALUShifts()) {
7968 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7972 case AMDGPU::S_LSHR_B32:
7973 if (ST.hasOnlyRevVALUShifts()) {
7974 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7978 case AMDGPU::S_LSHL_B64:
7979 if (ST.hasOnlyRevVALUShifts()) {
7981 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7982 : AMDGPU::V_LSHLREV_B64_e64;
7986 case AMDGPU::S_ASHR_I64:
7987 if (ST.hasOnlyRevVALUShifts()) {
7988 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7992 case AMDGPU::S_LSHR_B64:
7993 if (ST.hasOnlyRevVALUShifts()) {
7994 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7999 case AMDGPU::S_ABS_I32:
8000 lowerScalarAbs(Worklist, Inst);
8004 case AMDGPU::S_ABSDIFF_I32:
8005 lowerScalarAbsDiff(Worklist, Inst);
8009 case AMDGPU::S_CBRANCH_SCC0:
8010 case AMDGPU::S_CBRANCH_SCC1: {
8013 bool IsSCC = CondReg == AMDGPU::SCC;
8021 case AMDGPU::S_BFE_U64:
8022 case AMDGPU::S_BFM_B64:
8025 case AMDGPU::S_PACK_LL_B32_B16:
8026 case AMDGPU::S_PACK_LH_B32_B16:
8027 case AMDGPU::S_PACK_HL_B32_B16:
8028 case AMDGPU::S_PACK_HH_B32_B16:
8029 movePackToVALU(Worklist, MRI, Inst);
8033 case AMDGPU::S_XNOR_B32:
8034 lowerScalarXnor(Worklist, Inst);
8038 case AMDGPU::S_NAND_B32:
8039 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
8043 case AMDGPU::S_NOR_B32:
8044 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
8048 case AMDGPU::S_ANDN2_B32:
8049 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
8053 case AMDGPU::S_ORN2_B32:
8054 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
8062 case AMDGPU::S_ADD_CO_PSEUDO:
8063 case AMDGPU::S_SUB_CO_PSEUDO: {
8064 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
8065 ? AMDGPU::V_ADDC_U32_e64
8066 : AMDGPU::V_SUBB_U32_e64;
8067 const auto *CarryRC = RI.getWaveMaskRegClass();
8089 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8093 case AMDGPU::S_UADDO_PSEUDO:
8094 case AMDGPU::S_USUBO_PSEUDO: {
8100 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8101 ? AMDGPU::V_ADD_CO_U32_e64
8102 : AMDGPU::V_SUB_CO_U32_e64;
8114 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8118 case AMDGPU::S_LSHL1_ADD_U32:
8119 case AMDGPU::S_LSHL2_ADD_U32:
8120 case AMDGPU::S_LSHL3_ADD_U32:
8121 case AMDGPU::S_LSHL4_ADD_U32: {
8125 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8126 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8127 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8141 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8145 case AMDGPU::S_CSELECT_B32:
8146 case AMDGPU::S_CSELECT_B64:
8147 lowerSelect(Worklist, Inst, MDT);
8150 case AMDGPU::S_CMP_EQ_I32:
8151 case AMDGPU::S_CMP_LG_I32:
8152 case AMDGPU::S_CMP_GT_I32:
8153 case AMDGPU::S_CMP_GE_I32:
8154 case AMDGPU::S_CMP_LT_I32:
8155 case AMDGPU::S_CMP_LE_I32:
8156 case AMDGPU::S_CMP_EQ_U32:
8157 case AMDGPU::S_CMP_LG_U32:
8158 case AMDGPU::S_CMP_GT_U32:
8159 case AMDGPU::S_CMP_GE_U32:
8160 case AMDGPU::S_CMP_LT_U32:
8161 case AMDGPU::S_CMP_LE_U32:
8162 case AMDGPU::S_CMP_EQ_U64:
8163 case AMDGPU::S_CMP_LG_U64:
8164 case AMDGPU::S_CMP_LT_F32:
8165 case AMDGPU::S_CMP_EQ_F32:
8166 case AMDGPU::S_CMP_LE_F32:
8167 case AMDGPU::S_CMP_GT_F32:
8168 case AMDGPU::S_CMP_LG_F32:
8169 case AMDGPU::S_CMP_GE_F32:
8170 case AMDGPU::S_CMP_O_F32:
8171 case AMDGPU::S_CMP_U_F32:
8172 case AMDGPU::S_CMP_NGE_F32:
8173 case AMDGPU::S_CMP_NLG_F32:
8174 case AMDGPU::S_CMP_NGT_F32:
8175 case AMDGPU::S_CMP_NLE_F32:
8176 case AMDGPU::S_CMP_NEQ_F32:
8177 case AMDGPU::S_CMP_NLT_F32: {
8182 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8196 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8200 case AMDGPU::S_CMP_LT_F16:
8201 case AMDGPU::S_CMP_EQ_F16:
8202 case AMDGPU::S_CMP_LE_F16:
8203 case AMDGPU::S_CMP_GT_F16:
8204 case AMDGPU::S_CMP_LG_F16:
8205 case AMDGPU::S_CMP_GE_F16:
8206 case AMDGPU::S_CMP_O_F16:
8207 case AMDGPU::S_CMP_U_F16:
8208 case AMDGPU::S_CMP_NGE_F16:
8209 case AMDGPU::S_CMP_NLG_F16:
8210 case AMDGPU::S_CMP_NGT_F16:
8211 case AMDGPU::S_CMP_NLE_F16:
8212 case AMDGPU::S_CMP_NEQ_F16:
8213 case AMDGPU::S_CMP_NLT_F16: {
8236 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8240 case AMDGPU::S_CVT_HI_F32_F16: {
8243 if (ST.useRealTrue16Insts()) {
8248 .
addReg(TmpReg, {}, AMDGPU::hi16)
8264 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8268 case AMDGPU::S_MINIMUM_F32:
8269 case AMDGPU::S_MAXIMUM_F32: {
8281 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8285 case AMDGPU::S_MINIMUM_F16:
8286 case AMDGPU::S_MAXIMUM_F16: {
8288 ? &AMDGPU::VGPR_16RegClass
8289 : &AMDGPU::VGPR_32RegClass);
8301 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8305 case AMDGPU::V_S_EXP_F16_e64:
8306 case AMDGPU::V_S_LOG_F16_e64:
8307 case AMDGPU::V_S_RCP_F16_e64:
8308 case AMDGPU::V_S_RSQ_F16_e64:
8309 case AMDGPU::V_S_SQRT_F16_e64: {
8311 ? &AMDGPU::VGPR_16RegClass
8312 : &AMDGPU::VGPR_32RegClass);
8324 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8330 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8338 if (NewOpcode == Opcode) {
8348 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8362 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8369 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8400 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8404 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8410 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8417 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8419 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8424 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8432 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8442 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8443 AMDGPU::OpName::src0_modifiers) >= 0)
8447 NewInstr->addOperand(Src);
8450 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8453 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8455 NewInstr.addImm(
Size);
8456 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8460 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8465 "Scalar BFE is only implemented for constant width and offset");
8473 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8474 AMDGPU::OpName::src1_modifiers) >= 0)
8476 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8478 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8479 AMDGPU::OpName::src2_modifiers) >= 0)
8481 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8483 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8485 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8487 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8493 NewInstr->addOperand(
Op);
8500 if (
Op.getReg() == AMDGPU::SCC) {
8502 if (
Op.isDef() && !
Op.isDead())
8503 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8505 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8510 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8511 Register DstReg = NewInstr->getOperand(0).getReg();
8526 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8530std::pair<bool, MachineBasicBlock *>
8533 if (ST.hasAddNoCarryInsts()) {
8545 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8547 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8548 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8559 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8560 return std::pair(
true, NewBB);
8563 return std::pair(
false,
nullptr);
8580 bool IsSCC = (CondReg == AMDGPU::SCC);
8594 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8599 bool CopyFound =
false;
8600 for (MachineInstr &CandI :
8603 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8605 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8607 .
addReg(CandI.getOperand(1).getReg());
8619 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8628 MachineInstr *NewInst;
8629 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8630 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8645 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8660 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8661 : AMDGPU::V_SUB_CO_U32_e32;
8672 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8689 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8690 : AMDGPU::V_SUB_CO_U32_e32;
8703 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8717 if (ST.hasDLInsts()) {
8727 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8733 bool Src0IsSGPR = Src0.
isReg() &&
8735 bool Src1IsSGPR = Src1.
isReg() &&
8749 }
else if (Src1IsSGPR) {
8767 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8773 unsigned Opcode)
const {
8797 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8802 unsigned Opcode)
const {
8826 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8841 const MCInstrDesc &InstDesc =
get(Opcode);
8842 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
8844 &AMDGPU::SGPR_32RegClass;
8846 const TargetRegisterClass *Src0SubRC =
8847 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8850 AMDGPU::sub0, Src0SubRC);
8853 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8854 const TargetRegisterClass *NewDestSubRC =
8855 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8858 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0).
add(SrcReg0Sub0);
8861 AMDGPU::sub1, Src0SubRC);
8864 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1).
add(SrcReg0Sub1);
8878 Worklist.
insert(&LoHalf);
8879 Worklist.
insert(&HiHalf);
8885 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8908 const TargetRegisterClass *Src0SubRC =
8909 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8910 if (RI.isSGPRClass(Src0SubRC))
8911 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8912 const TargetRegisterClass *Src1SubRC =
8913 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8914 if (RI.isSGPRClass(Src1SubRC))
8915 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8919 MachineOperand Op0L =
8921 MachineOperand Op1L =
8923 MachineOperand Op0H =
8925 MachineOperand Op1H =
8944 MachineInstr *Op1L_Op0H =
8950 MachineInstr *Op1H_Op0L =
8956 MachineInstr *Carry =
8961 MachineInstr *LoHalf =
8971 MachineInstr *HiHalf =
8994 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9017 const TargetRegisterClass *Src0SubRC =
9018 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9019 if (RI.isSGPRClass(Src0SubRC))
9020 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9021 const TargetRegisterClass *Src1SubRC =
9022 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9023 if (RI.isSGPRClass(Src1SubRC))
9024 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9028 MachineOperand Op0L =
9030 MachineOperand Op1L =
9034 unsigned NewOpc =
Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
9035 ? AMDGPU::V_MUL_HI_U32_e64
9036 : AMDGPU::V_MUL_HI_I32_e64;
9037 MachineInstr *HiHalf =
9040 MachineInstr *LoHalf =
9059 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9075 const MCInstrDesc &InstDesc =
get(Opcode);
9076 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
9078 &AMDGPU::SGPR_32RegClass;
9080 const TargetRegisterClass *Src0SubRC =
9081 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9082 const TargetRegisterClass *Src1RC = Src1.
isReg() ?
9084 &AMDGPU::SGPR_32RegClass;
9086 const TargetRegisterClass *Src1SubRC =
9087 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9090 AMDGPU::sub0, Src0SubRC);
9092 AMDGPU::sub0, Src1SubRC);
9094 AMDGPU::sub1, Src0SubRC);
9096 AMDGPU::sub1, Src1SubRC);
9099 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9100 const TargetRegisterClass *NewDestSubRC =
9101 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9104 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0)
9109 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1)
9122 Worklist.
insert(&LoHalf);
9123 Worklist.
insert(&HiHalf);
9126 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9146 MachineOperand* Op0;
9147 MachineOperand* Op1;
9149 if (Src0.
isReg() && RI.isSGPRReg(MRI, Src0.
getReg())) {
9182 const MCInstrDesc &InstDesc =
get(AMDGPU::V_BCNT_U32_B32_e64);
9183 const TargetRegisterClass *SrcRC = Src.isReg() ?
9185 &AMDGPU::SGPR_32RegClass;
9190 const TargetRegisterClass *SrcSubRC =
9191 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9194 AMDGPU::sub0, SrcSubRC);
9196 AMDGPU::sub1, SrcSubRC);
9206 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9225 Offset == 0 &&
"Not implemented");
9248 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9258 .
addReg(Src.getReg(), {}, AMDGPU::sub0);
9261 .
addReg(Src.getReg(), {}, AMDGPU::sub0)
9267 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9286 const MCInstrDesc &InstDesc =
get(Opcode);
9288 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9289 unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9290 : AMDGPU::V_ADD_CO_U32_e32;
9292 const TargetRegisterClass *SrcRC =
9293 Src.isReg() ? MRI.
getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9294 const TargetRegisterClass *SrcSubRC =
9295 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9297 MachineOperand SrcRegSub0 =
9299 MachineOperand SrcRegSub1 =
9312 .
addReg(IsCtlz ? MidReg1 : MidReg2)
9318 .
addReg(IsCtlz ? MidReg2 : MidReg1);
9322 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
9325void SIInstrInfo::addUsersToMoveToVALUWorklist(
9329 MachineInstr &
UseMI = *MO.getParent();
9333 switch (
UseMI.getOpcode()) {
9336 case AMDGPU::SOFT_WQM:
9337 case AMDGPU::STRICT_WWM:
9338 case AMDGPU::STRICT_WQM:
9339 case AMDGPU::REG_SEQUENCE:
9341 case AMDGPU::INSERT_SUBREG:
9344 OpNo = MO.getOperandNo();
9351 if (!RI.hasVectorRegisters(OpRC))
9368 if (ST.useRealTrue16Insts()) {
9370 if (!Src0.
isReg() || !RI.isVGPR(MRI, Src0.
getReg())) {
9373 get(Src0.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg0)
9379 if (!Src1.
isReg() || !RI.isVGPR(MRI, Src1.
getReg())) {
9382 get(Src1.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg1)
9391 auto NewMI =
BuildMI(*
MBB, Inst,
DL,
get(AMDGPU::REG_SEQUENCE), ResultReg);
9393 case AMDGPU::S_PACK_LL_B32_B16:
9395 .addReg(SrcReg0, {},
9396 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9397 .addImm(AMDGPU::lo16)
9398 .addReg(SrcReg1, {},
9399 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9400 .addImm(AMDGPU::hi16);
9402 case AMDGPU::S_PACK_LH_B32_B16:
9404 .addReg(SrcReg0, {},
9405 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9406 .addImm(AMDGPU::lo16)
9407 .addReg(SrcReg1, {}, AMDGPU::hi16)
9408 .addImm(AMDGPU::hi16);
9410 case AMDGPU::S_PACK_HL_B32_B16:
9411 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9412 .addImm(AMDGPU::lo16)
9413 .addReg(SrcReg1, {},
9414 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9415 .addImm(AMDGPU::hi16);
9417 case AMDGPU::S_PACK_HH_B32_B16:
9418 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9419 .addImm(AMDGPU::lo16)
9420 .addReg(SrcReg1, {}, AMDGPU::hi16)
9421 .addImm(AMDGPU::hi16);
9429 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9434 case AMDGPU::S_PACK_LL_B32_B16: {
9453 case AMDGPU::S_PACK_LH_B32_B16: {
9463 case AMDGPU::S_PACK_HL_B32_B16: {
9474 case AMDGPU::S_PACK_HH_B32_B16: {
9494 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9503 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
9504 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
9505 SmallVector<MachineInstr *, 4> CopyToDelete;
9508 for (MachineInstr &
MI :
9512 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
9515 MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9516 Register DestReg =
MI.getOperand(0).getReg();
9523 MI.getOperand(SCCIdx).setReg(NewCond);
9529 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
9532 for (
auto &Copy : CopyToDelete)
9533 Copy->eraseFromParent();
9541void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
9547 for (MachineInstr &
MI :
9550 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
9552 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
9561 const TargetRegisterClass *NewDstRC =
getOpRegClass(Inst, 0);
9569 case AMDGPU::REG_SEQUENCE:
9570 case AMDGPU::INSERT_SUBREG:
9572 case AMDGPU::SOFT_WQM:
9573 case AMDGPU::STRICT_WWM:
9574 case AMDGPU::STRICT_WQM: {
9576 if (RI.isAGPRClass(SrcRC)) {
9577 if (RI.isAGPRClass(NewDstRC))
9582 case AMDGPU::REG_SEQUENCE:
9583 case AMDGPU::INSERT_SUBREG:
9584 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9587 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9593 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9596 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9610 int OpIndices[3])
const {
9611 const MCInstrDesc &
Desc =
MI.getDesc();
9627 const MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9629 for (
unsigned i = 0; i < 3; ++i) {
9630 int Idx = OpIndices[i];
9634 const MachineOperand &MO =
MI.getOperand(Idx);
9640 const TargetRegisterClass *OpRC =
9641 RI.getRegClass(getOpRegClassID(
Desc.operands()[Idx]));
9642 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9649 if (RI.isSGPRClass(RegRC))
9667 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9668 SGPRReg = UsedSGPRs[0];
9671 if (!SGPRReg && UsedSGPRs[1]) {
9672 if (UsedSGPRs[1] == UsedSGPRs[2])
9673 SGPRReg = UsedSGPRs[1];
9680 AMDGPU::OpName OperandName)
const {
9681 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9684 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9688 return &
MI.getOperand(Idx);
9702 if (ST.isAmdHsaOS()) {
9705 RsrcDataFormat |= (1ULL << 56);
9710 RsrcDataFormat |= (2ULL << 59);
9713 return RsrcDataFormat;
9723 uint64_t EltSizeValue =
Log2_32(ST.getMaxPrivateElementSize(
true)) - 1;
9728 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9735 Rsrc23 &=
~AMDGPU::RSRC_DATA_FORMAT;
9741 unsigned Opc =
MI.getOpcode();
9747 return get(
Opc).mayLoad() &&
9752 int &FrameIndex)
const {
9754 if (!Addr || !Addr->
isFI())
9765 int &FrameIndex)
const {
9773 int &FrameIndex)
const {
9787 int &FrameIndex)
const {
9804 while (++
I != E &&
I->isInsideBundle()) {
9805 assert(!
I->isBundle() &&
"No nested bundle!");
9813 unsigned Opc =
MI.getOpcode();
9815 unsigned DescSize =
Desc.getSize();
9820 unsigned Size = DescSize;
9824 if (
MI.isBranch() && ST.hasOffset3fBug())
9835 bool HasLiteral =
false;
9836 unsigned LiteralSize = 4;
9837 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
9842 if (ST.has64BitLiterals()) {
9843 switch (OpInfo.OperandType) {
9866 return HasLiteral ? DescSize + LiteralSize : DescSize;
9871 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
9875 int RSrcIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::srsrc);
9876 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9880 case TargetOpcode::BUNDLE:
9882 case TargetOpcode::INLINEASM:
9883 case TargetOpcode::INLINEASM_BR: {
9885 const char *AsmStr =
MI.getOperand(0).getSymbolName();
9889 if (
MI.isMetaInstruction())
9893 const auto *D16Info = AMDGPU::getT16D16Helper(
Opc);
9896 unsigned LoInstOpcode = D16Info->LoOp;
9898 DescSize =
Desc.getSize();
9902 if (
Opc == AMDGPU::V_FMA_MIX_F16_t16 ||
Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9905 DescSize =
Desc.getSize();
9916 if (
MI.memoperands_empty())
9928 static const std::pair<int, const char *> TargetIndices[] = {
9967std::pair<unsigned, unsigned>
9974 static const std::pair<unsigned, const char *> TargetFlags[] = {
9992 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
10008 return AMDGPU::WWM_COPY;
10010 return AMDGPU::COPY;
10027 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
10031 if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
10032 return IsLRSplitInst;
10045 bool IsNullOrVectorRegister =
true;
10049 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
10052 return IsNullOrVectorRegister &&
10054 (!
MI.isTerminator() &&
MI.getOpcode() != AMDGPU::COPY &&
10055 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
10063 if (ST.hasAddNoCarryInsts())
10079 if (ST.hasAddNoCarryInsts())
10083 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
10085 : RS.scavengeRegisterBackwards(
10086 *RI.getBoolRC(),
I,
false,
10099 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10100 case AMDGPU::SI_KILL_I1_TERMINATOR:
10109 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10110 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10111 case AMDGPU::SI_KILL_I1_PSEUDO:
10112 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10124 const unsigned OffsetBits =
10126 return (1 << OffsetBits) - 1;
10130 if (!ST.isWave32())
10133 if (
MI.isInlineAsm())
10136 if (
MI.getNumOperands() <
MI.getNumExplicitOperands())
10139 for (
auto &
Op :
MI.implicit_operands()) {
10140 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
10141 Op.setReg(AMDGPU::VCC_LO);
10150 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sbase);
10154 const int16_t RCID = getOpRegClassID(
MI.getDesc().operands()[Idx]);
10155 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10171 if (Imm > MaxImm) {
10172 if (Imm <= MaxImm + 64) {
10174 Overflow = Imm - MaxImm;
10193 if (Overflow > 0) {
10201 if (ST.hasRestrictedSOffset())
10206 SOffset = Overflow;
10244 if (!ST.hasFlatInstOffsets())
10252 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10264std::pair<int64_t, int64_t>
10267 int64_t RemainderOffset = COffsetVal;
10268 int64_t ImmField = 0;
10273 if (AllowNegative) {
10275 int64_t
D = 1LL << NumBits;
10276 RemainderOffset = (COffsetVal /
D) *
D;
10277 ImmField = COffsetVal - RemainderOffset;
10279 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10281 (ImmField % 4) != 0) {
10283 RemainderOffset += ImmField % 4;
10284 ImmField -= ImmField % 4;
10286 }
else if (COffsetVal >= 0) {
10288 RemainderOffset = COffsetVal - ImmField;
10292 assert(RemainderOffset + ImmField == COffsetVal);
10293 return {ImmField, RemainderOffset};
10297 if (ST.hasNegativeScratchOffsetBug() &&
10305 switch (ST.getGeneration()) {
10334 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10335 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10336 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10337 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10338 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10339 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10340 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10341 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10348#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10349 case OPCODE##_dpp: \
10350 case OPCODE##_e32: \
10351 case OPCODE##_e64: \
10352 case OPCODE##_e64_dpp: \
10353 case OPCODE##_sdwa:
10367 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10368 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10369 case AMDGPU::V_FMA_F16_gfx9_e64:
10370 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10371 case AMDGPU::V_INTERP_P2_F16:
10372 case AMDGPU::V_MAD_F16_e64:
10373 case AMDGPU::V_MAD_U16_e64:
10374 case AMDGPU::V_MAD_I16_e64:
10383 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10397 switch (ST.getGeneration()) {
10410 if (
isMAI(Opcode)) {
10418 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX11_7Insts())
10421 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX1250Insts())
10428 if (ST.hasGFX90AInsts()) {
10429 uint32_t NMCOp = AMDGPU::INSTRUCTION_LIST_END;
10430 if (ST.hasGFX940Insts())
10432 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10434 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10436 if (NMCOp != AMDGPU::INSTRUCTION_LIST_END)
10442 if (MCOp == AMDGPU::INSTRUCTION_LIST_END)
10461 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
10462 if (
MI.getOperand(1 + 2 *
I + 1).getImm() == SubReg) {
10463 auto &RegOp =
MI.getOperand(1 + 2 *
I);
10475 switch (
MI.getOpcode()) {
10477 case AMDGPU::REG_SEQUENCE:
10481 case AMDGPU::INSERT_SUBREG:
10482 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
10499 if (!
P.Reg.isVirtual())
10504 while (
auto *
MI = DefInst) {
10506 switch (
MI->getOpcode()) {
10508 case AMDGPU::V_MOV_B32_e32: {
10509 auto &Op1 =
MI->getOperand(1);
10538 auto *DefBB =
DefMI.getParent();
10542 if (
UseMI.getParent() != DefBB)
10545 const int MaxInstScan = 20;
10549 auto E =
UseMI.getIterator();
10550 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
10551 if (
I->isDebugInstr())
10554 if (++NumInst > MaxInstScan)
10557 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
10570 auto *DefBB =
DefMI.getParent();
10572 const int MaxUseScan = 10;
10576 auto &UseInst = *
Use.getParent();
10579 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10582 if (++NumUse > MaxUseScan)
10589 const int MaxInstScan = 20;
10593 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
10596 if (
I->isDebugInstr())
10599 if (++NumInst > MaxInstScan)
10612 if (Reg == VReg && --NumUse == 0)
10614 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
10623 auto Cur =
MBB.begin();
10624 if (Cur !=
MBB.end())
10626 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
10629 }
while (Cur !=
MBB.end() && Cur != LastPHIIt);
10638 if (InsPt !=
MBB.end() &&
10639 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10640 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10641 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10642 InsPt->definesRegister(Src,
nullptr)) {
10646 .
addReg(Src, {}, SrcSubReg)
10671 if (isFullCopyInstr(
MI)) {
10672 Register DstReg =
MI.getOperand(0).getReg();
10673 Register SrcReg =
MI.getOperand(1).getReg();
10695 unsigned *PredCost)
const {
10696 if (
MI.isBundle()) {
10699 unsigned Lat = 0,
Count = 0;
10700 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
10702 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
10704 return Lat +
Count - 1;
10707 return SchedModel.computeInstrLatency(&
MI);
10714 return *CallAddrOp;
10721 unsigned Opcode =
MI.getOpcode();
10723 auto HandleAddrSpaceCast = [
this, &MRI](
const MachineInstr &
MI) {
10726 :
MI.getOperand(1).getReg();
10730 unsigned SrcAS = SrcTy.getAddressSpace();
10733 ST.hasGloballyAddressableScratch()
10741 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10742 return HandleAddrSpaceCast(
MI);
10745 auto IID = GI->getIntrinsicID();
10752 case Intrinsic::amdgcn_addrspacecast_nonnull:
10753 return HandleAddrSpaceCast(
MI);
10754 case Intrinsic::amdgcn_if:
10755 case Intrinsic::amdgcn_else:
10769 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10770 Opcode == AMDGPU::G_SEXTLOAD) {
10771 if (
MI.memoperands_empty())
10775 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10776 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10784 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10785 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10786 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10795 Formatter = std::make_unique<AMDGPUMIRFormatter>(ST);
10796 return Formatter.get();
10805 unsigned opcode =
MI.getOpcode();
10806 if (opcode == AMDGPU::V_READLANE_B32 ||
10807 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10808 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10811 if (isCopyInstr(
MI)) {
10815 RI.getPhysRegBaseClass(srcOp.
getReg());
10823 if (
MI.isPreISelOpcode())
10838 if (
MI.memoperands_empty())
10842 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10843 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10858 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
10860 if (!
SrcOp.isReg())
10864 if (!Reg || !
SrcOp.readsReg())
10870 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
10897 F,
"ds_ordered_count unsupported for this calling conv"));
10911 Register &SrcReg2, int64_t &CmpMask,
10912 int64_t &CmpValue)
const {
10913 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
10916 switch (
MI.getOpcode()) {
10919 case AMDGPU::S_CMP_EQ_U32:
10920 case AMDGPU::S_CMP_EQ_I32:
10921 case AMDGPU::S_CMP_LG_U32:
10922 case AMDGPU::S_CMP_LG_I32:
10923 case AMDGPU::S_CMP_LT_U32:
10924 case AMDGPU::S_CMP_LT_I32:
10925 case AMDGPU::S_CMP_GT_U32:
10926 case AMDGPU::S_CMP_GT_I32:
10927 case AMDGPU::S_CMP_LE_U32:
10928 case AMDGPU::S_CMP_LE_I32:
10929 case AMDGPU::S_CMP_GE_U32:
10930 case AMDGPU::S_CMP_GE_I32:
10931 case AMDGPU::S_CMP_EQ_U64:
10932 case AMDGPU::S_CMP_LG_U64:
10933 SrcReg =
MI.getOperand(0).getReg();
10934 if (
MI.getOperand(1).isReg()) {
10935 if (
MI.getOperand(1).getSubReg())
10937 SrcReg2 =
MI.getOperand(1).getReg();
10939 }
else if (
MI.getOperand(1).isImm()) {
10941 CmpValue =
MI.getOperand(1).getImm();
10947 case AMDGPU::S_CMPK_EQ_U32:
10948 case AMDGPU::S_CMPK_EQ_I32:
10949 case AMDGPU::S_CMPK_LG_U32:
10950 case AMDGPU::S_CMPK_LG_I32:
10951 case AMDGPU::S_CMPK_LT_U32:
10952 case AMDGPU::S_CMPK_LT_I32:
10953 case AMDGPU::S_CMPK_GT_U32:
10954 case AMDGPU::S_CMPK_GT_I32:
10955 case AMDGPU::S_CMPK_LE_U32:
10956 case AMDGPU::S_CMPK_LE_I32:
10957 case AMDGPU::S_CMPK_GE_U32:
10958 case AMDGPU::S_CMPK_GE_I32:
10959 SrcReg =
MI.getOperand(0).getReg();
10961 CmpValue =
MI.getOperand(1).getImm();
10971 if (S->isLiveIn(AMDGPU::SCC))
10980bool SIInstrInfo::invertSCCUse(
MachineInstr *SCCDef)
const {
10983 bool SCCIsDead =
false;
10986 constexpr unsigned ScanLimit = 12;
10987 unsigned Count = 0;
10988 for (MachineInstr &
MI :
10990 if (++
Count > ScanLimit)
10992 if (
MI.readsRegister(AMDGPU::SCC, &RI)) {
10993 if (
MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
10994 MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
10995 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10996 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
11001 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
11014 for (MachineInstr *
MI : InvertInstr) {
11015 if (
MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
11016 MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
11018 }
else if (
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11019 MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
11020 MI->setDesc(
get(
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
11021 ? AMDGPU::S_CBRANCH_SCC1
11022 : AMDGPU::S_CBRANCH_SCC0));
11035 bool NeedInversion)
const {
11036 MachineInstr *KillsSCC =
nullptr;
11041 if (
MI.modifiesRegister(AMDGPU::SCC, &RI))
11043 if (
MI.killsRegister(AMDGPU::SCC, &RI))
11046 if (NeedInversion && !invertSCCUse(SCCRedefine))
11048 if (MachineOperand *SccDef =
11050 SccDef->setIsDead(
false);
11058 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
11059 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
11061 bool Op1IsNonZeroImm =
11062 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
11063 bool Op2IsZeroImm =
11064 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
11065 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
11071 unsigned &NewDefOpc) {
11074 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
11075 Def.getOpcode() != AMDGPU::S_ADD_U32)
11081 if ((!AddSrc1.
isImm() || AddSrc1.
getImm() != 1) &&
11087 if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
11089 Def.findRegisterDefOperand(AMDGPU::SCC,
nullptr);
11092 NewDefOpc = AMDGPU::S_ADD_U32;
11094 NeedInversion = !NeedInversion;
11099 Register SrcReg2, int64_t CmpMask,
11108 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
11109 this](
bool NeedInversion) ->
bool {
11133 unsigned NewDefOpc = Def->getOpcode();
11139 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11142 if (NewDefOpc != Def->getOpcode())
11143 Def->setDesc(
get(NewDefOpc));
11152 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11159 if (Def1 && Def1->
getOpcode() == AMDGPU::COPY && Def2 &&
11167 optimizeSCC(
Select, Def,
false);
11174 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11175 this](int64_t ExpectedValue,
unsigned SrcSize,
11176 bool IsReversible,
bool IsSigned) ->
bool {
11204 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11205 Def->getOpcode() != AMDGPU::S_AND_B64)
11209 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
11220 SrcOp = &Def->getOperand(2);
11221 else if (isMask(&Def->getOperand(2)))
11222 SrcOp = &Def->getOperand(1);
11230 if (IsSigned && BitNo == SrcSize - 1)
11233 ExpectedValue <<= BitNo;
11235 bool IsReversedCC =
false;
11236 if (CmpValue != ExpectedValue) {
11239 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11244 Register DefReg = Def->getOperand(0).getReg();
11248 if (!optimizeSCC(Def, &CmpInstr,
false))
11259 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11260 : AMDGPU::S_BITCMP1_B32
11261 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11262 : AMDGPU::S_BITCMP1_B64;
11267 Def->eraseFromParent();
11275 case AMDGPU::S_CMP_EQ_U32:
11276 case AMDGPU::S_CMP_EQ_I32:
11277 case AMDGPU::S_CMPK_EQ_U32:
11278 case AMDGPU::S_CMPK_EQ_I32:
11279 return optimizeCmpAnd(1, 32,
true,
false) ||
11280 optimizeCmpSelect(
true);
11281 case AMDGPU::S_CMP_GE_U32:
11282 case AMDGPU::S_CMPK_GE_U32:
11283 return optimizeCmpAnd(1, 32,
false,
false);
11284 case AMDGPU::S_CMP_GE_I32:
11285 case AMDGPU::S_CMPK_GE_I32:
11286 return optimizeCmpAnd(1, 32,
false,
true);
11287 case AMDGPU::S_CMP_EQ_U64:
11288 return optimizeCmpAnd(1, 64,
true,
false);
11289 case AMDGPU::S_CMP_LG_U32:
11290 case AMDGPU::S_CMP_LG_I32:
11291 case AMDGPU::S_CMPK_LG_U32:
11292 case AMDGPU::S_CMPK_LG_I32:
11293 return optimizeCmpAnd(0, 32,
true,
false) ||
11294 optimizeCmpSelect(
false);
11295 case AMDGPU::S_CMP_GT_U32:
11296 case AMDGPU::S_CMPK_GT_U32:
11297 return optimizeCmpAnd(0, 32,
false,
false);
11298 case AMDGPU::S_CMP_GT_I32:
11299 case AMDGPU::S_CMPK_GT_I32:
11300 return optimizeCmpAnd(0, 32,
false,
true);
11301 case AMDGPU::S_CMP_LG_U64:
11302 return optimizeCmpAnd(0, 64,
true,
false) ||
11303 optimizeCmpSelect(
false);
11310 AMDGPU::OpName
OpName)
const {
11311 if (!ST.needsAlignedVGPRs())
11314 int OpNo = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
OpName);
11326 bool IsAGPR = RI.isAGPR(MRI, DataReg);
11328 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11332 : &AMDGPU::VReg_64_Align2RegClass);
11334 .
addReg(DataReg, {},
Op.getSubReg())
11339 Op.setSubReg(AMDGPU::sub0);
11354 if (ST.hasGFX1250Insts())
11361 unsigned Opcode =
MI.getOpcode();
11367 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11368 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11371 if (!ST.hasGFX940Insts())
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static MachineBasicBlock * loadScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool setsSCCIfResultIsZero(const MachineInstr &Def, bool &NeedInversion, unsigned &NewDefOpc)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps)
Move NumOps operands from Src to Dst, updating use-def lists as needed.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
LLVM_ABI void clearVirtRegs()
clearVirtRegs - Remove all virtual registers (after physreg assignment).
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
void setSimpleHint(Register VReg, Register PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI void removeRegOperandFromUseList(MachineOperand *MO)
Remove MO from its use-def list.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI void addRegOperandToUseList(MachineOperand *MO)
Add MO to the linked list of operands for its register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
bool isSpill(uint32_t Opcode) const
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
const MIRFormatter * getMIRFormatter() const override
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
static bool isMAI(const MCInstrDesc &Desc)
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const override
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst to fix 16bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
bool isAlwaysGDS(uint32_t Opcode) const
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminate with divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool isWWMRegSpillOpcode(uint32_t Opcode)
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
unsigned getScratchReservedForDynamicVGPRs() const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int32_t getCommuteRev(uint32_t Opcode)
LLVM_READONLY int32_t getCommuteOrig(uint32_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READONLY int32_t getGlobalVaddrOp(uint32_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getIfAddr64Inst(uint32_t Opcode)
Check if Opcode is an Addr64 opcode.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
LLVM_READONLY int32_t getAddr64Inst(uint32_t Opcode)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int32_t getBasicFromSDWAOp(uint32_t Opcode)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
LLVM_READONLY int32_t getFlatScratchInstSVfromSS(uint32_t Opcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions starting from FirstMI to LastMI (exclusive).
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the start of kernel execution.
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address from the specified value, returning the original object being addressed.
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr RegState getUndefRegState(bool B)
MachineCycleInfo::CycleT MachineCycle
static const MachineMemOperand::Flags MOThreadPrivate
Mark the MMO of accesses to memory locations that are never written to by other threads.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
constexpr bool all() const
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.