#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
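// Command-line knobs used below: a debug option that restricts the encodable
// branch-offset range, and a workaround that widens 16-bit physical-register
// copies to 32 bits.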
static cl::opt<unsigned> BranchOffsetBits(
    "amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
    cl::desc("Restrict range of branch instructions (DEBUG)"));

static cl::opt<bool> Fix16BitCopies(
    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
    cl::init(true), cl::ReallyHidden);
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.memoperands_empty() &&
         llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
         });

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
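// resultDependsOnExec: conservatively decide whether this instruction's result
// changes when the EXEC mask changes (e.g. compares feeding EXEC updates,
// readfirstlane, and structured-control-flow pseudos).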
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))
  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;
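// Decompose a load/store into base operand(s), byte offset, and data operand,
// handling the DS read2/write2 pairs as well as buffer, image, scalar, and
// flat forms below.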
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    if (DataOpIdx == -1)
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    if (DataOpIdx == -1)
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    AMDGPU::OpName RsrcOpName =
        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);

    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    if (DataOpIdx == -1)
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
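// Clustering heuristic: accesses may only be clustered when they share a base
// (identical base operands or the same underlying IR value), and the cluster
// is capped by a total DWORD budget.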
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
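// Diagnose copies that cannot be lowered directly (e.g. VGPR to SGPR) and
// expand AGPR-to-AGPR copies on GFX908, which need a reserved VGPR as an
// intermediate.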
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");
    for (auto Def = MI, E = MBB.begin(); Def != E; ) {

      if (!Def->modifiesRegister(SrcReg, &RI))

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,

    RS.enterBasicBlockEnd(MBB);
    RS.backward(std::next(MI));

    unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

    assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
           "VGPR used for an intermediate copy should have been reserved.");

    Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
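// copyPhysReg: choose a move opcode from the destination/source register
// classes and split wide copies into per-subregister moves.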
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if ((Size == 16) != (SrcSize == 16)) {
    assert(ST.useRealTrue16Insts());

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);
  if (RC == &AMDGPU::VGPR_32RegClass) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                   AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());
  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
          "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
          "Cannot use hi16 subreg on VI!");
  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {

    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
    else
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;
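// Generic per-32-bit-subregister copy loop; a register scavenger is only
// needed when no direct opcode exists for this pair of register classes.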
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    unsigned SubIdx;
    if (Forward)
      SubIdx = SubIndices[Idx];
    else
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                        *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
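// Recognize instructions that define a register with a compile-time constant
// (plain moves, bit-reversed moves, and bitwise-not of an immediate) and
// report the value.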
                                          int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B16_t16_e32: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::V_MOV_B16_t16_e64: {
    if (Src0.isImm() && !MI.getOperand(1).getImm()) {
      return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;
std::optional<int64_t>

  if (!Op.isReg() || !Op.getReg().isVirtual())
    return std::nullopt;

  if (Def && Def->isMoveImmediate()) {

  return std::nullopt;

  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  }
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
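// Indirect register indexing: pick the pseudo whose register-tuple width
// (V1..V32) covers the vector size.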
                                                     bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                              bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;

                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;
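// storeRegToStackSlot: SGPR spills must never touch M0 or EXEC and may be
// lowered to VGPR lanes; vector and AV spills use the pseudos selected above.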
  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = RI.getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
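// The matching *_RESTORE pseudos, again selected by spill size.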
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = RI.getSpillSize(*RC);

      FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
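// NOP insertion uses the largest per-instruction repeat count S_NOP encodes on
// this subtarget; the trap expansion below masks the doorbell ID and signals a
// wave abort through TTMP2/M0.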
                             unsigned Quantity) const {
  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    MBB.splitAt(MI, false);

    MBB.addSuccessor(TrapBB);

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)

      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)

      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  return MBB.getNextNode();
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
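// expandPostRAPseudo: the *_term terminators simply become their
// non-terminator forms; the move pseudos below expand into real 32/64-bit
// moves.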
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));

  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64() && Mov64RC->contains(Dst)) {
      MI.setDesc(Mov64Desc);

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
    } else {
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;
    }

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
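// PC-relative materialization: add the symbol offset to the program counter
// (32-bit lo/hi adds, or a single 64-bit add for the OFFSET64 form).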
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
    Op.setOffset(Op.getOffset() + 4);

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {

  case AMDGPU::ENTER_STRICT_WQM: {

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {

  case AMDGPU::SI_RETURN: {

    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
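// Shrink a wide scalar load whose only user reads a 256- or 128-bit
// subregister: switch to the narrower S_LOAD_DWORDXn form and fold the
// subregister byte offset into the immediate offset.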
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));

      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());

        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);

std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;
                                        AMDGPU::OpName Src0OpName,
                                        AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();

  NonRegOp2.setImm(NonRegVal);
                                                  unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&

  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                     unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);
                        AMDGPU::OpName::src1_sel);
                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {

                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);

                                     int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)
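// Long-branch expansion: form the target address from the program counter plus
// a label-difference offset; the needed SGPR pair comes from the reserved
// long-branch register, the scavenger, or an emergency spill of SGPR0_SGPR1.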
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.useAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();
      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, {}, AMDGPU::sub0)
      .addReg(PCReg, {}, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;
  } else {
    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(

    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  switch (Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
const {
3138 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3140 TBB =
I->getOperand(0).getMBB();
3144 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3145 if (Pred == INVALID_BR)
3150 Cond.push_back(
I->getOperand(1));
3154 if (
I ==
MBB.end()) {
3160 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3162 FBB =
I->getOperand(0).getMBB();
3172 bool AllowModify)
const {
3180 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3181 switch (
I->getOpcode()) {
3182 case AMDGPU::S_MOV_B64_term:
3183 case AMDGPU::S_XOR_B64_term:
3184 case AMDGPU::S_OR_B64_term:
3185 case AMDGPU::S_ANDN2_B64_term:
3186 case AMDGPU::S_AND_B64_term:
3187 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3188 case AMDGPU::S_MOV_B32_term:
3189 case AMDGPU::S_XOR_B32_term:
3190 case AMDGPU::S_OR_B32_term:
3191 case AMDGPU::S_ANDN2_B32_term:
3192 case AMDGPU::S_AND_B32_term:
3193 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3196 case AMDGPU::SI_ELSE:
3197 case AMDGPU::SI_KILL_I1_TERMINATOR:
3198 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3215 int *BytesRemoved)
const {
3217 unsigned RemovedSize = 0;
3220 if (
MI.isBranch() ||
MI.isReturn()) {
3222 MI.eraseFromParent();
3228 *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {

  if (Cond[0].isImm()) {

                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.isSGPRClass(RC);
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
    if (NElts % 2) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
    } else {
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;
    }
  }

      BuildMI(MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, {}, SubIdx)
          .addReg(TrueReg, {}, SubIdx);
    } else {
          .addReg(TrueReg, {}, SubIdx)
          .addReg(FalseReg, {}, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

                                              unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
    return std::nullopt;
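// Map MAC/MAD/FMA opcodes to their MADAK/FMAAK and MADMK/FMAMK forms, used
// when an immediate operand can be folded directly into the instruction.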
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F16_t16_e32:
  case AMDGPU::V_FMAC_F16_fake16_e32:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F16_t16_e32:
  case AMDGPU::V_FMAC_F16_fake16_e32:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
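// foldImmediate: rewrite a COPY of a constant definition into an immediate
// move, or fold the constant into a MAC/FMA user via the *AK/*MK forms above.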
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (RI.hasVGPRs(DstRC))

    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

      MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

      if (MovDstPhysReg) {
            RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);
    if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

    auto CopyRegOperandToNarrowerRC =
          if (!MI.getOperand(OpNo).isReg())
          if (RI.getCommonSubClass(RC, NewRC) != NewRC)
          BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                  get(AMDGPU::COPY), Tmp)
          MI.getOperand(OpNo).setReg(Tmp);
          MI.getOperand(OpNo).setIsKill();

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())
    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

    if (Def && Def->isMoveImmediate() &&

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
                           UseMI.getDebugLoc(), get(AMDGPU::COPY),
                           UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        CopyRegOperandToNarrowerRC(UseMI, 1, NewRC);
        CopyRegOperandToNarrowerRC(UseMI, 3, NewRC);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

    if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&
        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
            MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
                           UseMI.getDebugLoc(), get(AMDGPU::COPY),
                           UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        CopyRegOperandToNarrowerRC(UseMI, 1, NewRC);
        CopyRegOperandToNarrowerRC(UseMI, 2, NewRC);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
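// Memory disjointness checks: accesses with a common base do not alias when
// their [offset, offset + width) ranges do not overlap.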
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

    LocationSize Width0 = MIa.memoperands().front()->getSize();
    LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
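// convertToThreeAddress: also handles single-instruction bundles; after
// conversion, the now-dead immediate def is either replaced with IMPLICIT_DEF
// or redirected to a dummy register, and live intervals are repaired.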
  if (MI.isBundle()) {
    if (MI.getBundleSize() != 1)
    CandidateMI = MI.getNextNode();

  MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);

  if (MI.isBundle()) {
        MI.untieRegOperand(MO.getOperandNo());

    if (Def.isEarlyClobber() && Def.isReg() &&

      auto UpdateDefIndex = [&](LiveRange &LR) {
        auto *S = LR.find(OldIndex);
        if (S != LR.end() && S->start == OldIndex) {
          assert(S->valno && S->valno->def == OldIndex);
          S->start = NewIndex;
          S->valno->def = NewIndex;

      for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {
    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
    if (MRI.hasOneNonDBGUse(DefReg)) {
      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      if (MI.isBundle()) {
          if (MO.isReg() && MO.getReg() == DefReg) {
            assert(MO.getSubReg() == 0 &&
                   "tied sub-registers in bundles currently not supported");
            MI.removeOperand(MO.getOperandNo());

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

      if (MI.isBundle()) {
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

  return MI.isBundle() ? &MI : NewMI;
                                                   ThreeAddressUpdates &U) const {
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);

  MachineInstrBuilder MIB;

  const MachineOperand *Src0Mods =
  const MachineOperand *Src1Mods =
  const MachineOperand *Src2Mods =

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
    MachineInstr *DefMI;

        MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())
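// Scheduling and hazard predicates: instructions that change GPR-index mode,
// EXEC, or wave priority act as scheduling boundaries, and memory operands
// are classified by address space to detect scratch/LDS/flat accesses.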
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);

  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  if (ST.isTgSplitEnabled())

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
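
// The immediate classification below proceeds in three steps: raw bit width
// (APInt), the bitcast floating-point value, and finally the operand type,
// where the 16-bit cases additionally require ST.has16BitInsts() and the
// inv-2pi constant is only treated as inline when ST.hasInv2PiInlineImm().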
  switch (Imm.getBitWidth()) {
                               ST.hasInv2PiInlineImm());
                               ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                               ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();

  bool HasInv2Pi = ST.hasInv2PiInlineImm();

    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&

  switch (OperandType) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))

  return ST.hasVOP3Literal();

                                        int64_t ImmVal) const {
  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))

  return RI.opCanUseInlineConstant(OpInfo.OperandType);

         "unexpected imm-like operand kind");

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                                    AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

    if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                          unsigned Op32) const {
      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();

    int OpTy = MI.getDesc().operands()[Idx++].OperandType;

  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {

  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)

  return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;

  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";

  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);

  if (SIInstrInfo::isGenericOpcode(Opcode))

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
5145 if (
MI.isInlineAsm()) {
5158 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
5159 ErrInfo =
"inlineasm operand has incorrect register class.";
5167 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
5168 ErrInfo =
"missing memory operand from image instruction.";
5173 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5176 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5177 "all fp values to integers.";
5182 int16_t RegClass = getOpRegClassID(OpInfo);
5184 switch (OpInfo.OperandType) {
5186 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5187 ErrInfo =
"Illegal immediate value for operand.";
5222 ErrInfo =
"Illegal immediate value for operand.";
5229 ErrInfo =
"Expected inline constant for operand.";
5244 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5245 ErrInfo =
"Expected immediate, but got non-immediate";
5254 if (OpInfo.isGenericType())
5269 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
5270 Opcode != AMDGPU::V_MOV_B64_PSEUDO) {
5272 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5274 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5275 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5282 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5283 ErrInfo =
"Subtarget requires even aligned vector registers";
5288 if (RegClass != -1) {
5289 if (Reg.isVirtual())
5294 ErrInfo =
"Operand has incorrect register class.";
5302 if (!ST.hasSDWA()) {
5303 ErrInfo =
"SDWA is not supported on this target";
5307 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5308 AMDGPU::OpName::dst_sel}) {
5312 int64_t Imm = MO->
getImm();
5314 ErrInfo =
"Invalid SDWA selection";
5319 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5321 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5326 if (!ST.hasSDWAScalar()) {
5328 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(
MRI, MO.
getReg()))) {
5329 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5336 "Only reg allowed as operands in SDWA instructions on GFX9+";
5342 if (!ST.hasSDWAOmod()) {
5345 if (OMod !=
nullptr &&
5347 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5352 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5353 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5354 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5355 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5358 unsigned Mods = Src0ModsMO->
getImm();
5361 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5367 if (
isVOPC(BasicOpcode)) {
5368 if (!ST.hasSDWASdst() && DstIdx != -1) {
5371 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5372 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5375 }
else if (!ST.hasSDWAOutModsVOPC()) {
5378 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5379 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5385 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5386 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5393 if (DstUnused && DstUnused->isImm() &&
5396 if (!Dst.isReg() || !Dst.isTied()) {
5397 ErrInfo =
"Dst register should have tied register";
5402 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5405 "Dst register should be tied to implicit use of preserved register";
5409 ErrInfo =
"Dst register should use same physical register as preserved";
5416 if (
isImage(Opcode) && !
MI.mayStore()) {
5428 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5436 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5440 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5441 if (RegCount > DstSize) {
5442 ErrInfo =
"Image instruction returns too many registers for dst "
5451 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5452 unsigned ConstantBusCount = 0;
5453 bool UsesLiteral =
false;
5456 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5460 LiteralVal = &
MI.getOperand(ImmIdx);
5469 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5480 }
else if (!MO.
isFI()) {
5487 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5497 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5498 return !RI.regsOverlap(SGPRUsed, SGPR);
5507 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5508 Opcode != AMDGPU::V_WRITELANE_B32) {
5509 ErrInfo =
"VOP* instruction violates constant bus restriction";
5513 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5514 ErrInfo =
"VOP3 instruction uses literal";
5521 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5522 unsigned SGPRCount = 0;
5525 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5533 if (MO.
getReg() != SGPRUsed)
5538 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5539 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5546 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5547 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5554 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5564 ErrInfo =
"ABS not allowed in VOP3B instructions";
5577 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5584 if (
Desc.isBranch()) {
5586 ErrInfo =
"invalid branch target for SOPK instruction";
5593 ErrInfo =
"invalid immediate for SOPK instruction";
5598 ErrInfo =
"invalid immediate for SOPK instruction";
5605 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5606 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5607 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5608 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5609 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5610 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5612 const unsigned StaticNumOps =
5613 Desc.getNumOperands() +
Desc.implicit_uses().size();
5614 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5620 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5621 ErrInfo =
"missing implicit register operands";
5627 if (!Dst->isUse()) {
5628 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5633 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5634 UseOpIdx != StaticNumOps + 1) {
5635 ErrInfo =
"movrel implicit operands should be tied";
5642 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5644 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5645 ErrInfo =
"src0 should be subreg of implicit vector use";
5653 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5654 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5660 if (
MI.mayStore() &&
5665 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5666 ErrInfo =
"scalar stores must use m0 as offset register";
5672 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5674 if (
Offset->getImm() != 0) {
5675 ErrInfo =
"subtarget does not support offsets in flat instructions";
5680 if (
isDS(
MI) && !ST.hasGDS()) {
5682 if (GDSOp && GDSOp->
getImm() != 0) {
5683 ErrInfo =
"GDS is not supported on this subtarget";
5691 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5692 AMDGPU::OpName::vaddr0);
5693 AMDGPU::OpName RSrcOpName =
5694 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5695 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5703 ErrInfo =
"dim is out of range";
5708 if (ST.hasR128A16()) {
5710 IsA16 = R128A16->
getImm() != 0;
5711 }
else if (ST.hasA16()) {
5713 IsA16 = A16->
getImm() != 0;
    bool IsNSA = RsrcIdx - VAddr0Idx > 1;

    unsigned AddrWords =

    unsigned VAddrWords;

      VAddrWords = RsrcIdx - VAddr0Idx;
      // With partial NSA encoding the final vaddr operand may be a wider VGPR
      // tuple covering the remaining address words, so it counts for more
      // than one word when comparing against the expected address size.
      if (ST.hasPartialNSAEncoding() &&
        unsigned LastVAddrIdx = RsrcIdx - 1;
        VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

    if (VAddrWords != AddrWords) {
                 << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";
5748 unsigned DC = DppCt->
getImm();
5749 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5750 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5751 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5752 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5753 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5754 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5755 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5756 ErrInfo =
"Invalid dpp_ctrl value";
5759 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5761 ErrInfo =
"Invalid dpp_ctrl value: "
5762 "wavefront shifts are not supported on GFX10+";
5765 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5767 ErrInfo =
"Invalid dpp_ctrl value: "
5768 "broadcasts are not supported on GFX10+";
5771 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5773 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5774 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5775 !ST.hasGFX90AInsts()) {
5776 ErrInfo =
"Invalid dpp_ctrl value: "
5777 "row_newbroadcast/row_share is not supported before "
5781 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5782 ErrInfo =
"Invalid dpp_ctrl value: "
5783 "row_share and row_xmask are not supported before GFX10";
5788 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5791 ErrInfo =
"Invalid dpp_ctrl value: "
5792 "DP ALU dpp only support row_newbcast";
5799 AMDGPU::OpName DataName =
5800 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5806 if (ST.hasGFX90AInsts()) {
5807 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5808 (RI.isAGPR(
MRI, Dst->getReg()) != RI.isAGPR(
MRI,
Data->getReg()))) {
5809 ErrInfo =
"Invalid register class: "
5810 "vdata and vdst should be both VGPR or AGPR";
5813 if (
Data && Data2 &&
5815 ErrInfo =
"Invalid register class: "
5816 "both data operands should be VGPR or AGPR";
5820 if ((Dst && RI.isAGPR(
MRI, Dst->getReg())) ||
5822 (Data2 && RI.isAGPR(
MRI, Data2->
getReg()))) {
5823 ErrInfo =
"Invalid register class: "
5824 "agpr loads and stores not supported on this GPU";
5830 if (ST.needsAlignedVGPRs()) {
5831 const auto isAlignedReg = [&
MI, &
MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5836 if (Reg.isPhysical())
5837 return !(RI.getHWRegIndex(Reg) & 1);
5839 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5840 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5843 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5844 Opcode == AMDGPU::DS_GWS_BARRIER) {
5846 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5847 ErrInfo =
"Subtarget requires even aligned vector registers "
5848 "for DS_GWS instructions";
5854 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5855 ErrInfo =
"Subtarget requires even aligned vector registers "
5856 "for vaddr operand of image instructions";
5862 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5864 if (Src->isReg() && RI.isSGPRReg(
MRI, Src->getReg())) {
5865 ErrInfo =
"Invalid register class: "
5866 "v_accvgpr_write with an SGPR is not supported on this GPU";
5871 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5874 ErrInfo =
"pseudo expects only physical SGPRs";
5881 if (!ST.hasScaleOffset()) {
5882 ErrInfo =
"Subtarget does not support offset scaling";
5886 ErrInfo =
"Instruction does not support offset scaling";
5895 for (
unsigned I = 0;
I < 3; ++
I) {
5901 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
5902 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
5904 if ((Dst && RI.getRegClassForReg(
MRI, Dst->getReg()) ==
5905 &AMDGPU::SReg_64RegClass) ||
5906 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5907 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
  switch (MI.getOpcode()) {
  default:
    return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE:
    return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY:
    return AMDGPU::COPY;
  case AMDGPU::PHI:
    return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG:
    return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM:
    return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM:
    return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM:
    return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM:
    return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
                   RI.isAGPR(MRI, MI.getOperand(0).getReg())
               ? AMDGPU::COPY
               : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
                                   : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64
                                   : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U64_PSEUDO:
    return AMDGPU::V_ADD_U64_PSEUDO;
  case AMDGPU::S_SUB_U64_PSEUDO:
    return AMDGPU::V_SUB_U64_PSEUDO;
  case AMDGPU::S_SUBB_U32:
    return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32:
    return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32:
    return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32:
    return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32:
    return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32:
    return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32:
    return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64
                           : AMDGPU::INSTRUCTION_LIST_END;
5960 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5961 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5962 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5963 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5964 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5965 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5966 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5967 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5968 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5969 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5970 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5971 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5972 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5973 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5974 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5975 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5976 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5977 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5978 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5979 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5980 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5981 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5982 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5983 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5984 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5985 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5986 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5987 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5988 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5989 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5990 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5991 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5992 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5993 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5994 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5995 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5996 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5997 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5998 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5999 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
6000 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
6001 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
6002 case AMDGPU::S_CVT_F32_F16:
6003 case AMDGPU::S_CVT_HI_F32_F16:
6004 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
6005 : AMDGPU::V_CVT_F32_F16_fake16_e64;
6006 case AMDGPU::S_CVT_F16_F32:
6007 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
6008 : AMDGPU::V_CVT_F16_F32_fake16_e64;
6009 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
6010 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
6011 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
6012 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
6013 case AMDGPU::S_CEIL_F16:
6014 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
6015 : AMDGPU::V_CEIL_F16_fake16_e64;
6016 case AMDGPU::S_FLOOR_F16:
6017 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6018 : AMDGPU::V_FLOOR_F16_fake16_e64;
6019 case AMDGPU::S_TRUNC_F16:
6020 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6021 : AMDGPU::V_TRUNC_F16_fake16_e64;
6022 case AMDGPU::S_RNDNE_F16:
6023 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6024 : AMDGPU::V_RNDNE_F16_fake16_e64;
6025 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6026 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6027 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6028 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6029 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6030 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6031 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6032 case AMDGPU::S_ADD_F16:
6033 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6034 : AMDGPU::V_ADD_F16_fake16_e64;
6035 case AMDGPU::S_SUB_F16:
6036 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6037 : AMDGPU::V_SUB_F16_fake16_e64;
6038 case AMDGPU::S_MIN_F16:
6039 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6040 : AMDGPU::V_MIN_F16_fake16_e64;
6041 case AMDGPU::S_MAX_F16:
6042 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6043 : AMDGPU::V_MAX_F16_fake16_e64;
6044 case AMDGPU::S_MINIMUM_F16:
6045 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6046 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6047 case AMDGPU::S_MAXIMUM_F16:
6048 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6049 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6050 case AMDGPU::S_MUL_F16:
6051 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6052 : AMDGPU::V_MUL_F16_fake16_e64;
6053 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6054 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6055 case AMDGPU::S_FMAC_F16:
6056 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6057 : AMDGPU::V_FMAC_F16_fake16_e64;
6058 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6059 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6060 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6061 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6062 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6063 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6064 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6065 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6066 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6067 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6068 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6069 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6070 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6071 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6072 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6073 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6074 case AMDGPU::S_CMP_LT_F16:
6075 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6076 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6077 case AMDGPU::S_CMP_EQ_F16:
6078 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6079 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6080 case AMDGPU::S_CMP_LE_F16:
6081 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6082 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6083 case AMDGPU::S_CMP_GT_F16:
6084 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6085 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6086 case AMDGPU::S_CMP_LG_F16:
6087 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6088 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6089 case AMDGPU::S_CMP_GE_F16:
6090 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6091 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6092 case AMDGPU::S_CMP_O_F16:
6093 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6094 : AMDGPU::V_CMP_O_F16_fake16_e64;
6095 case AMDGPU::S_CMP_U_F16:
6096 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6097 : AMDGPU::V_CMP_U_F16_fake16_e64;
6098 case AMDGPU::S_CMP_NGE_F16:
6099 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6100 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6101 case AMDGPU::S_CMP_NLG_F16:
6102 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6103 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6104 case AMDGPU::S_CMP_NGT_F16:
6105 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6106 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6107 case AMDGPU::S_CMP_NLE_F16:
6108 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6109 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6110 case AMDGPU::S_CMP_NEQ_F16:
6111 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6112 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6113 case AMDGPU::S_CMP_NLT_F16:
6114 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6115 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
  case AMDGPU::V_S_EXP_F32_e64:
    return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
                                   : AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64:
    return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
                                   : AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64:
    return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
                                   : AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64:
    return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
                                   : AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64:
    return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
                                   : AMDGPU::V_SQRT_F16_fake16_e64;

                   "Unexpected scalar opcode without corresponding vector one!");
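
// Callers of this mapping fall back to a manual expansion when no direct VALU
// form exists. A sketch of the typical pattern (the fallback helper name here
// is illustrative only, not an existing function):
//
//   unsigned NewOpc = getVALUOp(Inst);
//   if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
//     expandScalarInstByHand(Inst, Worklist);  // hypothetical fallback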
         "Not a whole wave func");

  if (MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
      MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)

                                                       unsigned OpNo) const {
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.operands()[OpNo].RegClass == -1) {

    if (Reg.isVirtual()) {
      return MRI.getRegClass(Reg);

    return RI.getPhysRegBaseClass(Reg);

  int16_t RegClass = getOpRegClassID(Desc.operands()[OpNo]);
  return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);

  unsigned RCID = getOpRegClassID(get(MI.getOpcode()).operands()[OpIdx]);

  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
                    : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
                                 : AMDGPU::V_MOV_B32_e32;

    Opcode = AMDGPU::COPY;
  else if (RI.isSGPRClass(RC))
    Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6243 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6249 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6260 if (SubIdx == AMDGPU::sub0)
6262 if (SubIdx == AMDGPU::sub1)
6274void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6290 if (Reg.isPhysical())
6300 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6303 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6310 unsigned Opc =
MI.getOpcode();
6316 constexpr AMDGPU::OpName OpNames[] = {
6317 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6320 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6321 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6331 bool IsAGPR = RI.isAGPR(
MRI, MO.
getReg());
6332 if (IsAGPR && !ST.hasMAIInsts())
6334 if (IsAGPR && (!ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
6338 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6339 const int DataIdx = AMDGPU::getNamedOperandIdx(
6340 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6341 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6342 MI.getOperand(DataIdx).isReg() &&
6343 RI.isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6345 if ((
int)
OpIdx == DataIdx) {
6346 if (VDstIdx != -1 &&
6347 RI.isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6350 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6351 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6352 RI.isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6357 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6358 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6362 if (ST.hasFlatScratchHiInB64InstHazard() &&
6369 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6390 constexpr unsigned NumOps = 3;
6391 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6392 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6393 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6394 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6399 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6402 MO = &
MI.getOperand(SrcIdx);
6409 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6413 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6417 return !OpSel && !OpSelHi;
6426 int64_t RegClass = getOpRegClassID(OpInfo);
6428 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6437 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6438 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6442 if (!LiteralLimit--)
6452 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6460 if (--ConstantBusLimit <= 0)
6472 if (!LiteralLimit--)
6474 if (--ConstantBusLimit <= 0)
6480 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6484 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6486 !
Op.isIdenticalTo(*MO))
6496 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6510 bool Is64BitOp = Is64BitFPOp ||
6517 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6526 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6544 bool IsGFX950Only = ST.hasGFX950Insts();
6545 bool IsGFX940Only = ST.hasGFX940Insts();
6547 if (!IsGFX950Only && !IsGFX940Only)
6565 unsigned Opcode =
MI.getOpcode();
6567 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6568 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6569 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6570 case AMDGPU::V_MQSAD_U32_U8_e64:
6571 case AMDGPU::V_PK_ADD_F16:
6572 case AMDGPU::V_PK_ADD_F32:
6573 case AMDGPU::V_PK_ADD_I16:
6574 case AMDGPU::V_PK_ADD_U16:
6575 case AMDGPU::V_PK_ASHRREV_I16:
6576 case AMDGPU::V_PK_FMA_F16:
6577 case AMDGPU::V_PK_FMA_F32:
6578 case AMDGPU::V_PK_FMAC_F16_e32:
6579 case AMDGPU::V_PK_FMAC_F16_e64:
6580 case AMDGPU::V_PK_LSHLREV_B16:
6581 case AMDGPU::V_PK_LSHRREV_B16:
6582 case AMDGPU::V_PK_MAD_I16:
6583 case AMDGPU::V_PK_MAD_U16:
6584 case AMDGPU::V_PK_MAX_F16:
6585 case AMDGPU::V_PK_MAX_I16:
6586 case AMDGPU::V_PK_MAX_U16:
6587 case AMDGPU::V_PK_MIN_F16:
6588 case AMDGPU::V_PK_MIN_I16:
6589 case AMDGPU::V_PK_MIN_U16:
6590 case AMDGPU::V_PK_MOV_B32:
6591 case AMDGPU::V_PK_MUL_F16:
6592 case AMDGPU::V_PK_MUL_F32:
6593 case AMDGPU::V_PK_MUL_LO_U16:
6594 case AMDGPU::V_PK_SUB_I16:
6595 case AMDGPU::V_PK_SUB_U16:
6596 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6605 unsigned Opc =
MI.getOpcode();
6608 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6611 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6617 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6624 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6627 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6633 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6643 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6644 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6645 if (!RI.isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6657 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6659 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6671 if (HasImplicitSGPR || !
MI.isCommutable()) {
6688 if (CommutedOpc == -1) {
6693 MI.setDesc(
get(CommutedOpc));
6697 bool Src0Kill = Src0.
isKill();
6701 else if (Src1.
isReg()) {
6716 unsigned Opc =
MI.getOpcode();
6719 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6720 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6721 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6724 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6725 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6726 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6727 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6728 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6729 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6730 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6734 if (Src1.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()))) {
6735 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6740 if (VOP3Idx[2] != -1) {
6742 if (Src2.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src2.
getReg()))) {
6743 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6752 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6753 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6755 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6757 SGPRsUsed.
insert(SGPRReg);
6761 for (
int Idx : VOP3Idx) {
6770 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6782 if (!RI.isSGPRClass(RI.getRegClassForReg(
MRI, MO.
getReg())))
6789 if (ConstantBusLimit > 0) {
6801 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6802 !RI.isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6808 for (
unsigned I = 0;
I < 3; ++
I) {
6821 SRC = RI.getCommonSubClass(SRC, DstRC);
6824 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6826 if (RI.hasAGPRs(VRC)) {
6827 VRC = RI.getEquivalentVGPRClass(VRC);
6828 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6830 get(TargetOpcode::COPY), NewSrcReg)
6837 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6843 for (
unsigned i = 0; i < SubRegs; ++i) {
6844 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6846 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6847 .
addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
6853 get(AMDGPU::REG_SEQUENCE), DstReg);
6854 for (
unsigned i = 0; i < SubRegs; ++i) {
6856 MIB.
addImm(RI.getSubRegFromChannel(i));
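
    // Wide vector sources are made uniform one 32-bit channel at a time: each
    // channel is copied to an SGPR with V_READFIRSTLANE_B32 and the pieces are
    // reassembled into the SGPR destination with a REG_SEQUENCE.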
6869 if (SBase && !RI.isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6871 SBase->setReg(SGPR);
6874 if (SOff && !RI.isSGPRReg(
MRI, SOff->
getReg())) {
6882 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6883 if (OldSAddrIdx < 0)
6899 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6900 if (NewVAddrIdx < 0)
6903 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6907 if (OldVAddrIdx >= 0) {
6909 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6921 if (OldVAddrIdx == NewVAddrIdx) {
6924 MRI.removeRegOperandFromUseList(&NewVAddr);
6925 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6929 MRI.removeRegOperandFromUseList(&NewVAddr);
6930 MRI.addRegOperandToUseList(&NewVAddr);
6932 assert(OldSAddrIdx == NewVAddrIdx);
6934 if (OldVAddrIdx >= 0) {
6935 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6936 AMDGPU::OpName::vdst_in);
6940 if (NewVDstIn != -1) {
6941 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6947 if (NewVDstIn != -1) {
6948 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6969 if (!SAddr || RI.isSGPRClass(
MRI.getRegClass(SAddr->
getReg())))
6989 unsigned OpSubReg =
Op.getSubReg();
6992 RI.getRegClassForReg(
MRI, OpReg), OpSubReg);
6998 Register DstReg =
MRI.createVirtualRegister(DstRC);
7008 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7011 bool ImpDef = Def->isImplicitDef();
7012 while (!ImpDef && Def && Def->isCopy()) {
7013 if (Def->getOperand(1).getReg().isPhysical())
7015 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
7016 ImpDef = Def && Def->isImplicitDef();
7018 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();

  unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
  unsigned NumSubRegs = RegSize / 32;
  Register VScalarOp = ScalarOp->getReg();

  if (NumSubRegs == 1) {
    Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)

    Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);

    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)

      CondReg = NewCondReg;

      Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

    ScalarOp->setReg(CurReg);
    ScalarOp->setIsKill();

    assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
           "Unhandled register size");

    for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
      Register CurRegLo =
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      Register CurRegHi =
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
          .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
          .addReg(VScalarOp, VScalarOpUndef,
                  TRI->getSubRegFromChannel(Idx + 1));

      Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)

      Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
      auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),

      if (NumSubRegs <= 2)
        Cmp.addReg(VScalarOp);
      else
        Cmp.addReg(VScalarOp, VScalarOpUndef,
                   TRI->getSubRegFromChannel(Idx, 2));

        CondReg = NewCondReg;

        Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

    const auto *SScalarOpRC =
        TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
    Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);

    auto Merge =
        BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
    unsigned Channel = 0;
    for (Register Piece : ReadlanePieces) {
      Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));

    ScalarOp->setReg(SScalarOp);
    ScalarOp->setIsKill();

  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
  MRI.setSimpleHint(SaveExec, CondReg);
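
  // This is the core of a waterfall loop: V_READFIRSTLANE picks the scalar
  // operand value of the first active lane, V_CMP_EQ selects all lanes that
  // share that value, and EXEC is narrowed to those lanes so the instruction
  // runs with a uniform operand; the loop repeats until every lane has been
  // serviced and the saved EXEC mask is restored.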
7177 if (!Begin.isValid())
7179 if (!End.isValid()) {
7185 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7193 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
7194 std::numeric_limits<unsigned>::max()) !=
7197 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7203 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
7212 for (
auto I = Begin;
I != AfterMI;
I++) {
7213 for (
auto &MO :
I->all_uses())
7214 MRI.clearKillFlags(MO.getReg());
7239 MBB.addSuccessor(LoopBB);
7249 for (
auto &Succ : RemainderBB->
successors()) {
7273static std::tuple<unsigned, unsigned>
7281 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7282 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7285 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7286 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7287 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7288 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7289 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7306 .
addImm(AMDGPU::sub0_sub1)
7312 return std::tuple(RsrcPtr, NewSRsrc);
7349 if (
MI.getOpcode() == AMDGPU::PHI) {
7351 assert(!RI.isSGPRClass(VRC));
7354 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7356 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7372 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7375 if (RI.hasVGPRs(DstRC)) {
7379 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7381 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7399 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7404 if (DstRC != Src0RC) {
7413 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7415 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7421 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7422 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7423 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7424 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7425 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7426 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7427 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7429 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7442 ? AMDGPU::OpName::rsrc
7443 : AMDGPU::OpName::srsrc;
7445 if (SRsrc && !RI.isSGPRClass(
MRI.getRegClass(SRsrc->
getReg())))
7448 AMDGPU::OpName SampOpName =
7449 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7451 if (SSamp && !RI.isSGPRClass(
MRI.getRegClass(SSamp->
getReg())))
7458 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7460 if (!RI.isSGPRClass(
MRI.getRegClass(Dest->
getReg()))) {
7464 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7465 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7470 while (Start->getOpcode() != FrameSetupOpcode)
7473 while (End->getOpcode() != FrameDestroyOpcode)
7477 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7478 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7486 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7488 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7490 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7500 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7501 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7502 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7503 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7505 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7512 bool isSoffsetLegal =
true;
7514 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7515 if (SoffsetIdx != -1) {
7518 !RI.isSGPRClass(
MRI.getRegClass(Soffset->
getReg()))) {
7519 isSoffsetLegal =
false;
7523 bool isRsrcLegal =
true;
7525 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7526 if (RsrcIdx != -1) {
7529 isRsrcLegal =
false;
7533 if (isRsrcLegal && isSoffsetLegal)
7557 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7558 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7559 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7561 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7562 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7563 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7565 unsigned RsrcPtr, NewSRsrc;
7572 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7573 .addReg(VAddr->
getReg(), {}, AMDGPU::sub0)
7579 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7580 .addReg(VAddr->
getReg(), {}, AMDGPU::sub1)
7593 }
else if (!VAddr && ST.hasAddr64()) {
7597 "FIXME: Need to emit flat atomics here");
7599 unsigned RsrcPtr, NewSRsrc;
7602 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7625 MIB.
addImm(CPol->getImm());
7630 MIB.
addImm(TFE->getImm());
7650 MI.removeFromParent();
7655 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7656 .addImm(AMDGPU::sub0)
7657 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7658 .addImm(AMDGPU::sub1);
7661 if (!isSoffsetLegal) {
7673 if (!isSoffsetLegal) {
7685 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7686 if (RsrcIdx != -1) {
7687 DeferredList.insert(
MI);
7692 return DeferredList.contains(
MI);
7702 if (!ST.useRealTrue16Insts())
7705 unsigned Opcode =
MI.getOpcode();
7709 OpIdx >=
get(Opcode).getNumOperands() ||
7710 get(Opcode).operands()[
OpIdx].RegClass == -1)
7714 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7718 if (!RI.isVGPRClass(CurrRC))
7721 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7723 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7724 Op.setSubReg(AMDGPU::lo16);
7725 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7727 Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7735 Op.setReg(NewDstReg);
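
// Main moveToVALU driver: instructions are pulled off the worklist, mapped to
// a VALU opcode where a direct equivalent exists, or handed to a dedicated
// split/lower helper otherwise; users of any newly created VALU results are
// pushed back onto the worklist so the transformation propagates.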
  while (!Worklist.empty()) {

           "Deferred MachineInstr are not supposed to re-populate worklist");

  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_SUB_I32: {
    std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);

  case AMDGPU::S_MUL_U64:
    if (ST.hasVectorMulU64()) {
      NewOpcode = AMDGPU::V_MUL_U64_e64;

      splitScalarSMulU64(Worklist, Inst, MDT);

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    splitScalarSMulPseudo(Worklist, Inst, MDT);
7812 case AMDGPU::S_AND_B64:
7813 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7817 case AMDGPU::S_OR_B64:
7818 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7822 case AMDGPU::S_XOR_B64:
7823 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7827 case AMDGPU::S_NAND_B64:
7828 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7832 case AMDGPU::S_NOR_B64:
7833 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7837 case AMDGPU::S_XNOR_B64:
7838 if (ST.hasDLInsts())
7839 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7841 splitScalar64BitXnor(Worklist, Inst, MDT);
7845 case AMDGPU::S_ANDN2_B64:
7846 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7850 case AMDGPU::S_ORN2_B64:
7851 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7855 case AMDGPU::S_BREV_B64:
7856 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7860 case AMDGPU::S_NOT_B64:
7861 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7865 case AMDGPU::S_BCNT1_I32_B64:
7866 splitScalar64BitBCNT(Worklist, Inst);
7870 case AMDGPU::S_BFE_I64:
7871 splitScalar64BitBFE(Worklist, Inst);
7875 case AMDGPU::S_FLBIT_I32_B64:
7876 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7879 case AMDGPU::S_FF1_I32_B64:
7880 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7884 case AMDGPU::S_LSHL_B32:
7885 if (ST.hasOnlyRevVALUShifts()) {
7886 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7890 case AMDGPU::S_ASHR_I32:
7891 if (ST.hasOnlyRevVALUShifts()) {
7892 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7896 case AMDGPU::S_LSHR_B32:
7897 if (ST.hasOnlyRevVALUShifts()) {
7898 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7902 case AMDGPU::S_LSHL_B64:
7903 if (ST.hasOnlyRevVALUShifts()) {
7905 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7906 : AMDGPU::V_LSHLREV_B64_e64;
7910 case AMDGPU::S_ASHR_I64:
7911 if (ST.hasOnlyRevVALUShifts()) {
7912 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7916 case AMDGPU::S_LSHR_B64:
7917 if (ST.hasOnlyRevVALUShifts()) {
7918 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7923 case AMDGPU::S_ABS_I32:
7924 lowerScalarAbs(Worklist, Inst);
7928 case AMDGPU::S_ABSDIFF_I32:
7929 lowerScalarAbsDiff(Worklist, Inst);
7933 case AMDGPU::S_CBRANCH_SCC0:
7934 case AMDGPU::S_CBRANCH_SCC1: {
7937 bool IsSCC = CondReg == AMDGPU::SCC;
7945 case AMDGPU::S_BFE_U64:
7946 case AMDGPU::S_BFM_B64:
7949 case AMDGPU::S_PACK_LL_B32_B16:
7950 case AMDGPU::S_PACK_LH_B32_B16:
7951 case AMDGPU::S_PACK_HL_B32_B16:
7952 case AMDGPU::S_PACK_HH_B32_B16:
7953 movePackToVALU(Worklist,
MRI, Inst);
7957 case AMDGPU::S_XNOR_B32:
7958 lowerScalarXnor(Worklist, Inst);
7962 case AMDGPU::S_NAND_B32:
7963 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7967 case AMDGPU::S_NOR_B32:
7968 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7972 case AMDGPU::S_ANDN2_B32:
7973 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7977 case AMDGPU::S_ORN2_B32:
7978 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
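
  // The carry/borrow and unsigned-overflow pseudos below are lowered to the
  // VALU carry-producing forms; the carry-in register is constrained (or
  // copied) into the wave mask register class expected by V_ADDC/V_SUBB.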
  case AMDGPU::S_ADD_CO_PSEUDO:
  case AMDGPU::S_SUB_CO_PSEUDO: {
    unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
                       ? AMDGPU::V_ADDC_U32_e64
                       : AMDGPU::V_SUBB_U32_e64;
    const auto *CarryRC = RI.getWaveMaskRegClass();

    if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
      Register NewCarryReg = MRI.createVirtualRegister(CarryRC);

    Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(

    addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);

  case AMDGPU::S_UADDO_PSEUDO:
  case AMDGPU::S_USUBO_PSEUDO: {

    unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
                       ? AMDGPU::V_ADD_CO_U32_e64
                       : AMDGPU::V_SUB_CO_U32_e64;

        RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
    Register DestReg = MRI.createVirtualRegister(NewRC);

    MRI.replaceRegWith(Dest0.getReg(), DestReg);
    addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8042 case AMDGPU::S_LSHL1_ADD_U32:
8043 case AMDGPU::S_LSHL2_ADD_U32:
8044 case AMDGPU::S_LSHL3_ADD_U32:
8045 case AMDGPU::S_LSHL4_ADD_U32: {
8049 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8050 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8051 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8055 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest.
getReg()));
8056 Register DestReg =
MRI.createVirtualRegister(NewRC);
8064 MRI.replaceRegWith(Dest.
getReg(), DestReg);
8065 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
8069 case AMDGPU::S_CSELECT_B32:
8070 case AMDGPU::S_CSELECT_B64:
8071 lowerSelect(Worklist, Inst, MDT);
8074 case AMDGPU::S_CMP_EQ_I32:
8075 case AMDGPU::S_CMP_LG_I32:
8076 case AMDGPU::S_CMP_GT_I32:
8077 case AMDGPU::S_CMP_GE_I32:
8078 case AMDGPU::S_CMP_LT_I32:
8079 case AMDGPU::S_CMP_LE_I32:
8080 case AMDGPU::S_CMP_EQ_U32:
8081 case AMDGPU::S_CMP_LG_U32:
8082 case AMDGPU::S_CMP_GT_U32:
8083 case AMDGPU::S_CMP_GE_U32:
8084 case AMDGPU::S_CMP_LT_U32:
8085 case AMDGPU::S_CMP_LE_U32:
8086 case AMDGPU::S_CMP_EQ_U64:
8087 case AMDGPU::S_CMP_LG_U64:
8088 case AMDGPU::S_CMP_LT_F32:
8089 case AMDGPU::S_CMP_EQ_F32:
8090 case AMDGPU::S_CMP_LE_F32:
8091 case AMDGPU::S_CMP_GT_F32:
8092 case AMDGPU::S_CMP_LG_F32:
8093 case AMDGPU::S_CMP_GE_F32:
8094 case AMDGPU::S_CMP_O_F32:
8095 case AMDGPU::S_CMP_U_F32:
8096 case AMDGPU::S_CMP_NGE_F32:
8097 case AMDGPU::S_CMP_NLG_F32:
8098 case AMDGPU::S_CMP_NGT_F32:
8099 case AMDGPU::S_CMP_NLE_F32:
8100 case AMDGPU::S_CMP_NEQ_F32:
8101 case AMDGPU::S_CMP_NLT_F32: {
8102 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8106 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8120 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8124 case AMDGPU::S_CMP_LT_F16:
8125 case AMDGPU::S_CMP_EQ_F16:
8126 case AMDGPU::S_CMP_LE_F16:
8127 case AMDGPU::S_CMP_GT_F16:
8128 case AMDGPU::S_CMP_LG_F16:
8129 case AMDGPU::S_CMP_GE_F16:
8130 case AMDGPU::S_CMP_O_F16:
8131 case AMDGPU::S_CMP_U_F16:
8132 case AMDGPU::S_CMP_NGE_F16:
8133 case AMDGPU::S_CMP_NLG_F16:
8134 case AMDGPU::S_CMP_NGT_F16:
8135 case AMDGPU::S_CMP_NLE_F16:
8136 case AMDGPU::S_CMP_NEQ_F16:
8137 case AMDGPU::S_CMP_NLT_F16: {
8138 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8160 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8164 case AMDGPU::S_CVT_HI_F32_F16: {
8165 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8166 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8167 if (ST.useRealTrue16Insts()) {
8172 .
addReg(TmpReg, {}, AMDGPU::hi16)
8188 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8192 case AMDGPU::S_MINIMUM_F32:
8193 case AMDGPU::S_MAXIMUM_F32: {
8194 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8205 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8209 case AMDGPU::S_MINIMUM_F16:
8210 case AMDGPU::S_MAXIMUM_F16: {
8211 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8212 ? &AMDGPU::VGPR_16RegClass
8213 : &AMDGPU::VGPR_32RegClass);
8225 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8229 case AMDGPU::V_S_EXP_F16_e64:
8230 case AMDGPU::V_S_LOG_F16_e64:
8231 case AMDGPU::V_S_RCP_F16_e64:
8232 case AMDGPU::V_S_RSQ_F16_e64:
8233 case AMDGPU::V_S_SQRT_F16_e64: {
8234 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8235 ? &AMDGPU::VGPR_16RegClass
8236 : &AMDGPU::VGPR_32RegClass);
8248 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8254 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8262 if (NewOpcode == Opcode) {
8270 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8272 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8286 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8293 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
8294 MRI.replaceRegWith(DstReg, NewDstReg);
8295 MRI.clearKillFlags(NewDstReg);
8298 if (!
MRI.constrainRegClass(NewDstReg, CommonRC))
8315 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8319 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8320 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8325 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8331 MRI.replaceRegWith(DstReg, NewDstReg);
8332 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8334 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8337 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8338 MRI.replaceRegWith(DstReg, NewDstReg);
8339 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8344 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8345 MRI.replaceRegWith(DstReg, NewDstReg);
8347 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8357 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8358 AMDGPU::OpName::src0_modifiers) >= 0)
8362 NewInstr->addOperand(Src);
8365 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8368 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8370 NewInstr.addImm(
Size);
8371 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8375 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8380 "Scalar BFE is only implemented for constant width and offset");
8388 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8389 AMDGPU::OpName::src1_modifiers) >= 0)
8391 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8393 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8394 AMDGPU::OpName::src2_modifiers) >= 0)
8396 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8398 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8400 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8402 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8408 NewInstr->addOperand(Op);
8415 if (Op.getReg() == AMDGPU::SCC) {
8417 if (Op.isDef() && !Op.isDead())
8418 addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
8420 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8425 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8426 Register DstReg = NewInstr->getOperand(0).getReg();
8431 NewDstReg = MRI.createVirtualRegister(NewDstRC);
8432 MRI.replaceRegWith(DstReg, NewDstReg);
8441 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8445std::pair<bool, MachineBasicBlock *>
8448 if (ST.hasAddNoCarryInsts()) {
8457 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8460 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
8462 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
8463 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8471 MRI.replaceRegWith(OldDstReg, ResultReg);
8474 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8475 return std::pair(true, NewBB);
8478 return std::pair(false, nullptr);
8495 bool IsSCC = (CondReg == AMDGPU::SCC);
8503 MRI.replaceRegWith(Dest.getReg(), CondReg);
8509 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8510 NewCondReg = MRI.createVirtualRegister(TC);
8514 bool CopyFound = false;
8515 for (MachineInstr &CandI :
8518 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
8520 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8522 .addReg(CandI.getOperand(1).getReg());
8534 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8542 RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
8543 MachineInstr *NewInst;
8544 if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
8545 NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8558 MRI.replaceRegWith(Dest.getReg(), NewDestReg);
8560 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8572 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8573 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8575 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8576 : AMDGPU::V_SUB_CO_U32_e32;
8586 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8587 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
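// [Illustrative sketch, not part of SIInstrInfo.cpp] The scalar-abs lowering
// above follows the usual "abs(x) = max(x, 0 - x)" pattern: a V_SUB into TmpReg
// and then a signed max into ResultReg. In plain C++, with the subtraction done
// in unsigned arithmetic to model the wrapping hardware subtract:
static inline int32_t sketchAbsViaMax(int32_t X) {
  int32_t Neg = static_cast<int32_t>(0u - static_cast<uint32_t>(X)); // V_SUB 0, X
  return X > Neg ? X : Neg;                                          // signed max
}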
8600 Register SubResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8601 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8602 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8604 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8605 : AMDGPU::V_SUB_CO_U32_e32;
8617 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8618 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8632 if (ST.hasDLInsts()) {
8633 Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8641 MRI.replaceRegWith(Dest.getReg(), NewDest);
8642 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8648 bool Src0IsSGPR = Src0.isReg() &&
8649 RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
8650 bool Src1IsSGPR = Src1.isReg() &&
8651 RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
8653 Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8654 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8664 } else if (Src1IsSGPR) {
8678 MRI.replaceRegWith(Dest.getReg(), NewDest);
8682 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
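// [Illustrative sketch, not part of SIInstrInfo.cpp] The XNOR path above uses
// the identity xnor(a, b) == not(xor(a, b)) == xor(not(a), b): the NOT stays on
// the scalar unit (into Temp) for whichever operand is still an SGPR, so only
// the XOR itself has to move to the VALU. A compile-time check of the identity:
static_assert((~(0xA5u ^ 0x0Fu)) == ((~0xA5u) ^ 0x0Fu),
              "xnor(a, b) == xor(not(a), b)");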
8688 unsigned Opcode) const {
8698 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8699 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8711 MRI.replaceRegWith(Dest.getReg(), NewDest);
8712 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8717 unsigned Opcode) const {
8727 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8728 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8740 MRI.replaceRegWith(Dest.getReg(), NewDest);
8741 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8756 const MCInstrDesc &InstDesc = get(Opcode);
8757 const TargetRegisterClass *Src0RC = Src0.isReg() ?
8759 &AMDGPU::SGPR_32RegClass;
8761 const TargetRegisterClass *Src0SubRC =
8762 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8765 AMDGPU::sub0, Src0SubRC);
8767 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8768 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8769 const TargetRegisterClass *NewDestSubRC =
8770 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8772 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8773 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
8776 AMDGPU::sub1, Src0SubRC);
8778 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8779 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
8784 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8791 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8793 Worklist.insert(&LoHalf);
8794 Worklist.insert(&HiHalf);
8800 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8811 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8812 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8813 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8821 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8822 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8823 const TargetRegisterClass *Src0SubRC =
8824 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8825 if (RI.isSGPRClass(Src0SubRC))
8826 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8827 const TargetRegisterClass *Src1SubRC =
8828 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8829 if (RI.isSGPRClass(Src1SubRC))
8830 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8834 MachineOperand Op0L =
8836 MachineOperand Op1L =
8838 MachineOperand Op0H =
8840 MachineOperand Op1H =
8858 Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8859 MachineInstr *Op1L_Op0H =
8864 Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8865 MachineInstr *Op1H_Op0L =
8870 Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8871 MachineInstr *Carry =
8876 MachineInstr *LoHalf =
8881 Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8886 MachineInstr *HiHalf =
8897 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8909 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
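// [Illustrative sketch, not part of SIInstrInfo.cpp] The 64-bit scalar multiply
// above is built from 32-bit partial products, mirroring the Op1L_Op0H /
// Op1H_Op0L / Carry / AddReg temporaries created in the code:
//   lo32(a * b) = a_lo * b_lo
//   hi32(a * b) = mulhi(a_lo, b_lo) + a_lo * b_hi + a_hi * b_lo   (mod 2^32)
static inline uint64_t sketchMul64From32(uint64_t A, uint64_t B) {
  uint32_t ALo = static_cast<uint32_t>(A), AHi = static_cast<uint32_t>(A >> 32);
  uint32_t BLo = static_cast<uint32_t>(B), BHi = static_cast<uint32_t>(B >> 32);
  uint32_t Lo    = ALo * BLo;                                          // V_MUL_LO_U32
  uint32_t Carry = static_cast<uint32_t>((uint64_t(ALo) * BLo) >> 32); // V_MUL_HI_U32
  uint32_t Hi    = ALo * BHi + AHi * BLo + Carry;                      // cross terms + carry
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}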
8920 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8921 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8922 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8930 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8931 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8932 const TargetRegisterClass *Src0SubRC =
8933 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8934 if (RI.isSGPRClass(Src0SubRC))
8935 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8936 const TargetRegisterClass *Src1SubRC =
8937 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8938 if (RI.isSGPRClass(Src1SubRC))
8939 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8943 MachineOperand Op0L =
8945 MachineOperand Op1L =
8949 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8950 ? AMDGPU::V_MUL_HI_U32_e64
8951 : AMDGPU::V_MUL_HI_I32_e64;
8952 MachineInstr *HiHalf =
8955 MachineInstr *LoHalf =
8966 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8974 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8990 const MCInstrDesc &InstDesc = get(Opcode);
8991 const TargetRegisterClass *Src0RC = Src0.isReg() ?
8993 &AMDGPU::SGPR_32RegClass;
8995 const TargetRegisterClass *Src0SubRC =
8996 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8997 const TargetRegisterClass *Src1RC = Src1.isReg() ?
8999 &AMDGPU::SGPR_32RegClass;
9001 const TargetRegisterClass *Src1SubRC =
9002 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9005 AMDGPU::sub0, Src0SubRC);
9007 AMDGPU::sub0, Src1SubRC);
9009 AMDGPU::sub1, Src0SubRC);
9011 AMDGPU::sub1, Src1SubRC);
9013 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
9014 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9015 const TargetRegisterClass *NewDestSubRC =
9016 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9018 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
9019 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
9023 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
9024 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
9028 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
9035 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
9037 Worklist.insert(&LoHalf);
9038 Worklist.insert(&HiHalf);
9041 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
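// [Illustrative sketch, not part of SIInstrInfo.cpp] splitScalar64BitBinaryOp
// handles bitwise 64-bit operations by pairing up the sub0 and sub1 halves of
// both sources; a 64-bit AND, for example, becomes two 32-bit ANDs whose
// results are glued back together with a REG_SEQUENCE:
static inline uint64_t sketchSplitAnd64(uint64_t A, uint64_t B) {
  uint32_t Lo = static_cast<uint32_t>(A) & static_cast<uint32_t>(B);             // LoHalf
  uint32_t Hi = static_cast<uint32_t>(A >> 32) & static_cast<uint32_t>(B >> 32); // HiHalf
  return (static_cast<uint64_t>(Hi) << 32) | Lo;                                 // REG_SEQUENCE
}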
9057 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
9059 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
9061 MachineOperand* Op0;
9062 MachineOperand* Op1;
9075 Register NewDest = MRI.createVirtualRegister(DestRC);
9081 MRI.replaceRegWith(Dest.getReg(), NewDest);
9097 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
9098 const TargetRegisterClass *SrcRC = Src.isReg() ?
9099 MRI.getRegClass(Src.getReg()) :
9100 &AMDGPU::SGPR_32RegClass;
9102 Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9103 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9105 const TargetRegisterClass *SrcSubRC =
9106 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9109 AMDGPU::sub0, SrcSubRC);
9111 AMDGPU::sub1, SrcSubRC);
9117 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9121 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
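// [Illustrative sketch, not part of SIInstrInfo.cpp] The 64-bit BCNT split above
// counts each 32-bit half with V_BCNT_U32_B32, feeding the first count in as
// the accumulator operand of the second. In plain C++ (__builtin_popcount is a
// GCC/Clang builtin, used here only for illustration):
static inline uint32_t sketchPopCount64(uint64_t Src) {
  uint32_t Mid    = __builtin_popcount(static_cast<uint32_t>(Src));             // sub0
  uint32_t Result = __builtin_popcount(static_cast<uint32_t>(Src >> 32)) + Mid; // sub1 + Mid
  return Result;
}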
9140 Offset == 0 && "Not implemented");
9143 Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9144 Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9145 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9162 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9163 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9168 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9169 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9173 .addReg(Src.getReg(), {}, AMDGPU::sub0);
9176 .addReg(Src.getReg(), {}, AMDGPU::sub0)
9181 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9182 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9201 const MCInstrDesc &InstDesc = get(Opcode);
9203 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9204 unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9205 : AMDGPU::V_ADD_CO_U32_e32;
9207 const TargetRegisterClass *SrcRC =
9208 Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9209 const TargetRegisterClass *SrcSubRC =
9210 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9212 MachineOperand SrcRegSub0 =
9214 MachineOperand SrcRegSub1 =
9217 Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9218 Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9219 Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9220 Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9227 .addReg(IsCtlz ? MidReg1 : MidReg2)
9233 .addReg(IsCtlz ? MidReg2 : MidReg1);
9235 MRI.replaceRegWith(Dest.getReg(), MidReg4);
9237 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
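// [Illustrative sketch, not part of SIInstrInfo.cpp] The 64-bit count-op split
// above takes the 32-bit V_FFBH/V_FFBL of each half, adds 32 to one of them
// with clamping, and combines the two with V_MIN_U32 (the MidReg1..MidReg4
// temporaries roughly correspond to these steps). A plain-C++ model of the
// ctlz case; V_FFBH_U32 yields ~0u ("not found") for a zero input, and the
// clamped add keeps that value from wrapping:
static inline uint32_t sketchFfbh32(uint32_t X) {
  return X ? static_cast<uint32_t>(__builtin_clz(X)) : ~0u;
}
static inline uint32_t sketchCtlz64(uint64_t Src) {
  uint32_t LoCount  = sketchFfbh32(static_cast<uint32_t>(Src));
  uint32_t HiCount  = sketchFfbh32(static_cast<uint32_t>(Src >> 32));
  uint32_t LoPlus32 = LoCount > ~0u - 32 ? ~0u : LoCount + 32;  // clamped add of 32
  return HiCount < LoPlus32 ? HiCount : LoPlus32;               // V_MIN_U32
}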
9240void SIInstrInfo::addUsersToMoveToVALUWorklist(
9244 MachineInstr &UseMI = *MO.getParent();
9248 switch (UseMI.getOpcode()) {
9251 case AMDGPU::SOFT_WQM:
9252 case AMDGPU::STRICT_WWM:
9253 case AMDGPU::STRICT_WQM:
9254 case AMDGPU::REG_SEQUENCE:
9256 case AMDGPU::INSERT_SUBREG:
9259 OpNo = MO.getOperandNo();
9264 MRI.constrainRegClass(DstReg, OpRC);
9266 if (!RI.hasVectorRegisters(OpRC))
9277 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9283 if (ST.useRealTrue16Insts()) {
9286 SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9293 SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9299 bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
9300 bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
9302 auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
9304 case AMDGPU::S_PACK_LL_B32_B16:
9306 .addReg(SrcReg0, {},
9307 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9308 .addImm(AMDGPU::lo16)
9309 .addReg(SrcReg1, {},
9310 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9311 .addImm(AMDGPU::hi16);
9313 case AMDGPU::S_PACK_LH_B32_B16:
9315 .addReg(SrcReg0, {},
9316 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9317 .addImm(AMDGPU::lo16)
9318 .addReg(SrcReg1, {}, AMDGPU::hi16)
9319 .addImm(AMDGPU::hi16);
9321 case AMDGPU::S_PACK_HL_B32_B16:
9322 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9323 .addImm(AMDGPU::lo16)
9324 .addReg(SrcReg1, {},
9325 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9326 .addImm(AMDGPU::hi16);
9328 case AMDGPU::S_PACK_HH_B32_B16:
9329 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9330 .addImm(AMDGPU::lo16)
9331 .addReg(SrcReg1, {}, AMDGPU::hi16)
9332 .addImm(AMDGPU::hi16);
9339 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9340 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9345 case AMDGPU::S_PACK_LL_B32_B16: {
9346 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9347 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9364 case AMDGPU::S_PACK_LH_B32_B16: {
9365 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9374 case AMDGPU::S_PACK_HL_B32_B16: {
9375 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9385 case AMDGPU::S_PACK_HH_B32_B16: {
9386 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9387 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9404 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9405 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9414 assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9415 !Op.isDead() && Op.getParent() == &SCCDefInst);
9416 SmallVector<MachineInstr *, 4> CopyToDelete;
9419 for (MachineInstr &MI :
9423 int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9426 MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9427 Register DestReg = MI.getOperand(0).getReg();
9429 MRI.replaceRegWith(DestReg, NewCond);
9434 MI.getOperand(SCCIdx).setReg(NewCond);
9440 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9443 for (auto &Copy : CopyToDelete)
9444 Copy->eraseFromParent();
9452 void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9458 for (MachineInstr &MI :
9461 if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9463 if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9472 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9480 case AMDGPU::REG_SEQUENCE:
9481 case AMDGPU::INSERT_SUBREG:
9483 case AMDGPU::SOFT_WQM:
9484 case AMDGPU::STRICT_WWM:
9485 case AMDGPU::STRICT_WQM: {
9487 if (RI.isAGPRClass(SrcRC)) {
9488 if (RI.isAGPRClass(NewDstRC))
9493 case AMDGPU::REG_SEQUENCE:
9494 case AMDGPU::INSERT_SUBREG:
9495 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9498 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9504 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9507 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9521 int OpIndices[3]) const {
9522 const MCInstrDesc &Desc = MI.getDesc();
9538 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9540 for (unsigned i = 0; i < 3; ++i) {
9541 int Idx = OpIndices[i];
9545 const MachineOperand &MO = MI.getOperand(Idx);
9551 const TargetRegisterClass *OpRC =
9552 RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
9553 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9559 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9560 if (RI.isSGPRClass(RegRC))
9578 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9579 SGPRReg = UsedSGPRs[0];
9582 if (!SGPRReg && UsedSGPRs[1]) {
9583 if (UsedSGPRs[1] == UsedSGPRs[2])
9584 SGPRReg = UsedSGPRs[1];
9591 AMDGPU::OpName OperandName) const {
9592 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9595 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9599 return &MI.getOperand(Idx);
9613 if (ST.isAmdHsaOS()) {
9616 RsrcDataFormat |= (1ULL << 56);
9621 RsrcDataFormat |= (2ULL << 59);
9624 return RsrcDataFormat;
9634 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9639 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9646 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9652 unsigned Opc = MI.getOpcode();
9658 return get(Opc).mayLoad() &&
9663 int &FrameIndex) const {
9665 if (!Addr || !Addr->isFI())
9676 int &FrameIndex) const {
9684 int &FrameIndex) const {
9698 int &FrameIndex) const {
9715 while (++I != E && I->isInsideBundle()) {
9716 assert(!I->isBundle() && "No nested bundle!");
9724 unsigned Opc = MI.getOpcode();
9726 unsigned DescSize = Desc.getSize();
9731 unsigned Size = DescSize;
9735 if (MI.isBranch() && ST.hasOffset3fBug())
9746 bool HasLiteral = false;
9747 unsigned LiteralSize = 4;
9748 for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9753 if (ST.has64BitLiterals()) {
9754 switch (OpInfo.OperandType) {
9770 return HasLiteral ? DescSize + LiteralSize : DescSize;
9775 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9779 int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9780 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9784 case TargetOpcode::BUNDLE:
9786 case TargetOpcode::INLINEASM:
9787 case TargetOpcode::INLINEASM_BR: {
9789 const char *AsmStr = MI.getOperand(0).getSymbolName();
9793 if (MI.isMetaInstruction())
9797 const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9800 unsigned LoInstOpcode = D16Info->LoOp;
9802 DescSize = Desc.getSize();
9806 if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9809 DescSize = Desc.getSize();
9820 if (MI.memoperands_empty())
9832 static const std::pair<int, const char *> TargetIndices[] = {
9870std::pair<unsigned, unsigned>
9877 static const std::pair<unsigned, const char *> TargetFlags[] = {
9895 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9910 return AMDGPU::WWM_COPY;
9912 return AMDGPU::COPY;
9929 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
9933 if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
9934 return IsLRSplitInst;
9947 bool IsNullOrVectorRegister = true;
9951 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9954 return IsNullOrVectorRegister &&
9956 (!MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
9957 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9965 if (ST.hasAddNoCarryInsts())
9969 Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9970 MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9981 if (ST.hasAddNoCarryInsts())
9985 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
9987 : RS.scavengeRegisterBackwards(
9988 *RI.getBoolRC(), I, false,
10001 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10002 case AMDGPU::SI_KILL_I1_TERMINATOR:
10011 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10012 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10013 case AMDGPU::SI_KILL_I1_PSEUDO:
10014 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10026 const unsigned OffsetBits =
10028 return (1 << OffsetBits) - 1;
10032 if (!ST.isWave32())
10035 if (MI.isInlineAsm())
10038 if (MI.getNumOperands() < MI.getNumExplicitOperands())
10041 for (auto &Op : MI.implicit_operands()) {
10042 if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
10043 Op.setReg(AMDGPU::VCC_LO);
10052 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
10056 const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
10057 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10073 if (Imm > MaxImm) {
10074 if (Imm <= MaxImm + 64) {
10076 Overflow = Imm - MaxImm;
10095 if (Overflow > 0) {
10103 if (ST.hasRestrictedSOffset())
10108 SOffset = Overflow;
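// [Illustrative sketch, not part of SIInstrInfo.cpp] A simplified model of the
// splitMUBUFOffset overflow handling above: keep what fits in the MUBUF
// immediate-offset field and move the rest into SOffset. The real code also
// constrains the overflow for alignment and for hasRestrictedSOffset targets,
// which this sketch leaves out:
static inline void sketchSplitMUBUFOffset(uint32_t Imm, uint32_t MaxImm,
                                          uint32_t &ImmOffset, uint32_t &SOffset) {
  uint32_t Overflow = Imm > MaxImm ? Imm - MaxImm : 0;  // part the imm field cannot hold
  ImmOffset = Imm - Overflow;                           // <= MaxImm by construction
  SOffset = Overflow;                                   // carried by the soffset operand
}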
10146 if (!ST.hasFlatInstOffsets())
10154 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10166std::pair<int64_t, int64_t>
10169 int64_t RemainderOffset = COffsetVal;
10170 int64_t ImmField = 0;
10175 if (AllowNegative) {
10177 int64_t D = 1LL << NumBits;
10178 RemainderOffset = (COffsetVal / D) * D;
10179 ImmField = COffsetVal - RemainderOffset;
10179 ImmField = COffsetVal - RemainderOffset;
10181 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10183 (ImmField % 4) != 0) {
10185 RemainderOffset += ImmField % 4;
10186 ImmField -= ImmField % 4;
10188 } else if (COffsetVal >= 0) {
10190 RemainderOffset = COffsetVal - ImmField;
10194 assert(RemainderOffset + ImmField == COffsetVal);
10195 return {ImmField, RemainderOffset};
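// [Illustrative sketch, not part of SIInstrInfo.cpp] The AllowNegative path of
// splitFlatOffset above rounds COffsetVal toward zero to a multiple of
// 2^NumBits, so |ImmField| < 2^NumBits and ImmField + RemainderOffset always
// reconstructs the original offset. As a standalone helper (needs <cstdint>
// and <utility> if compiled on its own):
static inline std::pair<int64_t, int64_t> sketchSplitFlatOffset(int64_t COffsetVal,
                                                                unsigned NumBits) {
  int64_t D = 1LL << NumBits;
  int64_t RemainderOffset = (COffsetVal / D) * D;  // truncating division rounds toward zero
  int64_t ImmField = COffsetVal - RemainderOffset; // same sign as COffsetVal, |ImmField| < D
  return {ImmField, RemainderOffset};
}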
10199 if (ST.hasNegativeScratchOffsetBug() &&
10207 switch (ST.getGeneration()) {
10235 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10236 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10237 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10238 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10239 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10240 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10241 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10242 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10249#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10250 case OPCODE##_dpp: \
10251 case OPCODE##_e32: \
10252 case OPCODE##_e64: \
10253 case OPCODE##_e64_dpp: \
10254 case OPCODE##_sdwa:
10268 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10269 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10270 case AMDGPU::V_FMA_F16_gfx9_e64:
10271 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10272 case AMDGPU::V_INTERP_P2_F16:
10273 case AMDGPU::V_MAD_F16_e64:
10274 case AMDGPU::V_MAD_U16_e64:
10275 case AMDGPU::V_MAD_I16_e64:
10284 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10298 switch (ST.getGeneration()) {
10311 if (isMAI(Opcode)) {
10319 if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
10326 if (ST.hasGFX90AInsts()) {
10328 if (ST.hasGFX940Insts())
10359 for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
10360 if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10361 auto &RegOp = MI.getOperand(1 + 2 * I);
10373 switch (MI.getOpcode()) {
10375 case AMDGPU::REG_SEQUENCE:
10379 case AMDGPU::INSERT_SUBREG:
10380 if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10397 if (!P.Reg.isVirtual())
10401 auto *DefInst = MRI.getVRegDef(RSR.Reg);
10402 while (auto *MI = DefInst) {
10404 switch (MI->getOpcode()) {
10406 case AMDGPU::V_MOV_B32_e32: {
10407 auto &Op1 = MI->getOperand(1);
10412 DefInst = MRI.getVRegDef(RSR.Reg);
10420 DefInst = MRI.getVRegDef(RSR.Reg);
10433 assert(MRI.isSSA() && "Must be run on SSA");
10435 auto *TRI = MRI.getTargetRegisterInfo();
10436 auto *DefBB = DefMI.getParent();
10440 if (UseMI.getParent() != DefBB)
10443 const int MaxInstScan = 20;
10447 auto E = UseMI.getIterator();
10448 for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10449 if (I->isDebugInstr())
10452 if (++NumInst > MaxInstScan)
10455 if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10465 assert(MRI.isSSA() && "Must be run on SSA");
10467 auto *TRI = MRI.getTargetRegisterInfo();
10468 auto *DefBB = DefMI.getParent();
10470 const int MaxUseScan = 10;
10473 for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10474 auto &UseInst = *Use.getParent();
10477 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10480 if (++NumUse > MaxUseScan)
10487 const int MaxInstScan = 20;
10491 for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10494 if (I->isDebugInstr())
10497 if (++NumInst > MaxInstScan)
10510 if (Reg == VReg && --NumUse == 0)
10512 } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10521 auto Cur = MBB.begin();
10522 if (Cur != MBB.end())
10524 if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10527 } while (Cur != MBB.end() && Cur != LastPHIIt);
10536 if (InsPt != MBB.end() &&
10537 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10538 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10539 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10540 InsPt->definesRegister(Src, nullptr)) {
10544 .addReg(Src, {}, SrcSubReg)
10569 if (isFullCopyInstr(MI)) {
10570 Register DstReg = MI.getOperand(0).getReg();
10571 Register SrcReg = MI.getOperand(1).getReg();
10578 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10582 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10593 unsigned *PredCost) const {
10594 if (MI.isBundle()) {
10597 unsigned Lat = 0, Count = 0;
10598 for (++I; I != E && I->isBundledWithPred(); ++I) {
10600 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10602 return Lat + Count - 1;
10605 return SchedModel.computeInstrLatency(&MI);
10612 return *CallAddrOp;
10619 unsigned Opcode = MI.getOpcode();
10624 : MI.getOperand(1).getReg();
10625 LLT DstTy = MRI.getType(Dst);
10626 LLT SrcTy = MRI.getType(Src);
10628 unsigned SrcAS = SrcTy.getAddressSpace();
10631 ST.hasGloballyAddressableScratch()
10639 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10640 return HandleAddrSpaceCast(MI);
10643 auto IID = GI->getIntrinsicID();
10650 case Intrinsic::amdgcn_addrspacecast_nonnull:
10651 return HandleAddrSpaceCast(MI);
10652 case Intrinsic::amdgcn_if:
10653 case Intrinsic::amdgcn_else:
10667 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10668 Opcode == AMDGPU::G_SEXTLOAD) {
10669 if (MI.memoperands_empty())
10673 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10674 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10682 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10683 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10684 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10693 Formatter = std::make_unique<AMDGPUMIRFormatter>(ST);
10694 return Formatter.get();
10703 unsigned opcode = MI.getOpcode();
10704 if (opcode == AMDGPU::V_READLANE_B32 ||
10705 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10706 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10709 if (isCopyInstr(MI)) {
10713 RI.getPhysRegBaseClass(srcOp.getReg());
10721 if (MI.isPreISelOpcode())
10736 if (MI.memoperands_empty())
10740 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10741 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10756 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10758 if (!SrcOp.isReg())
10762 if (!Reg || !SrcOp.readsReg())
10768 if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10795 F, "ds_ordered_count unsupported for this calling conv"));
10809 Register &SrcReg2, int64_t &CmpMask,
10810 int64_t &CmpValue) const {
10811 if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10814 switch (MI.getOpcode()) {
10817 case AMDGPU::S_CMP_EQ_U32:
10818 case AMDGPU::S_CMP_EQ_I32:
10819 case AMDGPU::S_CMP_LG_U32:
10820 case AMDGPU::S_CMP_LG_I32:
10821 case AMDGPU::S_CMP_LT_U32:
10822 case AMDGPU::S_CMP_LT_I32:
10823 case AMDGPU::S_CMP_GT_U32:
10824 case AMDGPU::S_CMP_GT_I32:
10825 case AMDGPU::S_CMP_LE_U32:
10826 case AMDGPU::S_CMP_LE_I32:
10827 case AMDGPU::S_CMP_GE_U32:
10828 case AMDGPU::S_CMP_GE_I32:
10829 case AMDGPU::S_CMP_EQ_U64:
10830 case AMDGPU::S_CMP_LG_U64:
10831 SrcReg = MI.getOperand(0).getReg();
10832 if (MI.getOperand(1).isReg()) {
10833 if (MI.getOperand(1).getSubReg())
10835 SrcReg2 = MI.getOperand(1).getReg();
10837 } else if (MI.getOperand(1).isImm()) {
10839 CmpValue = MI.getOperand(1).getImm();
10845 case AMDGPU::S_CMPK_EQ_U32:
10846 case AMDGPU::S_CMPK_EQ_I32:
10847 case AMDGPU::S_CMPK_LG_U32:
10848 case AMDGPU::S_CMPK_LG_I32:
10849 case AMDGPU::S_CMPK_LT_U32:
10850 case AMDGPU::S_CMPK_LT_I32:
10851 case AMDGPU::S_CMPK_GT_U32:
10852 case AMDGPU::S_CMPK_GT_I32:
10853 case AMDGPU::S_CMPK_LE_U32:
10854 case AMDGPU::S_CMPK_LE_I32:
10855 case AMDGPU::S_CMPK_GE_U32:
10856 case AMDGPU::S_CMPK_GE_I32:
10857 SrcReg = MI.getOperand(0).getReg();
10859 CmpValue = MI.getOperand(1).getImm();
10869 if (S->isLiveIn(AMDGPU::SCC))
10878 bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
10881 bool SCCIsDead = false;
10884 constexpr unsigned ScanLimit = 12;
10885 unsigned Count = 0;
10886 for (MachineInstr &MI :
10888 if (++Count > ScanLimit)
10890 if (MI.readsRegister(AMDGPU::SCC, &RI)) {
10891 if (MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
10892 MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
10893 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10894 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
10899 if (MI.definesRegister(AMDGPU::SCC, &RI)) {
10912 for (MachineInstr *MI : InvertInstr) {
10913 if (MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10914 MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
10916 } else if (MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10917 MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
10918 MI->setDesc(get(MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
10919 ? AMDGPU::S_CBRANCH_SCC1
10920 : AMDGPU::S_CBRANCH_SCC0));
10933 bool NeedInversion) const {
10934 MachineInstr *KillsSCC = nullptr;
10939 if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10941 if (MI.killsRegister(AMDGPU::SCC, &RI))
10944 if (NeedInversion && !invertSCCUse(SCCRedefine))
10946 if (MachineOperand *SccDef =
10948 SccDef->setIsDead(false);
10956 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
10957 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
10959 bool Op1IsNonZeroImm =
10960 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
10961 bool Op2IsZeroImm =
10962 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
10963 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
10969 unsigned &NewDefOpc) {
10972 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
10973 Def.getOpcode() != AMDGPU::S_ADD_U32)
10979 if ((!AddSrc1.isImm() || AddSrc1.getImm() != 1) &&
10985 if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
10987 Def.findRegisterDefOperand(AMDGPU::SCC, nullptr);
10990 NewDefOpc = AMDGPU::S_ADD_U32;
10992 NeedInversion = !NeedInversion;
10997 Register SrcReg2, int64_t CmpMask,
11006 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
11007 this](bool NeedInversion) -> bool {
11031 unsigned NewDefOpc = Def->getOpcode();
11037 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11040 if (NewDefOpc != Def->getOpcode())
11041 Def->setDesc(get(NewDefOpc));
11050 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11051 MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
11057 if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
11065 optimizeSCC(Select, Def, false);
11072 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11073 this](int64_t ExpectedValue, unsigned SrcSize,
11074 bool IsReversible, bool IsSigned) -> bool {
11102 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11103 Def->getOpcode() != AMDGPU::S_AND_B64)
11107 const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
11118 SrcOp = &Def->getOperand(2);
11119 else if (isMask(&Def->getOperand(2)))
11120 SrcOp = &Def->getOperand(1);
11128 if (IsSigned && BitNo == SrcSize - 1)
11131 ExpectedValue <<= BitNo;
11133 bool IsReversedCC = false;
11134 if (CmpValue != ExpectedValue) {
11137 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11142 Register DefReg = Def->getOperand(0).getReg();
11143 if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
11146 if (!optimizeSCC(Def, &CmpInstr, false))
11149 if (!MRI->use_nodbg_empty(DefReg)) {
11157 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11158 : AMDGPU::S_BITCMP1_B32
11159 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11160 : AMDGPU::S_BITCMP1_B64;
11165 Def->eraseFromParent();
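// [Illustrative sketch, not part of SIInstrInfo.cpp] optimizeCmpAnd folds
//   s_and_b32 %d, %x, (1 << n)
//   s_cmp_eq_u32 %d, (1 << n)        (or the s_cmp_lg / 64-bit forms)
// into a single s_bitcmp, since comparing the masked value against a one-bit
// mask is just a test of bit n. The identity in plain C++:
static inline bool sketchBitTestFold(uint32_t X, unsigned N) {
  bool CmpForm    = (X & (1u << N)) == (1u << N);  // and + compare form
  bool BitcmpForm = ((X >> N) & 1u) != 0;          // single bit-test form
  return CmpForm == BitcmpForm;                    // always true for N < 32
}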
11173 case AMDGPU::S_CMP_EQ_U32:
11174 case AMDGPU::S_CMP_EQ_I32:
11175 case AMDGPU::S_CMPK_EQ_U32:
11176 case AMDGPU::S_CMPK_EQ_I32:
11177 return optimizeCmpAnd(1, 32, true, false) ||
11178 optimizeCmpSelect(true);
11179 case AMDGPU::S_CMP_GE_U32:
11180 case AMDGPU::S_CMPK_GE_U32:
11181 return optimizeCmpAnd(1, 32, false, false);
11182 case AMDGPU::S_CMP_GE_I32:
11183 case AMDGPU::S_CMPK_GE_I32:
11184 return optimizeCmpAnd(1, 32, false, true);
11185 case AMDGPU::S_CMP_EQ_U64:
11186 return optimizeCmpAnd(1, 64, true, false);
11187 case AMDGPU::S_CMP_LG_U32:
11188 case AMDGPU::S_CMP_LG_I32:
11189 case AMDGPU::S_CMPK_LG_U32:
11190 case AMDGPU::S_CMPK_LG_I32:
11191 return optimizeCmpAnd(0, 32, true, false) ||
11192 optimizeCmpSelect(false);
11193 case AMDGPU::S_CMP_GT_U32:
11194 case AMDGPU::S_CMPK_GT_U32:
11195 return optimizeCmpAnd(0, 32, false, false);
11196 case AMDGPU::S_CMP_GT_I32:
11197 case AMDGPU::S_CMPK_GT_I32:
11198 return optimizeCmpAnd(0, 32, false, true);
11199 case AMDGPU::S_CMP_LG_U64:
11200 return optimizeCmpAnd(0, 64, true, false) ||
11201 optimizeCmpSelect(false);
11208 AMDGPU::OpName OpName) const {
11209 if (!ST.needsAlignedVGPRs())
11212 int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
11224 bool IsAGPR = RI.isAGPR(MRI, DataReg);
11226 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11229 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
11230 : &AMDGPU::VReg_64_Align2RegClass);
11232 .addReg(DataReg, {}, Op.getSubReg())
11237 Op.setSubReg(AMDGPU::sub0);
11252 if (ST.hasGFX1250Insts())
11259 unsigned Opcode = MI.getOpcode();
11265 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11266 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11269 if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool setsSCCIfResultIsZero(const MachineInstr &Def, bool &NeedInversion, unsigned &NewDefOpc)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
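As a hedged illustration of how a caller might finish that partially built add (only the getAddNoCarry signature above is taken from this listing; the helper name, the include, and the assumption that the destination is already attached are mine):
  #include "SIInstrInfo.h"   // assumed in-tree include from the AMDGPU backend

  // Hypothetical helper: the builder returned by getAddNoCarry is expected to
  // already carry the destination (and any wave-size-dependent carry defs), so
  // the caller only appends the two source operands.
  static void emitAddNoCarrySketch(const llvm::SIInstrInfo &TII,
                                   llvm::MachineBasicBlock &MBB,
                                   llvm::MachineBasicBlock::iterator I,
                                   const llvm::DebugLoc &DL, llvm::Register Dst,
                                   llvm::Register Src0, llvm::Register Src1) {
    TII.getAddNoCarry(MBB, I, DL, Dst).addReg(Src0).addReg(Src1);
  }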
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
const MIRFormatter * getMIRFormatter() const override
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
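A simplified, standalone sketch of that contract follows; the unsigned 12-bit offset field and the function name are assumptions made for illustration, whereas the real method derives the legal field width from the subtarget, address space, and FlatVariant and may also adjust the remainder:
  #include <cstdint>
  #include <utility>

  // Keep as much of the offset as a hypothetical unsigned N-bit field can hold
  // and move the rest to a remainder that must be added to the address
  // register; Imm + Remainder == COffsetVal by construction.
  std::pair<int64_t, int64_t> splitOffsetSketch(int64_t COffsetVal, unsigned N = 12) {
    if (COffsetVal < 0)
      return {0, COffsetVal};                     // sketch only: no negative field
    int64_t Imm = COffsetVal & ((int64_t(1) << N) - 1);
    return {Imm, COffsetVal - Imm};
  }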
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions, the offset must be positive; the MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
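For concreteness, a minimal sketch of that contract (the function name is made up; the real helper lives in llvm/Support/MathExtras.h and asserts 0 < N <= 64):
  #include <cstdint>

  // maxUIntNSketch(8) == 255, maxUIntNSketch(16) == 65535, maxUIntNSketch(64) == UINT64_MAX.
  constexpr uint64_t maxUIntNSketch(unsigned N) {
    return N >= 64 ? UINT64_MAX : (uint64_t(1) << N) - 1;
  }
  static_assert(maxUIntNSketch(8) == 255, "8-bit maximum");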
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
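A minimal sketch of the same range check, assuming a two's-complement interpretation (the name is illustrative, not the library's):
  #include <cstdint>

  // Does x fit in an N-bit signed integer?
  // isIntSketch<16>(-32768) is true; isIntSketch<16>(32768) is false.
  template <unsigned N> constexpr bool isIntSketch(int64_t x) {
    return N >= 64 ||
           (x >= -(int64_t(1) << (N - 1)) && x < (int64_t(1) << (N - 1)));
  }
  static_assert(isIntSketch<16>(-32768) && !isIntSketch<16>(32768), "16-bit range");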
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
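A worked sketch of that rounding, under the assumption that Value >= Skew and Align > 0 (the name is illustrative):
  #include <cstdint>

  // alignDownSketch(37, 16) == 32; with Skew = 5, alignDownSketch(37, 16, 5) == 37.
  constexpr uint64_t alignDownSketch(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
    Skew %= Align;
    return (Value - Skew) / Align * Align + Skew;
  }
  static_assert(alignDownSketch(37, 16) == 32, "rounds down to a multiple of 16");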
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
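A slow but unambiguous sketch of that contract (the real helper uses a count-leading-zeros intrinsic; this name is made up):
  #include <cstdint>

  // Floor log base 2: log2_32Sketch(32) == 5, log2_32Sketch(33) == 5,
  // and zero maps to -1 per the documented contract.
  int log2_32Sketch(uint32_t Value) {
    int Result = -1;
    while (Value) {
      Value >>= 1;
      ++Result;
    }
    return Result;
  }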
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
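Taken together with Hi_32 above, a minimal sketch of the 64-bit split (names are illustrative):
  #include <cstdint>

  constexpr uint32_t hi32Sketch(uint64_t Value) { return uint32_t(Value >> 32); }
  constexpr uint32_t lo32Sketch(uint64_t Value) { return uint32_t(Value); }
  // 0x123456789ABCDEF0 -> high half 0x12345678, low half 0x9ABCDEF0.
  static_assert(hi32Sketch(0x123456789ABCDEF0ULL) == 0x12345678u &&
                lo32Sketch(0x123456789ABCDEF0ULL) == 0x9ABCDEF0u, "64-bit split");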
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
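A minimal sketch of integer ceiling division (the name is made up; written to sidestep the overflow of the naive (Numerator + Denominator - 1) / Denominator form):
  #include <cstdint>

  // divideCeilSketch(7, 2) == 4, divideCeilSketch(8, 2) == 4.
  constexpr uint64_t divideCeilSketch(uint64_t Numerator, uint64_t Denominator) {
    return Numerator / Denominator + (Numerator % Denominator != 0);
  }
  static_assert(divideCeilSketch(7, 2) == 4, "ceil(7/2) == 4");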
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
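A sketch of that sign extension, assuming 0 < B <= 64 (the name is illustrative; the real helper takes B as a template parameter):
  #include <cstdint>

  // The 12-bit pattern 0xFFF extends to -1, while 0x7FF stays 2047.
  constexpr int64_t signExtend64Sketch(uint64_t x, unsigned B) {
    return int64_t(x << (64 - B)) >> (64 - B);
  }
  static_assert(signExtend64Sketch(0xFFF, 12) == -1, "all-ones 12-bit value");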
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
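A 64-bit sketch of that mask, assuming N <= 64 (the real helper is a template parameterized on the integer type; this name is made up):
  #include <cstdint>

  // maskTrailingOnes64Sketch(4) == 0xF, maskTrailingOnes64Sketch(0) == 0.
  constexpr uint64_t maskTrailingOnes64Sketch(unsigned N) {
    return N == 0 ? 0 : (~uint64_t(0) >> (64 - N));
  }
  static_assert(maskTrailingOnes64Sketch(4) == 0xF, "four trailing ones");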
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr RegState getUndefRegState(bool B)
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.