#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"

    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
           return MMO->isLoad() && MMO->isInvariant();

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
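// resultDependsOnExec(): a compare result is treated as depending on EXEC when
// its users mask the exec register (S_AND_SAVEEXEC / S_AND of EXEC); other
// opcodes fall through to an opcode-specific switch below.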
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
                                          int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
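// Heuristic for clustering nearby loads: only worthwhile when there are at
// most 16 of them and their offsets span fewer than 64 bytes.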
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

  for (auto Def = MI, E = MBB.begin(); Def != E; ) {

    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

    Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);
  LastMI->addRegisterKilled(SrcReg, &RI);
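// When both the source and destination sub-registers are even-aligned SGPRs
// and another element follows, the pair is copied with a single S_MOV_B64
// instead of two S_MOV_B32s; the last move in the expansion kills the source
// super-register.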
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {
    assert(ST.useRealTrue16Insts());
    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {
      if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
          (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {
    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                         *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
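// Copies wider than 32 bits are split into per-sub-register moves. The walk
// direction (Forward) is chosen from the hardware register indices so that a
// destination overlapping the source never clobbers sub-registers that are
// still to be read, and only the final move carries the kill of the source
// super-register.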
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

                                          int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B16_t16_e32: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::V_MOV_B16_t16_e64: {
    if (Src0.isImm() && !MI.getOperand(1).getImm()) {
      return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;

  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
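// Move-opcode selection is keyed on the destination class and size:
// S_MOV_B32/S_MOV_B64 for SGPRs, V_MOV_B32_e32 or V_MOV_B64_PSEUDO for VGPRs,
// and a generic COPY for AGPRs, 16-bit SGPR destinations, and anything else
// with no single machine move.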
                                              bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                           bool IsSGPR) const {
  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;

                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = RI.getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = RI.getSpillSize(*RC);
      FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
                             unsigned Quantity) const {
  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);

    MBB.addSuccessor(TrapBB);

    ContBB = HaltLoopBB;

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)

      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)

      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
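// S_NOP can encode at most (1 << getSNopBits()) waits per instruction, so
// longer delays are emitted as a sequence; conversely, an S_NOP with immediate
// operand N corresponds to N + 1 wait states, hence the "+ 1" above.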
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));

  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
    Op.setOffset(Op.getOffset() + 4);
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {

  case AMDGPU::ENTER_STRICT_WQM: {
    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
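// S_LOAD_DWORDX16/X8 shrinking: when the only user reads a 256- or 128-bit
// subregister, the load is narrowed to S_LOAD_DWORDX8/X4, its destination
// register class and immediate offset are adjusted, and the user is rewritten
// to read the full narrower register.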
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                      AMDGPU::OpName Src0OpName,
                                      AMDGPU::OpName Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();

  NonRegOp2.setImm(NonRegVal);

                                        unsigned OpIdx1) const {

  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&

  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&

  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {

  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);
                       AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
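// Only src0 and src1 are ever commuted; the assert above guards against the
// operand indices drifting out of sync with findCommutedOpIndices, and the
// matching source-modifier (and SDWA src1_sel) operands are swapped alongside
// the operands themselves.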
                                     int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.useAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;
    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(
    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();
    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;

    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {

  if (Cond[0].isImm()) {

                                Register FalseReg, int &CondCycles,
                                int &TrueCycles, int &FalseCycles) const {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.isSGPRClass(RC);

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

                                                   unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
  return std::nullopt;
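// The switches that follow map MAC/FMA opcodes to their two-address "K"
// forms: MADAK/FMAAK take the literal constant in src2, MADMK/FMAMK take it in
// src1, with true16/fake16 variants chosen from the subtarget's 16-bit
// instruction support.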
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F16_t16_e32:
  case AMDGPU::V_FMAC_F16_fake16_e32:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F16_t16_e32:
  case AMDGPU::V_FMAC_F16_fake16_e32:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (RI.hasVGPRs(DstRC))

    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);

  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
        auto Tmp = MRI->createVirtualRegister(NewRC);
                UseMI.getDebugLoc(), get(AMDGPU::COPY),
                UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        if (UseMI.getOperand(1).isReg() &&
            RI.isVGPR(*MRI, UseMI.getOperand(1).getReg())) {
          auto Tmp = MRI->createVirtualRegister(NewRC);
                  get(AMDGPU::COPY), Tmp)
          UseMI.getOperand(1).setReg(Tmp);
          UseMI.getOperand(1).setIsKill();
        if (UseMI.getOperand(3).isReg() &&
            RI.isVGPR(*MRI, UseMI.getOperand(3).getReg())) {
          auto Tmp = MRI->createVirtualRegister(NewRC);
                  get(AMDGPU::COPY), Tmp)
          UseMI.getOperand(3).setReg(Tmp);
          UseMI.getOperand(3).setIsKill();

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

      if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&
        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
            MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
        auto Tmp = MRI->createVirtualRegister(NewRC);
                UseMI.getDebugLoc(), get(AMDGPU::COPY),
                UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        if (UseMI.getOperand(1).isReg() &&
            RI.isVGPR(*MRI, UseMI.getOperand(1).getReg())) {
          auto Tmp = MRI->createVirtualRegister(NewRC);
                  get(AMDGPU::COPY), Tmp)
          UseMI.getOperand(1).setReg(Tmp);
          UseMI.getOperand(1).setIsKill();
        if (UseMI.getOperand(2).isReg() &&
            RI.isVGPR(*MRI, UseMI.getOperand(2).getReg())) {
          auto Tmp = MRI->createVirtualRegister(NewRC);
                  get(AMDGPU::COPY), Tmp)
          UseMI.getOperand(2).setReg(Tmp);
          UseMI.getOperand(2).setIsKill();

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
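// For the true16/fake16 FMAAK/FMAMK forms, extra COPYs through temporary
// registers are inserted around the rewritten instruction and its register
// operands are retargeted to them; the immediate-defining move is erased once
// it has no remaining non-debug uses.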
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;

  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())

  auto *Def = MRI.getUniqueVRegDef(Reg);

    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  if (MI.isBundle()) {
    if (MI.getBundleSize() != 1)

    CandidateMI = MI.getNextNode();

  MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);

  if (MI.isBundle()) {
        MI.untieRegOperand(MO.getOperandNo());

    if (Def.isEarlyClobber() && Def.isReg() &&

    auto UpdateDefIndex = [&](LiveRange &LR) {
      auto *S = LR.find(OldIndex);
      if (S != LR.end() && S->start == OldIndex) {
        assert(S->valno && S->valno->def == OldIndex);
        S->start = NewIndex;
        S->valno->def = NewIndex;

    for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {
    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();

    if (MRI.hasOneNonDBGUse(DefReg)) {
      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      if (MI.isBundle()) {
          if (MO.isReg() && MO.getReg() == DefReg) {
            assert(MO.getSubReg() == 0 &&
                   "tied sub-registers in bundles currently not supported");
            MI.removeOperand(MO.getOperandNo());

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

      if (MI.isBundle()) {
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

  return MI.isBundle() ? &MI : NewMI;

                                                  ThreeAddressUpdates &U) const {
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);

  MachineInstrBuilder MIB;

  const MachineOperand *Src0Mods =
  const MachineOperand *Src1Mods =
  const MachineOperand *Src2Mods =

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
    MachineInstr *DefMI;

        MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
      MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);

  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  if (ST.isTgSplitEnabled())

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
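// Summary of the checks above (not an original comment): terminators and
// position markers, INLINEASM_BR, writes to EXEC, the S_SETREG*/S_SETPRIO*
// family, and SCHED_BARRIER with a zero mask are treated as points that
// instructions must not be moved across. For example:
//   SCHED_BARRIER 0   ; hard boundary, nothing is reordered across it
//   SCHED_BARRIER 4   ; non-zero mask, not a hard boundary for this check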
  switch (Imm.getBitWidth()) {
                               ST.hasInv2PiInlineImm());
                               ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                               ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();

  bool HasInv2Pi = ST.hasInv2PiInlineImm();

    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&

  switch (OperandType) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))

  return ST.hasVOP3Literal();

                                     int64_t ImmVal) const {
  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))

  return RI.opCanUseInlineConstant(OpInfo.OperandType);

         "unexpected imm-like operand kind");

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                                      AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();
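// For reference (summary, not code from this file): an operand is an inline
// constant when the hardware can encode it directly in the instruction,
// i.e. the integers -16..64 and the floats +-0.5, +-1.0, +-2.0, +-4.0
// (plus 1/(2*pi) when hasInv2PiInlineImm()). Anything else must be emitted
// as a 32-bit literal, which is why the code above also consults
// opCanUseLiteralConstant() and hasVOP3Literal().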
  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

  if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||

      (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                           unsigned Op32) const {

    Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();
    int OpTy = MI.getDesc().operands()[Idx++].OperandType;

  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {

  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)

    return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;

  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
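// Summary of the helpers above (not original comments): SGPRs, M0 and
// VCC/VCC_LO are read through the scalar "constant bus", while SGPR_NULL is
// explicitly exempted. These predicates feed both the e64->e32 shrinking
// logic (e.g. a VOP3 such as V_ADD_F32_e64 with no modifiers, clamp or omod
// and at most one constant-bus source can be rewritten as V_ADD_F32_e32)
// and the constant-bus verification performed further below.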
5036 switch (MO.getReg()) {
5038 case AMDGPU::VCC_LO:
5039 case AMDGPU::VCC_HI:
5041 case AMDGPU::FLAT_SCR:
  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";

  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);

  if (SIInstrInfo::isGenericOpcode(Opcode))

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5134 if (!
Desc.isVariadic() &&
5135 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
5136 ErrInfo =
"Instruction has wrong number of operands.";
5140 if (
MI.isInlineAsm()) {
5153 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
5154 ErrInfo =
"inlineasm operand has incorrect register class.";
5162 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
5163 ErrInfo =
"missing memory operand from image instruction.";
5168 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5171 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5172 "all fp values to integers.";
5177 int16_t RegClass = getOpRegClassID(OpInfo);
5179 switch (OpInfo.OperandType) {
5181 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5182 ErrInfo =
"Illegal immediate value for operand.";
5216 ErrInfo =
"Illegal immediate value for operand.";
5223 ErrInfo =
"Expected inline constant for operand.";
5238 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5239 ErrInfo =
"Expected immediate, but got non-immediate";
5248 if (OpInfo.isGenericType())
5263 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
5265 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5267 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5268 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5275 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5276 ErrInfo =
"Subtarget requires even aligned vector registers";
5281 if (RegClass != -1) {
5282 if (Reg.isVirtual())
5287 ErrInfo =
"Operand has incorrect register class.";
5295 if (!ST.hasSDWA()) {
5296 ErrInfo =
"SDWA is not supported on this target";
5300 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5301 AMDGPU::OpName::dst_sel}) {
5305 int64_t Imm = MO->
getImm();
5307 ErrInfo =
"Invalid SDWA selection";
5312 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5314 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5319 if (!ST.hasSDWAScalar()) {
5321 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(
MRI, MO.
getReg()))) {
5322 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5329 "Only reg allowed as operands in SDWA instructions on GFX9+";
5335 if (!ST.hasSDWAOmod()) {
5338 if (OMod !=
nullptr &&
5340 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5345 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5346 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5347 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5348 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5351 unsigned Mods = Src0ModsMO->
getImm();
5354 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5360 if (
isVOPC(BasicOpcode)) {
5361 if (!ST.hasSDWASdst() && DstIdx != -1) {
5364 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5365 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5368 }
else if (!ST.hasSDWAOutModsVOPC()) {
5371 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5372 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5378 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5379 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5386 if (DstUnused && DstUnused->isImm() &&
5389 if (!Dst.isReg() || !Dst.isTied()) {
5390 ErrInfo =
"Dst register should have tied register";
5395 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5398 "Dst register should be tied to implicit use of preserved register";
5402 ErrInfo =
"Dst register should use same physical register as preserved";
5409 if (
isImage(Opcode) && !
MI.mayStore()) {
5421 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5429 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5433 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5434 if (RegCount > DstSize) {
5435 ErrInfo =
"Image instruction returns too many registers for dst "
5444 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5445 unsigned ConstantBusCount = 0;
5446 bool UsesLiteral =
false;
5449 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5453 LiteralVal = &
MI.getOperand(ImmIdx);
5462 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5473 }
else if (!MO.
isFI()) {
5480 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5490 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5491 return !RI.regsOverlap(SGPRUsed, SGPR);
5500 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5501 Opcode != AMDGPU::V_WRITELANE_B32) {
5502 ErrInfo =
"VOP* instruction violates constant bus restriction";
5506 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5507 ErrInfo =
"VOP3 instruction uses literal";
5514 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5515 unsigned SGPRCount = 0;
5518 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5526 if (MO.
getReg() != SGPRUsed)
5531 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5532 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5539 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5540 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5547 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5557 ErrInfo =
"ABS not allowed in VOP3B instructions";
5570 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5577 if (
Desc.isBranch()) {
5579 ErrInfo =
"invalid branch target for SOPK instruction";
5586 ErrInfo =
"invalid immediate for SOPK instruction";
5591 ErrInfo =
"invalid immediate for SOPK instruction";
5598 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5599 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5600 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5601 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5602 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5603 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5605 const unsigned StaticNumOps =
5606 Desc.getNumOperands() +
Desc.implicit_uses().size();
5607 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5613 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5614 ErrInfo =
"missing implicit register operands";
5620 if (!Dst->isUse()) {
5621 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5626 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5627 UseOpIdx != StaticNumOps + 1) {
5628 ErrInfo =
"movrel implicit operands should be tied";
5635 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5637 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5638 ErrInfo =
"src0 should be subreg of implicit vector use";
5646 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5647 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5653 if (
MI.mayStore() &&
5658 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5659 ErrInfo =
"scalar stores must use m0 as offset register";
5665 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5667 if (
Offset->getImm() != 0) {
5668 ErrInfo =
"subtarget does not support offsets in flat instructions";
5673 if (
isDS(
MI) && !ST.hasGDS()) {
5675 if (GDSOp && GDSOp->
getImm() != 0) {
5676 ErrInfo =
"GDS is not supported on this subtarget";
5684 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5685 AMDGPU::OpName::vaddr0);
5686 AMDGPU::OpName RSrcOpName =
5687 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5688 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5696 ErrInfo =
"dim is out of range";
5701 if (ST.hasR128A16()) {
5703 IsA16 = R128A16->
getImm() != 0;
5704 }
else if (ST.hasA16()) {
5706 IsA16 = A16->
getImm() != 0;
5709 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5711 unsigned AddrWords =
5714 unsigned VAddrWords;
5716 VAddrWords = RsrcIdx - VAddr0Idx;
5717 if (ST.hasPartialNSAEncoding() &&
5719 unsigned LastVAddrIdx = RsrcIdx - 1;
5720 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5728 if (VAddrWords != AddrWords) {
5730 <<
" but got " << VAddrWords <<
"\n");
5731 ErrInfo =
"bad vaddr size";
5741 unsigned DC = DppCt->
getImm();
5742 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5743 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5744 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5745 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5746 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5747 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5748 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5749 ErrInfo =
"Invalid dpp_ctrl value";
5752 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5754 ErrInfo =
"Invalid dpp_ctrl value: "
5755 "wavefront shifts are not supported on GFX10+";
5758 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5760 ErrInfo =
"Invalid dpp_ctrl value: "
5761 "broadcasts are not supported on GFX10+";
5764 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5766 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5767 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5768 !ST.hasGFX90AInsts()) {
5769 ErrInfo =
"Invalid dpp_ctrl value: "
5770 "row_newbroadcast/row_share is not supported before "
5774 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5775 ErrInfo =
"Invalid dpp_ctrl value: "
5776 "row_share and row_xmask are not supported before GFX10";
5781 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5784 ErrInfo =
"Invalid dpp_ctrl value: "
5785 "DP ALU dpp only support row_newbcast";
5792 AMDGPU::OpName DataName =
5793 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5799 if (ST.hasGFX90AInsts()) {
5800 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5801 (RI.isAGPR(
MRI, Dst->getReg()) != RI.isAGPR(
MRI,
Data->getReg()))) {
5802 ErrInfo =
"Invalid register class: "
5803 "vdata and vdst should be both VGPR or AGPR";
5806 if (
Data && Data2 &&
5808 ErrInfo =
"Invalid register class: "
5809 "both data operands should be VGPR or AGPR";
5813 if ((Dst && RI.isAGPR(
MRI, Dst->getReg())) ||
5815 (Data2 && RI.isAGPR(
MRI, Data2->
getReg()))) {
5816 ErrInfo =
"Invalid register class: "
5817 "agpr loads and stores not supported on this GPU";
5823 if (ST.needsAlignedVGPRs()) {
5824 const auto isAlignedReg = [&
MI, &
MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5829 if (Reg.isPhysical())
5830 return !(RI.getHWRegIndex(Reg) & 1);
5832 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5833 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5836 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5837 Opcode == AMDGPU::DS_GWS_BARRIER) {
5839 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5840 ErrInfo =
"Subtarget requires even aligned vector registers "
5841 "for DS_GWS instructions";
5847 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5848 ErrInfo =
"Subtarget requires even aligned vector registers "
5849 "for vaddr operand of image instructions";
5855 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5857 if (Src->isReg() && RI.isSGPRReg(
MRI, Src->getReg())) {
5858 ErrInfo =
"Invalid register class: "
5859 "v_accvgpr_write with an SGPR is not supported on this GPU";
5864 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5867 ErrInfo =
"pseudo expects only physical SGPRs";
5874 if (!ST.hasScaleOffset()) {
5875 ErrInfo =
"Subtarget does not support offset scaling";
5879 ErrInfo =
"Instruction does not support offset scaling";
5888 for (
unsigned I = 0;
I < 3; ++
I) {
5894 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
5895 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
5897 if ((Dst && RI.getRegClassForReg(
MRI, Dst->getReg()) ==
5898 &AMDGPU::SReg_64RegClass) ||
5899 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5900 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
5912 switch (
MI.getOpcode()) {
5913 default:
return AMDGPU::INSTRUCTION_LIST_END;
5914 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5915 case AMDGPU::COPY:
return AMDGPU::COPY;
5916 case AMDGPU::PHI:
return AMDGPU::PHI;
5917 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5918 case AMDGPU::WQM:
return AMDGPU::WQM;
5919 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5920 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5921 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5922 case AMDGPU::S_MOV_B32: {
5924 return MI.getOperand(1).isReg() ||
5925 RI.isAGPR(
MRI,
MI.getOperand(0).getReg()) ?
5926 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5928 case AMDGPU::S_ADD_I32:
5929 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5930 case AMDGPU::S_ADDC_U32:
5931 return AMDGPU::V_ADDC_U32_e32;
5932 case AMDGPU::S_SUB_I32:
5933 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5936 case AMDGPU::S_ADD_U32:
5937 return AMDGPU::V_ADD_CO_U32_e32;
5938 case AMDGPU::S_SUB_U32:
5939 return AMDGPU::V_SUB_CO_U32_e32;
5940 case AMDGPU::S_ADD_U64_PSEUDO:
5941 return AMDGPU::V_ADD_U64_PSEUDO;
5942 case AMDGPU::S_SUB_U64_PSEUDO:
5943 return AMDGPU::V_SUB_U64_PSEUDO;
5944 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5945 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5946 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5947 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5948 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5949 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5950 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5951 case AMDGPU::S_XNOR_B32:
5952 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5953 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5954 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5955 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5956 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5957 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5958 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5959 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5960 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5961 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5962 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5963 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5964 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5965 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5966 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5967 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5968 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5969 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5970 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5971 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5972 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5973 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5974 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5975 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5976 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5977 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5978 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5979 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5980 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5981 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5982 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5983 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5984 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5985 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5986 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5987 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5988 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5989 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5990 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5991 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5992 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5993 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5994 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5995 case AMDGPU::S_CVT_F32_F16:
5996 case AMDGPU::S_CVT_HI_F32_F16:
5997 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5998 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5999 case AMDGPU::S_CVT_F16_F32:
6000 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
6001 : AMDGPU::V_CVT_F16_F32_fake16_e64;
6002 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
6003 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
6004 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
6005 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
6006 case AMDGPU::S_CEIL_F16:
6007 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
6008 : AMDGPU::V_CEIL_F16_fake16_e64;
6009 case AMDGPU::S_FLOOR_F16:
6010 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6011 : AMDGPU::V_FLOOR_F16_fake16_e64;
6012 case AMDGPU::S_TRUNC_F16:
6013 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6014 : AMDGPU::V_TRUNC_F16_fake16_e64;
6015 case AMDGPU::S_RNDNE_F16:
6016 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6017 : AMDGPU::V_RNDNE_F16_fake16_e64;
6018 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6019 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6020 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6021 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6022 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6023 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6024 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6025 case AMDGPU::S_ADD_F16:
6026 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6027 : AMDGPU::V_ADD_F16_fake16_e64;
6028 case AMDGPU::S_SUB_F16:
6029 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6030 : AMDGPU::V_SUB_F16_fake16_e64;
6031 case AMDGPU::S_MIN_F16:
6032 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6033 : AMDGPU::V_MIN_F16_fake16_e64;
6034 case AMDGPU::S_MAX_F16:
6035 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6036 : AMDGPU::V_MAX_F16_fake16_e64;
6037 case AMDGPU::S_MINIMUM_F16:
6038 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6039 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6040 case AMDGPU::S_MAXIMUM_F16:
6041 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6042 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6043 case AMDGPU::S_MUL_F16:
6044 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6045 : AMDGPU::V_MUL_F16_fake16_e64;
6046 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6047 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6048 case AMDGPU::S_FMAC_F16:
6049 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6050 : AMDGPU::V_FMAC_F16_fake16_e64;
6051 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6052 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6053 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6054 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6055 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6056 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6057 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6058 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6059 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6060 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6061 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6062 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6063 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6064 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6065 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6066 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6067 case AMDGPU::S_CMP_LT_F16:
6068 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6069 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6070 case AMDGPU::S_CMP_EQ_F16:
6071 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6072 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6073 case AMDGPU::S_CMP_LE_F16:
6074 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6075 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6076 case AMDGPU::S_CMP_GT_F16:
6077 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6078 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6079 case AMDGPU::S_CMP_LG_F16:
6080 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6081 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6082 case AMDGPU::S_CMP_GE_F16:
6083 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6084 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6085 case AMDGPU::S_CMP_O_F16:
6086 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6087 : AMDGPU::V_CMP_O_F16_fake16_e64;
6088 case AMDGPU::S_CMP_U_F16:
6089 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6090 : AMDGPU::V_CMP_U_F16_fake16_e64;
6091 case AMDGPU::S_CMP_NGE_F16:
6092 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6093 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6094 case AMDGPU::S_CMP_NLG_F16:
6095 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6096 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6097 case AMDGPU::S_CMP_NGT_F16:
6098 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6099 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6100 case AMDGPU::S_CMP_NLE_F16:
6101 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6102 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6103 case AMDGPU::S_CMP_NEQ_F16:
6104 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6105 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6106 case AMDGPU::S_CMP_NLT_F16:
6107 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6108 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6109 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6110 case AMDGPU::V_S_EXP_F16_e64:
6111 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6112 : AMDGPU::V_EXP_F16_fake16_e64;
6113 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6114 case AMDGPU::V_S_LOG_F16_e64:
6115 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6116 : AMDGPU::V_LOG_F16_fake16_e64;
6117 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6118 case AMDGPU::V_S_RCP_F16_e64:
6119 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6120 : AMDGPU::V_RCP_F16_fake16_e64;
6121 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6122 case AMDGPU::V_S_RSQ_F16_e64:
6123 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6124 : AMDGPU::V_RSQ_F16_fake16_e64;
6125 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6126 case AMDGPU::V_S_SQRT_F16_e64:
6127 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6128 : AMDGPU::V_SQRT_F16_fake16_e64;
6131 "Unexpected scalar opcode without corresponding vector one!");
6180 "Not a whole wave func");
6183 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6184 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6191 unsigned OpNo)
const {
6193 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6194 Desc.operands()[OpNo].RegClass == -1) {
6197 if (Reg.isVirtual()) {
6199 return MRI.getRegClass(Reg);
6201 return RI.getPhysRegBaseClass(Reg);
6204 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6205 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6213 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6215 unsigned Size = RI.getRegSizeInBits(*RC);
6216 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6217 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6218 : AMDGPU::V_MOV_B32_e32;
6220 Opcode = AMDGPU::COPY;
6221 else if (RI.isSGPRClass(RC))
6222 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6236 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6242 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6253 if (SubIdx == AMDGPU::sub0)
6255 if (SubIdx == AMDGPU::sub1)
6267void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6283 if (Reg.isPhysical())
6293 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6296 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6303 unsigned Opc =
MI.getOpcode();
6309 constexpr AMDGPU::OpName OpNames[] = {
6310 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6313 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6314 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6324 bool IsAGPR = RI.isAGPR(
MRI, MO.
getReg());
6325 if (IsAGPR && !ST.hasMAIInsts())
6327 if (IsAGPR && (!ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
6331 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6332 const int DataIdx = AMDGPU::getNamedOperandIdx(
6333 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6334 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6335 MI.getOperand(DataIdx).isReg() &&
6336 RI.isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6338 if ((
int)
OpIdx == DataIdx) {
6339 if (VDstIdx != -1 &&
6340 RI.isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6343 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6344 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6345 RI.isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6350 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6351 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6355 if (ST.hasFlatScratchHiInB64InstHazard() &&
6362 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
  constexpr unsigned NumOps = 3;
  constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
      AMDGPU::OpName::src0,           AMDGPU::OpName::src1,
      AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,
      AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};

    int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);

    MO = &MI.getOperand(SrcIdx);

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);

    unsigned Mods = MI.getOperand(ModsIdx).getImm();

  return !OpSel && !OpSelHi;
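// Reading of the visible checks above (not an original comment): the loop
// walks src0/src1/src2 together with their *_modifiers operands and returns
// true only when none of them carries an OPSEL or OPSEL_HI bit.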
6419 int64_t RegClass = getOpRegClassID(OpInfo);
6421 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6430 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6431 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6435 if (!LiteralLimit--)
6445 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6453 if (--ConstantBusLimit <= 0)
6465 if (!LiteralLimit--)
6467 if (--ConstantBusLimit <= 0)
6473 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6477 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6479 !
Op.isIdenticalTo(*MO))
6489 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6503 bool Is64BitOp = Is64BitFPOp ||
6510 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6519 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6537 bool IsGFX950Only = ST.hasGFX950Insts();
6538 bool IsGFX940Only = ST.hasGFX940Insts();
6540 if (!IsGFX950Only && !IsGFX940Only)
6558 unsigned Opcode =
MI.getOpcode();
6560 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6561 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6562 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6563 case AMDGPU::V_MQSAD_U32_U8_e64:
6564 case AMDGPU::V_PK_ADD_F16:
6565 case AMDGPU::V_PK_ADD_F32:
6566 case AMDGPU::V_PK_ADD_I16:
6567 case AMDGPU::V_PK_ADD_U16:
6568 case AMDGPU::V_PK_ASHRREV_I16:
6569 case AMDGPU::V_PK_FMA_F16:
6570 case AMDGPU::V_PK_FMA_F32:
6571 case AMDGPU::V_PK_FMAC_F16_e32:
6572 case AMDGPU::V_PK_FMAC_F16_e64:
6573 case AMDGPU::V_PK_LSHLREV_B16:
6574 case AMDGPU::V_PK_LSHRREV_B16:
6575 case AMDGPU::V_PK_MAD_I16:
6576 case AMDGPU::V_PK_MAD_U16:
6577 case AMDGPU::V_PK_MAX_F16:
6578 case AMDGPU::V_PK_MAX_I16:
6579 case AMDGPU::V_PK_MAX_U16:
6580 case AMDGPU::V_PK_MIN_F16:
6581 case AMDGPU::V_PK_MIN_I16:
6582 case AMDGPU::V_PK_MIN_U16:
6583 case AMDGPU::V_PK_MOV_B32:
6584 case AMDGPU::V_PK_MUL_F16:
6585 case AMDGPU::V_PK_MUL_F32:
6586 case AMDGPU::V_PK_MUL_LO_U16:
6587 case AMDGPU::V_PK_SUB_I16:
6588 case AMDGPU::V_PK_SUB_U16:
6589 case AMDGPU::V_QSAD_PK_U16_U8_e64:
  unsigned Opc = MI.getOpcode();

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() &&

  if (Opc == AMDGPU::V_WRITELANE_B32) {

      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))

  if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  if (HasImplicitSGPR || !MI.isCommutable()) {

  if (CommutedOpc == -1) {

  MI.setDesc(get(CommutedOpc));

  bool Src0Kill = Src0.isKill();

  else if (Src1.isReg()) {
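// Sketch of the VOP2 legalization above (not original comments): the 32-bit
// VOP2 encoding can take its single constant-bus operand (SGPR or literal)
// only in src0, so when src1 is the offending operand the code either
// commutes src0/src1 (when the opcode is commutable) or materializes the
// value first, e.g.
//   %t:sreg_32_xm0 = COPY %s          ; lane index of V_WRITELANE/V_READLANE
//   %v:vgpr_32     = V_MOV_B32_e32 %s ; general case
// before rewriting the use.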
6709 unsigned Opc =
MI.getOpcode();
6712 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6713 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6714 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6717 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6718 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6719 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6720 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6721 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6722 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6723 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6727 if (Src1.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()))) {
6728 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6733 if (VOP3Idx[2] != -1) {
6735 if (Src2.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src2.
getReg()))) {
6736 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6745 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6746 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6748 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6750 SGPRsUsed.
insert(SGPRReg);
6754 for (
int Idx : VOP3Idx) {
6763 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6775 if (!RI.isSGPRClass(RI.getRegClassForReg(
MRI, MO.
getReg())))
6782 if (ConstantBusLimit > 0) {
6794 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6795 !RI.isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6801 for (
unsigned I = 0;
I < 3; ++
I) {
6814 SRC = RI.getCommonSubClass(SRC, DstRC);
6817 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6819 if (RI.hasAGPRs(VRC)) {
6820 VRC = RI.getEquivalentVGPRClass(VRC);
6821 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6823 get(TargetOpcode::COPY), NewSrcReg)
6830 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6836 for (
unsigned i = 0; i < SubRegs; ++i) {
6837 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6839 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6840 .
addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
6846 get(AMDGPU::REG_SEQUENCE), DstReg);
6847 for (
unsigned i = 0; i < SubRegs; ++i) {
6849 MIB.
addImm(RI.getSubRegFromChannel(i));
6862 if (SBase && !RI.isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6864 SBase->setReg(SGPR);
6867 if (SOff && !RI.isSGPRReg(
MRI, SOff->
getReg())) {
6875 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6876 if (OldSAddrIdx < 0)
6892 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6893 if (NewVAddrIdx < 0)
6896 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6900 if (OldVAddrIdx >= 0) {
6902 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6914 if (OldVAddrIdx == NewVAddrIdx) {
6917 MRI.removeRegOperandFromUseList(&NewVAddr);
6918 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6922 MRI.removeRegOperandFromUseList(&NewVAddr);
6923 MRI.addRegOperandToUseList(&NewVAddr);
6925 assert(OldSAddrIdx == NewVAddrIdx);
6927 if (OldVAddrIdx >= 0) {
6928 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6929 AMDGPU::OpName::vdst_in);
6933 if (NewVDstIn != -1) {
6934 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6940 if (NewVDstIn != -1) {
6941 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6962 if (!SAddr || RI.isSGPRClass(
MRI.getRegClass(SAddr->
getReg())))
6982 unsigned OpSubReg =
Op.getSubReg();
6985 RI.getRegClassForReg(
MRI, OpReg), OpSubReg);
6991 Register DstReg =
MRI.createVirtualRegister(DstRC);
7001 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7004 bool ImpDef = Def->isImplicitDef();
7005 while (!ImpDef && Def && Def->isCopy()) {
7006 if (Def->getOperand(1).getReg().isPhysical())
7008 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
7009 ImpDef = Def && Def->isImplicitDef();
7011 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7030 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7036 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
7037 unsigned NumSubRegs =
RegSize / 32;
7038 Register VScalarOp = ScalarOp->getReg();
7040 if (NumSubRegs == 1) {
7041 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7043 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7046 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
7048 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7054 CondReg = NewCondReg;
7056 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
7064 ScalarOp->setReg(CurReg);
7065 ScalarOp->setIsKill();
7069 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7070 "Unhandled register size");
7072 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7074 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7076 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7079 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7080 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
7083 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7084 .
addReg(VScalarOp, VScalarOpUndef,
7085 TRI->getSubRegFromChannel(Idx + 1));
7091 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
7092 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7098 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
7099 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
7102 if (NumSubRegs <= 2)
7103 Cmp.addReg(VScalarOp);
7105 Cmp.addReg(VScalarOp, VScalarOpUndef,
7106 TRI->getSubRegFromChannel(Idx, 2));
7110 CondReg = NewCondReg;
7112 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
7120 const auto *SScalarOpRC =
7121 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
7122 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
7126 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7127 unsigned Channel = 0;
7128 for (
Register Piece : ReadlanePieces) {
7129 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
7133 ScalarOp->setReg(SScalarOp);
7134 ScalarOp->setIsKill();
7138 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
7139 MRI.setSimpleHint(SaveExec, CondReg);
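// Illustrative shape of the waterfall loop built above (ISA-level sketch,
// not from this file), used when a value that must be uniform is only
// available in a VGPR:
//   loop:
//     v_readfirstlane_b32 s_val, v_op
//     v_cmp_eq_u32        vcc, s_val, v_op
//     s_and_saveexec_b64  s_save, vcc
//     ; ... use s_val in the rewritten instruction ...
//     s_xor_b64           exec, exec, s_save
//     s_cbranch_execnz    loop
// Each iteration handles the subset of lanes that agree on the value.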
7170 if (!Begin.isValid())
7172 if (!End.isValid()) {
7178 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7186 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
7187 std::numeric_limits<unsigned>::max()) !=
7190 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7196 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
7205 for (
auto I = Begin;
I != AfterMI;
I++) {
7206 for (
auto &MO :
I->all_uses())
7207 MRI.clearKillFlags(MO.getReg());
7232 MBB.addSuccessor(LoopBB);
7242 for (
auto &Succ : RemainderBB->
successors()) {
7266static std::tuple<unsigned, unsigned>
7274 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7275 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7278 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7279 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7280 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7281 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7282 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7299 .
addImm(AMDGPU::sub0_sub1)
7305 return std::tuple(RsrcPtr, NewSRsrc);
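// Sketch of the resource-descriptor split performed above (not original
// comments): the first 64 bits of the VGPR rsrc become RsrcPtr (a VReg_64
// base pointer), while NewSRsrc is an SGPR_128 rebuilt as
//   { 0, 0, DefaultRsrcDataFormat[31:0], DefaultRsrcDataFormat[63:32] }
// so the MUBUF instruction keeps a well-formed scalar descriptor and the
// variable pointer is folded in through the addr64/vaddr path instead.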
7342 if (
MI.getOpcode() == AMDGPU::PHI) {
7344 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
7345 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
7348 MRI.getRegClass(
MI.getOperand(i).getReg());
7349 if (RI.hasVectorRegisters(OpRC)) {
7363 VRC = &AMDGPU::VReg_1RegClass;
7366 ? RI.getEquivalentAGPRClass(SRC)
7367 : RI.getEquivalentVGPRClass(SRC);
7370 ? RI.getEquivalentAGPRClass(VRC)
7371 : RI.getEquivalentVGPRClass(VRC);
7379 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7381 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7397 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7400 if (RI.hasVGPRs(DstRC)) {
7404 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7406 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7424 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7429 if (DstRC != Src0RC) {
7438 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7440 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7446 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7447 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7448 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7449 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7450 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7451 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7452 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7454 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7467 ? AMDGPU::OpName::rsrc
7468 : AMDGPU::OpName::srsrc;
7470 if (SRsrc && !RI.isSGPRClass(
MRI.getRegClass(SRsrc->
getReg())))
7473 AMDGPU::OpName SampOpName =
7474 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7476 if (SSamp && !RI.isSGPRClass(
MRI.getRegClass(SSamp->
getReg())))
7483 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7485 if (!RI.isSGPRClass(
MRI.getRegClass(Dest->
getReg()))) {
7489 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7490 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7495 while (Start->getOpcode() != FrameSetupOpcode)
7498 while (End->getOpcode() != FrameDestroyOpcode)
7502 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7503 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7511 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7513 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7515 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7525 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7526 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7527 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7528 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7530 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7537 bool isSoffsetLegal =
true;
7539 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7540 if (SoffsetIdx != -1) {
7543 !RI.isSGPRClass(
MRI.getRegClass(Soffset->
getReg()))) {
7544 isSoffsetLegal =
false;
7548 bool isRsrcLegal =
true;
7550 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7551 if (RsrcIdx != -1) {
7554 isRsrcLegal =
false;
7558 if (isRsrcLegal && isSoffsetLegal)
7582 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7583 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7584 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7586 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7587 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7588 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7590 unsigned RsrcPtr, NewSRsrc;
7597 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7604 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7618 }
else if (!VAddr && ST.hasAddr64()) {
7622 "FIXME: Need to emit flat atomics here");
7624 unsigned RsrcPtr, NewSRsrc;
7627 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7650 MIB.
addImm(CPol->getImm());
7655 MIB.
addImm(TFE->getImm());
7675 MI.removeFromParent();
7680 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7682 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7686 if (!isSoffsetLegal) {
7698 if (!isSoffsetLegal) {
7710 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7711 if (RsrcIdx != -1) {
7712 DeferredList.insert(
MI);
7717 return DeferredList.contains(
MI);
  if (!ST.useRealTrue16Insts())

  unsigned Opcode = MI.getOpcode();

      OpIdx >= get(Opcode).getNumOperands() ||
      get(Opcode).operands()[OpIdx].RegClass == -1)

  if (!Op.isReg() || !Op.getReg().isVirtual())

  if (!RI.isVGPRClass(CurrRC))

  int16_t RCID = getOpRegClassID(get(Opcode).operands()[OpIdx]);

  if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
    Op.setSubReg(AMDGPU::lo16);
  } else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
    Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);

    Op.setReg(NewDstReg);
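// True16 fixup sketch (not original comments): when the operand's register
// class and the class the opcode expects differ only in width, the code
// above either narrows the use with a lo16 subregister index, or widens a
// 16-bit value by building a 32-bit register whose high half is undef:
//   %hi:vgpr_16 = IMPLICIT_DEF
//   %w:vgpr_32  = REG_SEQUENCE %val:vgpr_16, %subreg.lo16,
//                              %hi:vgpr_16,  %subreg.hi16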
7772 while (!Worklist.
empty()) {
7786 "Deferred MachineInstr are not supposed to re-populate worklist");
7806 case AMDGPU::S_ADD_I32:
7807 case AMDGPU::S_SUB_I32: {
7811 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7819 case AMDGPU::S_MUL_U64:
7820 if (ST.hasVectorMulU64()) {
7821 NewOpcode = AMDGPU::V_MUL_U64_e64;
7825 splitScalarSMulU64(Worklist, Inst, MDT);
7829 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7830 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7833 splitScalarSMulPseudo(Worklist, Inst, MDT);
7837 case AMDGPU::S_AND_B64:
7838 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7842 case AMDGPU::S_OR_B64:
7843 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7847 case AMDGPU::S_XOR_B64:
7848 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7852 case AMDGPU::S_NAND_B64:
7853 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7857 case AMDGPU::S_NOR_B64:
7858 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7862 case AMDGPU::S_XNOR_B64:
7863 if (ST.hasDLInsts())
7864 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7866 splitScalar64BitXnor(Worklist, Inst, MDT);
7870 case AMDGPU::S_ANDN2_B64:
7871 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7875 case AMDGPU::S_ORN2_B64:
7876 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7880 case AMDGPU::S_BREV_B64:
7881 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7885 case AMDGPU::S_NOT_B64:
7886 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7890 case AMDGPU::S_BCNT1_I32_B64:
7891 splitScalar64BitBCNT(Worklist, Inst);
7895 case AMDGPU::S_BFE_I64:
7896 splitScalar64BitBFE(Worklist, Inst);
7900 case AMDGPU::S_FLBIT_I32_B64:
7901 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7904 case AMDGPU::S_FF1_I32_B64:
7905 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7909 case AMDGPU::S_LSHL_B32:
7910 if (ST.hasOnlyRevVALUShifts()) {
7911 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7915 case AMDGPU::S_ASHR_I32:
7916 if (ST.hasOnlyRevVALUShifts()) {
7917 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7921 case AMDGPU::S_LSHR_B32:
7922 if (ST.hasOnlyRevVALUShifts()) {
7923 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7927 case AMDGPU::S_LSHL_B64:
7928 if (ST.hasOnlyRevVALUShifts()) {
7930 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7931 : AMDGPU::V_LSHLREV_B64_e64;
7935 case AMDGPU::S_ASHR_I64:
7936 if (ST.hasOnlyRevVALUShifts()) {
7937 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7941 case AMDGPU::S_LSHR_B64:
7942 if (ST.hasOnlyRevVALUShifts()) {
7943 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7948 case AMDGPU::S_ABS_I32:
7949 lowerScalarAbs(Worklist, Inst);
7953 case AMDGPU::S_ABSDIFF_I32:
7954 lowerScalarAbsDiff(Worklist, Inst);
7958 case AMDGPU::S_CBRANCH_SCC0:
7959 case AMDGPU::S_CBRANCH_SCC1: {
7962 bool IsSCC = CondReg == AMDGPU::SCC;
7970 case AMDGPU::S_BFE_U64:
7971 case AMDGPU::S_BFM_B64:
7974 case AMDGPU::S_PACK_LL_B32_B16:
7975 case AMDGPU::S_PACK_LH_B32_B16:
7976 case AMDGPU::S_PACK_HL_B32_B16:
7977 case AMDGPU::S_PACK_HH_B32_B16:
7978 movePackToVALU(Worklist,
MRI, Inst);
7982 case AMDGPU::S_XNOR_B32:
7983 lowerScalarXnor(Worklist, Inst);
7987 case AMDGPU::S_NAND_B32:
7988 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7992 case AMDGPU::S_NOR_B32:
7993 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7997 case AMDGPU::S_ANDN2_B32:
7998 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
8002 case AMDGPU::S_ORN2_B32:
8003 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
8011 case AMDGPU::S_ADD_CO_PSEUDO:
8012 case AMDGPU::S_SUB_CO_PSEUDO: {
8013 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
8014 ? AMDGPU::V_ADDC_U32_e64
8015 : AMDGPU::V_SUBB_U32_e64;
8016 const auto *CarryRC = RI.getWaveMaskRegClass();
8019 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
8020 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
8027 Register DestReg =
MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
8038 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
8042 case AMDGPU::S_UADDO_PSEUDO:
8043 case AMDGPU::S_USUBO_PSEUDO: {
8049 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8050 ? AMDGPU::V_ADD_CO_U32_e64
8051 : AMDGPU::V_SUB_CO_U32_e64;
8053 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest0.
getReg()));
8054 Register DestReg =
MRI.createVirtualRegister(NewRC);
8062 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
8063 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
8067 case AMDGPU::S_LSHL1_ADD_U32:
8068 case AMDGPU::S_LSHL2_ADD_U32:
8069 case AMDGPU::S_LSHL3_ADD_U32:
8070 case AMDGPU::S_LSHL4_ADD_U32: {
8074 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8075 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8076 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8080 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest.
getReg()));
8081 Register DestReg =
MRI.createVirtualRegister(NewRC);
8089 MRI.replaceRegWith(Dest.
getReg(), DestReg);
8090 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
8094 case AMDGPU::S_CSELECT_B32:
8095 case AMDGPU::S_CSELECT_B64:
8096 lowerSelect(Worklist, Inst, MDT);
8099 case AMDGPU::S_CMP_EQ_I32:
8100 case AMDGPU::S_CMP_LG_I32:
8101 case AMDGPU::S_CMP_GT_I32:
8102 case AMDGPU::S_CMP_GE_I32:
8103 case AMDGPU::S_CMP_LT_I32:
8104 case AMDGPU::S_CMP_LE_I32:
8105 case AMDGPU::S_CMP_EQ_U32:
8106 case AMDGPU::S_CMP_LG_U32:
8107 case AMDGPU::S_CMP_GT_U32:
8108 case AMDGPU::S_CMP_GE_U32:
8109 case AMDGPU::S_CMP_LT_U32:
8110 case AMDGPU::S_CMP_LE_U32:
8111 case AMDGPU::S_CMP_EQ_U64:
8112 case AMDGPU::S_CMP_LG_U64:
8113 case AMDGPU::S_CMP_LT_F32:
8114 case AMDGPU::S_CMP_EQ_F32:
8115 case AMDGPU::S_CMP_LE_F32:
8116 case AMDGPU::S_CMP_GT_F32:
8117 case AMDGPU::S_CMP_LG_F32:
8118 case AMDGPU::S_CMP_GE_F32:
8119 case AMDGPU::S_CMP_O_F32:
8120 case AMDGPU::S_CMP_U_F32:
8121 case AMDGPU::S_CMP_NGE_F32:
8122 case AMDGPU::S_CMP_NLG_F32:
8123 case AMDGPU::S_CMP_NGT_F32:
8124 case AMDGPU::S_CMP_NLE_F32:
8125 case AMDGPU::S_CMP_NEQ_F32:
8126 case AMDGPU::S_CMP_NLT_F32: {
8127 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8131 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8145 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8149 case AMDGPU::S_CMP_LT_F16:
8150 case AMDGPU::S_CMP_EQ_F16:
8151 case AMDGPU::S_CMP_LE_F16:
8152 case AMDGPU::S_CMP_GT_F16:
8153 case AMDGPU::S_CMP_LG_F16:
8154 case AMDGPU::S_CMP_GE_F16:
8155 case AMDGPU::S_CMP_O_F16:
8156 case AMDGPU::S_CMP_U_F16:
8157 case AMDGPU::S_CMP_NGE_F16:
8158 case AMDGPU::S_CMP_NLG_F16:
8159 case AMDGPU::S_CMP_NGT_F16:
8160 case AMDGPU::S_CMP_NLE_F16:
8161 case AMDGPU::S_CMP_NEQ_F16:
8162 case AMDGPU::S_CMP_NLT_F16: {
8163 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8185 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8189 case AMDGPU::S_CVT_HI_F32_F16: {
8190 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8191 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8192 if (ST.useRealTrue16Insts()) {
8197 .
addReg(TmpReg, 0, AMDGPU::hi16)
8213 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8217 case AMDGPU::S_MINIMUM_F32:
8218 case AMDGPU::S_MAXIMUM_F32: {
8219 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8230 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8234 case AMDGPU::S_MINIMUM_F16:
8235 case AMDGPU::S_MAXIMUM_F16: {
8236 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8237 ? &AMDGPU::VGPR_16RegClass
8238 : &AMDGPU::VGPR_32RegClass);
8250 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8254 case AMDGPU::V_S_EXP_F16_e64:
8255 case AMDGPU::V_S_LOG_F16_e64:
8256 case AMDGPU::V_S_RCP_F16_e64:
8257 case AMDGPU::V_S_RSQ_F16_e64:
8258 case AMDGPU::V_S_SQRT_F16_e64: {
8259 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8260 ? &AMDGPU::VGPR_16RegClass
8261 : &AMDGPU::VGPR_32RegClass);
8273 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
    // ...
  }
  if (NewOpcode == Opcode) {
    // ...
    Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    // ... get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
    // ...
    if (const TargetRegisterClass *CommonRC =
            RI.getCommonSubClass(NewDstRC, SrcRC)) {
      // ...
      addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
      MRI.replaceRegWith(DstReg, NewDstReg);
      MRI.clearKillFlags(NewDstReg);
      // ...
      if (!MRI.constrainRegClass(NewDstReg, CommonRC))
        // ...
    }
  }
  if (ST.useRealTrue16Insts() && Inst.isCopy() &&
      /* ... */) {
    if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
      Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
      Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
      // ... get(AMDGPU::IMPLICIT_DEF), Undef);
      // ... get(AMDGPU::REG_SEQUENCE), NewDstReg)
      // ...
      MRI.replaceRegWith(DstReg, NewDstReg);
      addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
      // ...
    } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
                                           /* ... */)) {
      Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
      MRI.replaceRegWith(DstReg, NewDstReg);
      addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
      // ...
    }
    Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);
    // ...
    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
    // ...
  }
  if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                 AMDGPU::OpName::src0_modifiers) >= 0)
    // ...
  NewInstr->addOperand(Src);
  // ...
  if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
    // ...
    unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
    // ...
    NewInstr.addImm(Size);
  } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
    // ...
  } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
    // ... "Scalar BFE is only implemented for constant width and offset");
    // ...
  }
  if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                 AMDGPU::OpName::src1_modifiers) >= 0)
    // ...
  if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
    // ...
  if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                 AMDGPU::OpName::src2_modifiers) >= 0)
    // ...
  if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
    // ...
  if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
    // ...
  if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
    // ...
  if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
    // ...
  NewInstr->addOperand(Op);
  // ...
  if (Op.getReg() == AMDGPU::SCC) {
    if (Op.isDef() && !Op.isDead())
      addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
    // ...
    addSCCDefsToVALUWorklist(NewInstr, Worklist);
  }
  // ...
  if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
    Register DstReg = NewInstr->getOperand(0).getReg();
    // ...
    NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);
    // ...
    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
  }
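  // Note (summary of the partial listing above): the generic path builds the
  // VALU replacement instruction and copies the source operands across,
  // inserting explicit modifier/clamp/omod/op_sel operands whenever the new
  // opcode defines them. A few scalar opcodes need extra immediate operands
  // (S_SEXT_I32_I8/I16 append a width of 8 or 16; the scalar BFE forms are
  // only handled for constant width and offset). SCC definitions and uses are
  // rerouted through the worklist helpers, and the destination is rewritten to
  // a fresh VALU-class register whose users are queued for conversion.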
std::pair<bool, MachineBasicBlock *>
// ...
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
  unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? AMDGPU::V_ADD_U32_e64
                                             : AMDGPU::V_SUB_U32_e64;
  // ...
  MRI.replaceRegWith(OldDstReg, ResultReg);
  // ...
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  return std::pair(true, NewBB);
  // ...
  return std::pair(false, nullptr);
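// Note: the fragment above appears to lower S_ADD_I32/S_SUB_I32 to
// V_ADD_U32_e64/V_SUB_U32_e64 when no carry output is needed, returning
// whether the rewrite happened together with the (possibly new) insertion
// block.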
  bool IsSCC = (CondReg == AMDGPU::SCC);
  // ...
    MRI.replaceRegWith(Dest.getReg(), CondReg);
  // ...
  const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
  NewCondReg = MRI.createVirtualRegister(TC);
  // ...
  bool CopyFound = false;
  for (MachineInstr &CandI : /* ... */) {
    if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1) {
      if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
        // ...
            .addReg(CandI.getOperand(1).getReg());
        // ...
      }
      // ...
    }
  }
  // ...
      ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
  // ...
      RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
  MachineInstr *NewInst;
  if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
    NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
    // ...
  }
  // ...
  MRI.replaceRegWith(Dest.getReg(), NewDestReg);
  addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
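  // Note: judging from the fragment above, S_CSELECT is lowered by
  // materializing the condition in a wave-mask register (reusing the source of
  // a nearby copy into SCC when one is found) and emitting V_CNDMASK_B32_e64
  // into a VGPR-class destination that replaces the original SGPR result.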
  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned SubOp = ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32
                                      : AMDGPU::V_SUB_CO_U32_e32;
  // ...
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);

  // ...
  Register SubResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
      ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
  // ...
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  if (ST.hasDLInsts()) {
    Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // ...
    MRI.replaceRegWith(Dest.getReg(), NewDest);
    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
    // ...
  }
  bool Src0IsSGPR = Src0.isReg() &&
                    RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
  bool Src1IsSGPR = Src1.isReg() &&
                    RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
  // ...
  Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  } else if (Src1IsSGPR) {
  // ...
  MRI.replaceRegWith(Dest.getReg(), NewDest);
  // ...
  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
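  // Note: the XNOR lowering above takes the direct route (a single VALU xnor)
  // when the subtarget has DL instructions; otherwise it appears to keep part
  // of the computation on the scalar unit, choosing how to split the not/xor
  // based on which source operand still lives in an SGPR, before handing the
  // result to the VALU worklist.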
                                          unsigned Opcode) const {
  // ...
  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  MRI.replaceRegWith(Dest.getReg(), NewDest);
  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
}

// ...
                                          unsigned Opcode) const {
  // ...
  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  // ...
  MRI.replaceRegWith(Dest.getReg(), NewDest);
  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
}
  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg()
                                          ? MRI.getRegClass(Src0.getReg())
                                          : &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
  // ...                              AMDGPU::sub0, Src0SubRC);
  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC =
      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);

  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &LoHalf =
      *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
  // ...                              AMDGPU::sub1, Src0SubRC);
  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &HiHalf =
      *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);

  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
  // ...
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  Worklist.insert(&LoHalf);
  Worklist.insert(&HiHalf);
  // ...
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
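  // Note: this 64-bit unary split follows the usual pattern in this file: run
  // the 32-bit VALU opcode once per half (the sub0 and sub1 pieces of the
  // source), recombine the two results (typically via REG_SEQUENCE) into a
  // 64-bit VGPR pair that replaces the original SGPR destination, and queue
  // both halves for any further legalization.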
  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src0SubRC))
    Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
  const TargetRegisterClass *Src1SubRC =
      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src1SubRC))
    Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
  // ...
  MachineOperand Op0L = // ...
  MachineOperand Op1L = // ...
  MachineOperand Op0H = // ...
  MachineOperand Op1H = // ...
  // ...
  Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *Op1L_Op0H = // ...
  Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *Op1H_Op0L = // ...
  Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *Carry = // ...
  MachineInstr *LoHalf = // ...
  Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *HiHalf = // ...
  // ...
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
  // ...
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
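// The expansion above builds a 64 x 64 -> 64-bit product out of 32-bit pieces:
// a full low product plus two cross terms folded, with a carry, into the high
// half. The following is a minimal standalone sketch of that identity, added
// for illustration only; the helper name is ours and it makes no claim about
// the exact VALU opcodes the pass emits.
#include <cstdint>
static inline uint64_t mul64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t AL = uint32_t(A), AH = uint32_t(A >> 32);
  uint32_t BL = uint32_t(B), BH = uint32_t(B >> 32);
  uint64_t Lo = uint64_t(AL) * BL;                      // 32x32->64 low product
  uint32_t Hi = uint32_t(Lo >> 32) + AL * BH + AH * BL; // cross terms wrap mod 2^32
  return (uint64_t(Hi) << 32) | uint32_t(Lo);           // reassemble the 64-bit result
}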
  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src0SubRC))
    Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
  const TargetRegisterClass *Src1SubRC =
      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src1SubRC))
    Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
  // ...
  MachineOperand Op0L = // ...
  MachineOperand Op1L = // ...
  // ...
  unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
                        ? AMDGPU::V_MUL_HI_U32_e64
                        : AMDGPU::V_MUL_HI_I32_e64;
  MachineInstr *HiHalf = // ...
  MachineInstr *LoHalf = // ...
  // ...
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
  // ...
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg()
                                          ? MRI.getRegClass(Src0.getReg())
                                          : &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg()
                                          ? MRI.getRegClass(Src1.getReg())
                                          : &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *Src1SubRC =
      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
  // ...                              AMDGPU::sub0, Src0SubRC);
  // ...                              AMDGPU::sub0, Src1SubRC);
  // ...                              AMDGPU::sub1, Src0SubRC);
  // ...                              AMDGPU::sub1, Src1SubRC);
  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC =
      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);

  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
  // ...
  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
  // ...
  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
  // ...
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  Worklist.insert(&LoHalf);
  Worklist.insert(&HiHalf);
  // ...
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  // ...
  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  // ...
  MachineOperand *Op0;
  MachineOperand *Op1;
  // ...
  Register NewDest = MRI.createVirtualRegister(DestRC);
  // ...
  MRI.replaceRegWith(Dest.getReg(), NewDest);
  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
  const TargetRegisterClass *SrcRC =
      Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
  // ...
  Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  const TargetRegisterClass *SrcSubRC =
      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
  // ...                              AMDGPU::sub0, SrcSubRC);
  // ...                              AMDGPU::sub1, SrcSubRC);
  // ...
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  // ...
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  // ...
         Offset == 0 && "Not implemented");
  // ...
  Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  // ...
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);

  // ...
  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  // ...
      .addReg(Src.getReg(), 0, AMDGPU::sub0);
  // ...
      .addReg(Src.getReg(), 0, AMDGPU::sub0)
  // ...
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  const MCInstrDesc &InstDesc = get(Opcode);
  bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
  unsigned OpcodeAdd =
      ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  const TargetRegisterClass *SrcRC =
      Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *SrcSubRC =
      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
  MachineOperand SrcRegSub0 = // ...
  MachineOperand SrcRegSub1 = // ...
  Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
      .addReg(IsCtlz ? MidReg1 : MidReg2)
  // ...
      .addReg(IsCtlz ? MidReg2 : MidReg1);
  // ...
  MRI.replaceRegWith(Dest.getReg(), MidReg4);
  // ...
  addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
void SIInstrInfo::addUsersToMoveToVALUWorklist(
    // ...
  MachineInstr &UseMI = *MO.getParent();
  // ...
  switch (UseMI.getOpcode()) {
  // ...
  case AMDGPU::SOFT_WQM:
  case AMDGPU::STRICT_WWM:
  case AMDGPU::STRICT_WQM:
  case AMDGPU::REG_SEQUENCE:
  // ...
  case AMDGPU::INSERT_SUBREG:
    // ...
    OpNo = MO.getOperandNo();
    // ...
    MRI.constrainRegClass(DstReg, OpRC);
    // ...
    if (!RI.hasVectorRegisters(OpRC))
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  if (ST.useRealTrue16Insts()) {
    // ...
    SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // ...
    SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // ...
    bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
    bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
    // ...
    auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
    // ...
    case AMDGPU::S_PACK_LL_B32_B16:
      NewMI.addReg(SrcReg0, 0,
                   isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::lo16)
          .addReg(SrcReg1, 0,
                  isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::hi16);
      // ...
    case AMDGPU::S_PACK_LH_B32_B16:
      NewMI.addReg(SrcReg0, 0,
                   isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::lo16)
          .addReg(SrcReg1, 0, AMDGPU::hi16)
          .addImm(AMDGPU::hi16);
      // ...
    case AMDGPU::S_PACK_HL_B32_B16:
      NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
          .addImm(AMDGPU::lo16)
          .addReg(SrcReg1, 0,
                  isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::hi16);
      // ...
    case AMDGPU::S_PACK_HH_B32_B16:
      NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
          .addImm(AMDGPU::lo16)
          .addReg(SrcReg1, 0, AMDGPU::hi16)
          .addImm(AMDGPU::hi16);
      // ...
    MRI.replaceRegWith(Dest.getReg(), ResultReg);
    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
    // ...
  }
  // ...
  case AMDGPU::S_PACK_LL_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // ...
  }
  case AMDGPU::S_PACK_LH_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // ...
  }
  case AMDGPU::S_PACK_HL_B32_B16: {
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // ...
  }
  case AMDGPU::S_PACK_HH_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // ...
  }
  // ...
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
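  // Note: the pack lowering above has two strategies. On true16 subtargets the
  // S_PACK_{LL,LH,HL,HH}_B32_B16 forms become a single REG_SEQUENCE that
  // places the chosen lo16/hi16 halves of the two sources into the lo16 and
  // hi16 lanes of a 32-bit VGPR. Otherwise the pack is rebuilt from 32-bit
  // VALU arithmetic using the ImmReg/TmpReg temporaries before the result
  // register replaces the original destination.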
  assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
         !Op.isDead() && Op.getParent() == &SCCDefInst);
  SmallVector<MachineInstr *, 4> CopyToDelete;
  // ...
  for (MachineInstr &MI : /* ... */) {
    // ...
    int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
    // ...
      MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
      Register DestReg = MI.getOperand(0).getReg();
      // ...
      MRI.replaceRegWith(DestReg, NewCond);
      // ...
      MI.getOperand(SCCIdx).setReg(NewCond);
    // ...
    if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
      // ...
  }
  for (auto &Copy : CopyToDelete)
    Copy->eraseFromParent();
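  // Note: this helper walks forward from an SCC definition, rewrites each SCC
  // reader to use the new vector condition register (collecting the
  // now-redundant copies in CopyToDelete and erasing them afterwards), and
  // stops scanning once it reaches an instruction that redefines SCC.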
void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                           // ...
  for (MachineInstr &MI : /* ... */) {
    // ...
    if (MI.modifiesRegister(AMDGPU::VCC, &RI))
      // ...
    if (MI.definesRegister(AMDGPU::SCC, &RI)) {
      // ...
  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
  // ...
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::INSERT_SUBREG:
  // ...
  case AMDGPU::SOFT_WQM:
  case AMDGPU::STRICT_WWM:
  case AMDGPU::STRICT_WQM: {
    // ...
    if (RI.isAGPRClass(SrcRC)) {
      if (RI.isAGPRClass(NewDstRC))
        // ...
      switch (/* ... */) {
      case AMDGPU::REG_SEQUENCE:
      case AMDGPU::INSERT_SUBREG:
        NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
        // ...
      }
    }
    // ...
    NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
    // ...
  }
  // ...
  if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
    // ...
  NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
                                  int OpIndices[3]) const {
  const MCInstrDesc &Desc = MI.getDesc();
  // ...
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  // ...
  for (unsigned i = 0; i < 3; ++i) {
    int Idx = OpIndices[i];
    // ...
    const MachineOperand &MO = MI.getOperand(Idx);
    // ...
    const TargetRegisterClass *OpRC =
        RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
    bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
    // ...
    const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
    if (RI.isSGPRClass(RegRC))
      // ...
  }
  // ...
  if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
    SGPRReg = UsedSGPRs[0];
  // ...
  if (!SGPRReg && UsedSGPRs[1]) {
    if (UsedSGPRs[1] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[1];
  }
                                                AMDGPU::OpName OperandName) const {
  if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
    // ...
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
  // ...
  return &MI.getOperand(Idx);
  if (ST.isAmdHsaOS()) {
    // ...
    RsrcDataFormat |= (1ULL << 56);
    // ...
    RsrcDataFormat |= (2ULL << 59);
  }
  // ...
  return RsrcDataFormat;

  // ...
  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
  // ...
  uint64_t IndexStride = ST.isWave64() ? 3 : 2;
  // ...
  Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;

  // ...
  unsigned Opc = MI.getOpcode();
  // ...
  return get(Opc).mayLoad() &&
         // ...
                                          int &FrameIndex) const {
  // ...
  if (!Addr || !Addr->isFI())
    // ...

// ...
                                          int &FrameIndex) const {
// ...
                                          int &FrameIndex) const {
// ...
                                          int &FrameIndex) const {
// ...
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    // ...
  }
  unsigned Opc = MI.getOpcode();
  // ...
  unsigned DescSize = Desc.getSize();
  // ...
  unsigned Size = DescSize;
  // ...
  if (MI.isBranch() && ST.hasOffset3fBug())
    // ...
  bool HasLiteral = false;
  unsigned LiteralSize = 4;
  for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
    // ...
    if (ST.has64BitLiterals()) {
      switch (OpInfo.OperandType) {
      // ...
      }
    }
    // ...
  }
  return HasLiteral ? DescSize + LiteralSize : DescSize;
  // ...
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  // ...
  int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
  // ...
  case TargetOpcode::BUNDLE:
    // ...
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR: {
    // ...
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    // ...
  }
  // ...
  if (MI.isMetaInstruction())
    // ...
  const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
  // ...
  unsigned LoInstOpcode = D16Info->LoOp;
  // ...
  DescSize = Desc.getSize();
  // ...
  if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
    // ...
    DescSize = Desc.getSize();
    // ...
  }
  // ...
  if (MI.memoperands_empty())
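  // Note: instruction size (partially shown above) starts from the size in the
  // MCInstrDesc and then adds what the descriptor cannot know about, most
  // visibly an extra 4-byte literal word when an operand needs a literal (a
  // larger LiteralSize appears possible on subtargets with 64-bit literals),
  // plus special-case formulas for MIMG address lists, bundles, and inline
  // assembly.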
  static const std::pair<int, const char *> TargetIndices[] = {
      // ...
  };

std::pair<unsigned, unsigned>
// ...
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      // ...
  };
  // ...
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      // ...
  // ...
    return AMDGPU::WWM_COPY;
  return AMDGPU::COPY;
  // ...
  if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
    // ...
  if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
    return IsLRSplitInst;
  // ...
  bool IsNullOrVectorRegister = true;
  // ...
    IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
  // ...
  return IsNullOrVectorRegister &&
         // ...
         (!MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
          MI.modifiesRegister(AMDGPU::EXEC, &RI)));
  if (ST.hasAddNoCarry())
    // ...
  Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
  MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());

  // ...
  if (ST.hasAddNoCarry())
    // ...
  Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
                             ? /* ... */
                             : RS.scavengeRegisterBackwards(*RI.getBoolRC(), I,
                                                            false, /* ... */);
  // ...
  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
  case AMDGPU::SI_KILL_I1_TERMINATOR:
    // ...
  case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
    return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
  case AMDGPU::SI_KILL_I1_PSEUDO:
    return get(AMDGPU::SI_KILL_I1_TERMINATOR);
  // ...
  const unsigned OffsetBits = // ...
  return (1 << OffsetBits) - 1;
  if (!ST.isWave32())
    // ...
  if (MI.isInlineAsm())
    // ...
  for (auto &Op : MI.implicit_operands()) {
    if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
      Op.setReg(AMDGPU::VCC_LO);
  }
  // ...
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
  // ...
  const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
  return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
  // ...
  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // ...
      Overflow = Imm - MaxImm;
      // ...
    }
    // ...
  }
  // ...
  if (Overflow > 0) {
    // ...
    if (ST.hasRestrictedSOffset())
      // ...
    SOffset = Overflow;
  }
  // ...
  if (!ST.hasFlatInstOffsets())
    // ...
  if (ST.hasNegativeUnalignedScratchOffsetBug() &&
      // ...
std::pair<int64_t, int64_t>
// ...
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  // ...
  if (AllowNegative) {
    // ...
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;

    if (ST.hasNegativeUnalignedScratchOffsetBug() &&
        // ...
        (ImmField % 4) != 0) {
      // ...
      RemainderOffset += ImmField % 4;
      ImmField -= ImmField % 4;
    }
  } else if (COffsetVal >= 0) {
    // ...
    RemainderOffset = COffsetVal - ImmField;
  }

  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
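// The split above separates a constant address offset into a field that can be
// encoded directly in the instruction (ImmField) and a remainder that must be
// added to the base register. Below is a standalone sketch of the core
// arithmetic, added for illustration only; the helper name is ours, and the
// 12-bit field width used in the example is an assumption rather than a
// statement about any particular subtarget (the real code also handles the
// unsigned case and the unaligned-scratch workaround shown above).
#include <cassert>
#include <cstdint>
#include <utility>
static std::pair<int64_t, int64_t> splitOffsetSketch(int64_t COffsetVal,
                                                     unsigned NumBits) {
  const int64_t D = 1LL << NumBits;                // signed-field granularity
  int64_t RemainderOffset = (COffsetVal / D) * D;  // part for the base register
  int64_t ImmField = COffsetVal - RemainderOffset; // part for the immediate
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}
// Example: splitOffsetSketch(5000, 12) yields {904, 4096}, and
// splitOffsetSketch(-5000, 12) yields {-904, -4096}.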
  if (ST.hasNegativeScratchOffsetBug() &&
      // ...
  switch (ST.getGeneration()) {
  // ...
  }
  // ...
  case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
    // ...

#define GENERATE_RENAMED_GFX9_CASES(OPCODE)                                    \
  case OPCODE##_dpp:                                                           \
  case OPCODE##_e32:                                                           \
  case OPCODE##_e64:                                                           \
  case OPCODE##_e64_dpp:                                                       \
  case OPCODE##_sdwa:

  // ...
  case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
  case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
  case AMDGPU::V_FMA_F16_gfx9_e64:
  case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
  case AMDGPU::V_INTERP_P2_F16:
  case AMDGPU::V_MAD_F16_e64:
  case AMDGPU::V_MAD_U16_e64:
  case AMDGPU::V_MAD_I16_e64:
    // ...
  // ...
         "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
  switch (ST.getGeneration()) {
  // ...
  }
  // ...
  if (isMAI(Opcode)) {
    // ...
    if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
      // ...
  }
  // ...
  if (ST.hasGFX90AInsts()) {
    // ...
    if (ST.hasGFX940Insts())
      // ...
  }
  // ...
  for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
    if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
      auto &RegOp = MI.getOperand(1 + 2 * I);
      // ...
    }
  switch (MI.getOpcode()) {
  // ...
  case AMDGPU::REG_SEQUENCE:
    // ...
  case AMDGPU::INSERT_SUBREG:
    if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
      // ...
  }
  // ...
  if (!P.Reg.isVirtual())
    // ...
  auto *DefInst = MRI.getVRegDef(RSR.Reg);
  while (auto *MI = DefInst) {
    // ...
    switch (MI->getOpcode()) {
    // ...
    case AMDGPU::V_MOV_B32_e32: {
      auto &Op1 = MI->getOperand(1);
      // ...
      DefInst = MRI.getVRegDef(RSR.Reg);
      // ...
    }
    // ...
      DefInst = MRI.getVRegDef(RSR.Reg);
    // ...
    }
  }
  assert(MRI.isSSA() && "Must be run on SSA");
  auto *TRI = MRI.getTargetRegisterInfo();
  auto *DefBB = DefMI.getParent();
  // ...
  if (UseMI.getParent() != DefBB)
    // ...
  const int MaxInstScan = 20;
  // ...
  auto E = UseMI.getIterator();
  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
    if (I->isDebugInstr())
      // ...
    if (++NumInst > MaxInstScan)
      // ...
    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
      // ...
  }

  // ...
  assert(MRI.isSSA() && "Must be run on SSA");
  auto *TRI = MRI.getTargetRegisterInfo();
  auto *DefBB = DefMI.getParent();
  // ...
  const int MaxUseScan = 10;
  // ...
  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
    auto &UseInst = *Use.getParent();
    // ...
    if (UseInst.getParent() != DefBB || UseInst.isPHI())
      // ...
    if (++NumUse > MaxUseScan)
      // ...
  }
  // ...
  const int MaxInstScan = 20;
  // ...
  for (auto I = std::next(DefMI.getIterator()); ; ++I) {
    // ...
    if (I->isDebugInstr())
      // ...
    if (++NumInst > MaxInstScan)
      // ...
    if (Reg == VReg && --NumUse == 0)
      // ...
    } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
      // ...
  }
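  // Note: the two scans above are deliberately bounded (MaxInstScan = 20
  // linear instructions, MaxUseScan = 10 uses) so the "may EXEC change between
  // this def and its use(s)?" query stays cheap; when a limit is hit the code
  // appears to give up conservatively rather than keep walking the block.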
  auto Cur = MBB.begin();
  if (Cur != MBB.end())
    do {
      if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
        // ...
      // ...
    } while (Cur != MBB.end() && Cur != LastPHIIt);
  // ...
  if (InsPt != MBB.end() &&
      (InsPt->getOpcode() == AMDGPU::SI_IF ||
       InsPt->getOpcode() == AMDGPU::SI_ELSE ||
       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
      InsPt->definesRegister(Src, nullptr)) {
    // ...
        .addReg(Src, 0, SrcSubReg)
    // ...
  }
  // ...
  if (isFullCopyInstr(MI)) {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // ...
      MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
    // ...
      MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
    // ...
  }
                                         unsigned *PredCost) const {
  if (MI.isBundle()) {
    // ...
    unsigned Lat = 0, Count = 0;
    for (++I; I != E && I->isBundledWithPred(); ++I) {
      // ...
      Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
    }
    return Lat + Count - 1;
  }
  // ...
  return SchedModel.computeInstrLatency(&MI);
  return *CallAddrOp;

  // ...
  unsigned Opcode = MI.getOpcode();
  // ...
                     : MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(Dst);
    LLT SrcTy = MRI.getType(Src);
    // ...
    unsigned SrcAS = SrcTy.getAddressSpace();
    // ...
        ST.hasGloballyAddressableScratch()
    // ...
  if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
    return HandleAddrSpaceCast(MI);
  // ...
    auto IID = GI->getIntrinsicID();
    // ...
    case Intrinsic::amdgcn_addrspacecast_nonnull:
      return HandleAddrSpaceCast(MI);
    case Intrinsic::amdgcn_if:
    case Intrinsic::amdgcn_else:
      // ...
  if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
      Opcode == AMDGPU::G_SEXTLOAD) {
    if (MI.memoperands_empty())
      // ...
    return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
           mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
    // ...
  }
  if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
      Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
      Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
      // ...

  // ...
  unsigned opcode = MI.getOpcode();
  if (opcode == AMDGPU::V_READLANE_B32 ||
      opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
    // ...
  if (isCopyInstr(MI)) {
    // ...
        RI.getPhysRegBaseClass(srcOp.getReg());
    // ...
  }
  if (MI.isPreISelOpcode())
    // ...
  if (MI.memoperands_empty())
    // ...
  return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
         mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    // ...
    if (!SrcOp.isReg())
      // ...
    if (!Reg || !SrcOp.readsReg())
      // ...
    if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
      // ...
  }
  // ...
      F, "ds_ordered_count unsupported for this calling conv"));
                                Register &SrcReg2, int64_t &CmpMask,
                                int64_t &CmpValue) const {
  if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
    // ...
  switch (MI.getOpcode()) {
  // ...
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMP_LT_U32:
  case AMDGPU::S_CMP_LT_I32:
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMP_LE_U32:
  case AMDGPU::S_CMP_LE_I32:
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMP_EQ_U64:
  case AMDGPU::S_CMP_LG_U64:
    SrcReg = MI.getOperand(0).getReg();
    if (MI.getOperand(1).isReg()) {
      if (MI.getOperand(1).getSubReg())
        // ...
      SrcReg2 = MI.getOperand(1).getReg();
      // ...
    } else if (MI.getOperand(1).isImm()) {
      // ...
      CmpValue = MI.getOperand(1).getImm();
      // ...
    }
    // ...
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
  case AMDGPU::S_CMPK_LT_U32:
  case AMDGPU::S_CMPK_LT_I32:
  case AMDGPU::S_CMPK_GT_U32:
  case AMDGPU::S_CMPK_GT_I32:
  case AMDGPU::S_CMPK_LE_U32:
  case AMDGPU::S_CMPK_LE_I32:
  case AMDGPU::S_CMPK_GE_U32:
  case AMDGPU::S_CMPK_GE_I32:
    SrcReg = MI.getOperand(0).getReg();
    // ...
    CmpValue = MI.getOperand(1).getImm();
    // ...
  }
    if (S->isLiveIn(AMDGPU::SCC))
      // ...

bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
  // ...
  bool SCCIsDead = false;
  // ...
  constexpr unsigned ScanLimit = 12;
  unsigned Count = 0;
  for (MachineInstr &MI : /* ... */) {
    if (++Count > ScanLimit)
      // ...
    if (MI.readsRegister(AMDGPU::SCC, &RI)) {
      if (MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
          MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
          MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
          MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
        // ...
    }
    if (MI.definesRegister(AMDGPU::SCC, &RI)) {
      // ...
    }
  }
  // ...
  for (MachineInstr *MI : InvertInstr) {
    if (MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
        MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
      // ...
    } else if (MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
               MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
      MI->setDesc(get(MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
                          ? AMDGPU::S_CBRANCH_SCC1
                          : AMDGPU::S_CBRANCH_SCC0));
      // ...
    }
  }

// ...
                               bool NeedInversion) const {
  MachineInstr *KillsSCC = nullptr;
  // ...
    if (MI.modifiesRegister(AMDGPU::SCC, &RI))
      // ...
    if (MI.killsRegister(AMDGPU::SCC, &RI))
      // ...
  if (NeedInversion && !invertSCCUse(SCCRedefine))
    // ...
  if (MachineOperand *SccDef =
          // ...
    SccDef->setIsDead(false);

  // ...
  if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
      Def.getOpcode() != AMDGPU::S_CSELECT_B64)
    // ...
  bool Op1IsNonZeroImm =
      Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
  bool Op2IsZeroImm =
      Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
  if (!Op1IsNonZeroImm || !Op2IsZeroImm)
    // ...
                                       Register SrcReg2, int64_t CmpMask,
                                       // ...
  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
                                  this](bool NeedInversion) -> bool {
    // ...
    if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
      // ...
    if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
        MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
      // ...
      if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
          // ...
        optimizeSCC(Select, Def, false);
      // ...
    }
    // ...
  };

  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
                               this](int64_t ExpectedValue, unsigned SrcSize,
                                     bool IsReversible, bool IsSigned) -> bool {
    // ...
    if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
        Def->getOpcode() != AMDGPU::S_AND_B64)
      // ...
    const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
      // ...
    };
    // ...
      SrcOp = &Def->getOperand(2);
    else if (isMask(&Def->getOperand(2)))
      SrcOp = &Def->getOperand(1);
    // ...
    if (IsSigned && BitNo == SrcSize - 1)
      // ...
    ExpectedValue <<= BitNo;

    bool IsReversedCC = false;
    if (CmpValue != ExpectedValue) {
      // ...
      IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
      // ...
    }

    Register DefReg = Def->getOperand(0).getReg();
    if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
      // ...
    if (!optimizeSCC(Def, &CmpInstr, false))
      // ...
    if (!MRI->use_nodbg_empty(DefReg)) {
      // ...
    }
    // ...
    unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
                                                     : AMDGPU::S_BITCMP1_B32
                                      : IsReversedCC ? AMDGPU::S_BITCMP0_B64
                                                     : AMDGPU::S_BITCMP1_B64;
    // ...
    Def->eraseFromParent();
    // ...
  };

  switch (CmpInstr.getOpcode()) {
  // ...
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
    return optimizeCmpAnd(1, 32, true, false) || optimizeCmpSelect(true);
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMPK_GE_U32:
    return optimizeCmpAnd(1, 32, false, false);
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMPK_GE_I32:
    return optimizeCmpAnd(1, 32, false, true);
  case AMDGPU::S_CMP_EQ_U64:
    return optimizeCmpAnd(1, 64, true, false);
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
    return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect(false);
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMPK_GT_U32:
    return optimizeCmpAnd(0, 32, false, false);
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMPK_GT_I32:
    return optimizeCmpAnd(0, 32, false, true);
  case AMDGPU::S_CMP_LG_U64:
    return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect(false);
  }
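  // Note: the two lambdas above carry the compare eliminations performed here.
  // optimizeCmpAnd recognizes a compare of (s_and_bNN %x, single-bit mask)
  // against an expected value and, when possible, replaces the pair with
  // s_bitcmp0/s_bitcmp1 on the original source; the reversed forms handle the
  // opposite polarity, detected via CmpValue == (ExpectedValue ^ Mask).
  // optimizeCmpSelect instead drops a compare whose input is an s_cselect of a
  // non-zero/zero immediate pair, since SCC already carries that condition,
  // inverting the SCC users when the comparison sense requires it.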
                                          AMDGPU::OpName OpName) const {
  if (!ST.needsAlignedVGPRs())
    // ...
  int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
  // ...
  bool IsAGPR = RI.isAGPR(MRI, DataReg);
  // ...
      IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
  // ...
      MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
                                       : &AMDGPU::VReg_64_Align2RegClass);
  // ...
      .addReg(DataReg, 0, Op.getSubReg())
  // ...
  Op.setSubReg(AMDGPU::sub0);

  // ...
  unsigned Opcode = MI.getOpcode();
  // ...
      Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
      Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
    // ...
  if (!ST.hasGFX940Insts())
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo for the given instruction opcode.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
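getAddNoCarry hands back a MachineInstrBuilder with the destination already attached, so the caller appends the two sources and the clamp bit expected by the e64 add encodings. A hedged sketch of that pattern; TII, MBB, I, DL and the registers are assumed to come from the surrounding pass:

// Sketch: DestReg = Src0 + Src1 using whichever carry-less (or dead-carry)
// VALU add the subtarget provides.
TII->getAddNoCarry(MBB, I, DL, DestReg)
    .addReg(Src0)
    .addReg(Src1)
    .addImm(0); // clamp bit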
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
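Callers typically use this to keep the encodable part of a large constant offset in the FLAT immediate field and fold the remainder into the address register. A hedged sketch following the signature above (TII and ImmOffset are placeholders from the surrounding code):

// Sketch: EncodedOffset fits the instruction's offset field; RemainderOffset
// must be added to the address computation separately.
auto [EncodedOffset, RemainderOffset] = TII->splitFlatOffset(
    ImmOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);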
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by the assembler.
static bool setsSCCifResultIsNonZero(const MachineInstr &MI)
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named OperandName, or nullptr if MI has no such operand.
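Operands of AMDGPU instructions are normally located by name rather than by raw index, since positions differ between encodings. A hedged sketch of the usual null-checked lookup (TII and MI are assumed from the surrounding code):

// Sketch: a null result means this opcode has no soffset operand at all.
if (const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset)) {
  if (SOffset->isImm() && SOffset->getImm() == 0) {
    // soffset is statically zero
  }
}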
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the operand at index OpIdx of MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
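all_of and the related range helpers from llvm/ADT/STLExtras.h let passes test properties over operand lists without explicit begin/end pairs; a small sketch (the helper name allUsesAreRegisters is illustrative):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Sketch: check a predicate over every use operand of an instruction.
static bool allUsesAreRegisters(const MachineInstr &MI) {
  return all_of(MI.uses(),
                [](const MachineOperand &MO) { return MO.isReg(); });
}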
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
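BuildMI is the standard entry point for emitting a new MachineInstr; a hedged sketch of the common overload taking an insertion point, debug location and instruction descriptor (MBB, I, DL, TII, DestReg and SrcReg are assumed from the surrounding code):

#include "llvm/CodeGen/MachineInstrBuilder.h"

// Sketch: emit a 32-bit VALU move and append its source operand fluently.
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), DestReg)
    .addReg(SrcReg, RegState::Kill);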
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit towards the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
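Lo_32 and Hi_32 are the usual way to split a 64-bit immediate into the two halves that a pair of 32-bit moves would materialize; a self-contained sketch:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Sketch: split a 64-bit immediate into its 32-bit halves.
static void splitImm64(uint64_t Imm, uint32_t &Lo, uint32_t &Hi) {
  Lo = llvm::Lo_32(Imm); // bits [31:0]
  Hi = llvm::Hi_32(Imm); // bits [63:32]
}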
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
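A few of the MathExtras helpers above, exercised with concrete values in a standalone sketch (the numbers are illustrative only):

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  using namespace llvm;
  assert(isUInt<12>(4095) && !isUInt<12>(4096));  // 12-bit unsigned range check
  assert(maxUIntN(12) == 4095);                   // largest 12-bit unsigned value
  assert(SignExtend64<13>(0x1FFF) == -1);         // sign-extend the low 13 bits
  assert(maskTrailingOnes<uint32_t>(8) == 0xFFu); // 8 right-most bits set
  return 0;
}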
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.