#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
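// The GET_INSTRINFO_CTOR_DTOR macro above selects the TableGen-emitted
// constructor/destructor bodies from AMDGPUGenInstrInfo.inc, and the
// GET_*_IMPL macros pull in the generated searchable tables (image-dimension
// and resource-intrinsic lookups) that the query helpers in this file rely on.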
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
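  // If neither opcode has the named operand there is nothing to compare, and
  // if only one of the two has it the instructions cannot share its value.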
  return !MI.memoperands_empty() &&
         llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
         });
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())

bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {

  if (MI.isCompare()) {

      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:

      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {

  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
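  // Walk outward from the cycle containing the current block until reaching
  // the cycle (if any) that contains the sink destination; sinking appears to
  // be allowed only if every cycle crossed on the way can be exited without
  // affecting the value being sunk (the checks elided here guard that).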
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
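  // The *2ST64 DS opcodes place their two elements 64 elements apart, so
  // callers scale the element size by 64 when turning the offset fields into
  // byte offsets.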
  OffsetIsScalable = false;

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);

    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {

      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
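  // NumBytes covers the whole prospective cluster, so LoadSize is the average
  // size of one access; rounding it up to dwords and rescaling by ClusterSize
  // caps the cluster at MaxMemoryClusterDWords. For example, clustering four
  // 12-byte loads gives LoadSize = 12 and NumDWords = 3 * 4 = 12.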
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
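  // A simple heuristic: only keep loads near each other when there are at
  // most 16 of them and their offsets fall within a 64-byte window, which
  // keeps the group roughly within one cache line without over-constraining
  // the scheduler.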
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

    for (auto Def = MI, E = MBB.begin(); Def != E; ) {

      if (!Def->modifiesRegister(SrcReg, &RI))

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

    RS.enterBasicBlockEnd(MBB);
    RS.backward(std::next(MI));

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

    Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

    if (ImpUseSuperReg) {
      UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;
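  // When both the source and destination subregisters of the next pair start
  // on an even SGPR, two 32-bit moves can be fused into a single S_MOV_B64,
  // halving the number of copy instructions for large SGPR tuples.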
  assert(FirstMI && LastMI);

    LastMI->addRegisterKilled(SrcReg, &RI);

                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {

  unsigned Size = RI.getRegSizeInBits(*RC);

  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {

    assert(ST.useRealTrue16Insts());

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {

    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {

    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
    else
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();
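  // INSTRUCTION_LIST_END here means no single move opcode can express the
  // copy (e.g. SGPR-to-AGPR on GFX908), so each 32-bit piece is routed
  // through an intermediate VGPR; the scavenger is created lazily to find
  // that temporary VGPR.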
  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    unsigned SubIdx;
    if (Forward)
      SubIdx = SubIndices[Idx];
    else
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                         *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {

    return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

                                      int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO: {

    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {

    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {

    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;

  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {

    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;

  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
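// The opcode selection above picks the canonical move for a destination
// class: a generic COPY for AGPR destinations and anything the size checks do
// not match, S_MOV_B32/S_MOV_B64 for scalar destinations, V_MOV_B32_e32 or
// V_MOV_B64_PSEUDO for 32- and 64-bit vector destinations, and
// V_MOV_B16_t16_e64 for non-scalar 16-bit destinations.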
                                                      bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                               bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;

                                         bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = RI.getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {

    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())

    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                            bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = RI.getSpillSize(*RC);

      FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {

    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())

                              unsigned Quantity) const {

  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);
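    // S_NOP's immediate encodes one less than the number of no-ops, so a
    // single instruction covers up to MaxSNopCount of them; e.g. with 3
    // encoding bits a request for 10 no-ops becomes S_NOP(7) then S_NOP(1).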
  auto *MF = MBB.getParent();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);

    ContBB = HaltLoopBB;

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
  Register DoorbellRegMasked =
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
  Register SetWaveAbortBit =
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
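  // On subtargets without a hardware trap handler this block emulates one:
  // the wave's doorbell ID is masked with DoorbellIDMask, the
  // ECQueueWaveAbort bit is ORed in, and the result is routed through M0
  // (with the previous M0 value apparently preserved in TTMP2) to flag the
  // queue as aborted before the wave parks itself in a halt loop.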
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;

  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));

  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {

    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {

    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
    } else {
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {

    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {

          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {

      Op.setOffset(Op.getOffset() + 4);

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {

  case AMDGPU::ENTER_STRICT_WQM: {

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {

  case AMDGPU::SI_RETURN: {

    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {

    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));

      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {

      if (SrcOp.isImm()) {

        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());

        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);

std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                             AMDGPU::OpName Src0OpName,
                                             AMDGPU::OpName Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();

  NonRegOp2.setImm(NonRegVal);
                                        unsigned OpIdx1) const {

  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&

  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&

  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {

  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                      Src1, AMDGPU::OpName::src1_modifiers);

                   AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                     int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.hasAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);

      MCCtx.createTempSymbol("offset_lo", true);

      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;

    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(

    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
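// Long branches whose displacement exceeds the S_CBRANCH offset range are
// expanded above into an S_GETPC_B64 plus a 64-bit add of a label-difference
// expression; the 64-bit SGPR pair holding the target is either the reserved
// long-branch register or a scavenged pair, falling back to spilling
// SGPR0_SGPR1 when nothing else is free.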
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {

  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {

  if (Cond[0].isImm()) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.isSGPRClass(RC);

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {

    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };
  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:

  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();

    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);

      MI.removeOperand(Idx);

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);
                                                   unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:

  case AMDGPU::sub1_lo16:

  case AMDGPU::sub1_hi16:

  return std::nullopt;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
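// The two mappings above translate MAC/MAD/FMA opcodes into their "AK" forms
// (the addend becomes an inline literal) and "MK" forms (one multiplicand
// becomes a literal); foldImmediate below uses them to fold a constant
// operand directly into the multiply-add instruction.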
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {

      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (RI.hasVGPRs(DstRC))

    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);

  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

    if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {

        if (Def && Def->isMoveImmediate() &&

        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {

        if (Def && Def->isMoveImmediate() &&
            MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;
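  // Two constant-offset accesses are disjoint when the lower one ends at or
  // before the higher one starts, i.e. LowOffset + LowWidth <= HighOffset;
  // for example offsets 0 and 16 with widths 16 and 4 do not overlap.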
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,

  int64_t Offset0, Offset1;

  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())

  auto *Def = MRI.getUniqueVRegDef(Reg);

  Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();

    if (Op.isReg() && Op.isKill())

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;

  if (MI.isBundle()) {

    if (MI.getBundleSize() != 1)

    CandidateMI = MI.getNextNode();

  MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);

  if (MI.isBundle()) {
        MI.untieRegOperand(MO.getOperandNo());

    if (Def.isEarlyClobber() && Def.isReg() &&

      auto UpdateDefIndex = [&](LiveRange &LR) {
        auto *S = LR.find(OldIndex);
        if (S != LR.end() && S->start == OldIndex) {
          assert(S->valno && S->valno->def == OldIndex);
          S->start = NewIndex;
          S->valno->def = NewIndex;

      for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {

    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();

    if (MRI.hasOneNonDBGUse(DefReg)) {

      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      if (MI.isBundle()) {
          if (MO.isReg() && MO.getReg() == DefReg) {
            assert(MO.getSubReg() == 0 &&
                   "tied sub-registers in bundles currently not supported");
            MI.removeOperand(MO.getOperandNo());

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

      if (MI.isBundle()) {
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

  return MI.isBundle() ? &MI : NewMI;
                                    ThreeAddressUpdates &U) const {

  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {

    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);

  MachineInstrBuilder MIB;

  const MachineOperand *Src0Mods =

  const MachineOperand *Src1Mods =

  const MachineOperand *Src2Mods =

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
    MachineInstr *DefMI;

        MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())

  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
4432 if (
MI.getMF()->getFunction().hasFnAttribute(
"amdgpu-no-flat-scratch-init"))
4441 if (
MI.memoperands_empty())
4446 unsigned AS = Memop->getAddrSpace();
4447 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4448 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4449 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4450 *MD, AMDGPUAS::PRIVATE_ADDRESS);
4465 if (
MI.memoperands_empty())
4474 unsigned AS = Memop->getAddrSpace();
4491 if (ST.isTgSplitEnabled())
4496 if (
MI.memoperands_empty())
4501 unsigned AS = Memop->getAddrSpace();
4517 unsigned Opcode =
MI.getOpcode();
  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
  if (MI.isCall() || MI.isInlineAsm())
  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
  if (MI.isMetaInstruction())
  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))
    return MI.readsRegister(AMDGPU::EXEC, &RI);
  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {
                             ST.hasInv2PiInlineImm());
                             ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                             ST.hasInv2PiInlineImm());
  APInt IntImm = Imm.bitcastToAPInt();
  bool HasInv2Pi = ST.hasInv2PiInlineImm();
    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&
  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
  return ST.hasVOP3Literal();
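// The checks above decide whether an immediate can be folded as an inline
// constant or must be emitted as a literal. A minimal sketch of the usual
// AMDGPU inline-constant rules this code relies on (illustrative values, not a
// new API):
//
//   isInlineConstant(APInt(32, 7));    // true: integers -16..64 are inline
//   isInlineConstant(APInt(32, 65));   // false: needs a literal slot
//   // FP inline constants: 0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, and 1/(2*pi)
//   // only where ST.hasInv2PiInlineImm() holds.
//
// Anything else consumes a literal, and VOP3 encodings accept one only on
// subtargets where ST.hasVOP3Literal() is true.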
                                  int64_t ImmVal) const {
  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))
  return RI.opCanUseInlineConstant(OpInfo.OperandType);
         "unexpected imm-like operand kind");
  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
                                   AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_CNDMASK_B32_e64:
    if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
                                           unsigned Op32) const {
      Inst32.add(MI.getOperand(I));
  int Idx = MI.getNumExplicitDefs();
  int OpTy = MI.getDesc().operands()[Idx++].OperandType;
  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
  return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);
  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:
  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
  if (SubReg.getReg().isPhysical())
  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";
  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);
  if (SIInstrInfo::isGenericOpcode(Opcode))
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
  if (MI.isInlineAsm()) {
    if (!Reg.isVirtual() && !RC->contains(Reg)) {
      ErrInfo = "inlineasm operand has incorrect register class.";
  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";
  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";
    int16_t RegClass = getOpRegClassID(OpInfo);
    switch (OpInfo.OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Expected inline constant for operand.";
      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
    if (OpInfo.isGenericType())
    if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
      if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
            RI.getSubRegisterClass(RC, MO.getSubReg())) {
          RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
      if (!RC || !RI.isProperlyAlignedRC(*RC)) {
        ErrInfo = "Subtarget requires even aligned vector registers";
    if (RegClass != -1) {
      if (Reg.isVirtual())
        ErrInfo = "Operand has incorrect register class.";
    if (!ST.hasSDWA()) {
      ErrInfo = "SDWA is not supported on this target";
    for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
                    AMDGPU::OpName::dst_sel}) {
        int64_t Imm = MO->getImm();
          ErrInfo = "Invalid SDWA selection";
    int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
      if (!ST.hasSDWAScalar()) {
        if (!MO.isReg() ||
            !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
          "Only reg allowed as operands in SDWA instructions on GFX9+";
    if (!ST.hasSDWAOmod()) {
      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";
    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";
    if (isVOPC(BasicOpcode)) {
      if (!ST.hasSDWASdst() && DstIdx != -1) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
      } else if (!ST.hasSDWAOutModsVOPC()) {
        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";
          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
          "Dst register should be tied to implicit use of preserved register";
        ErrInfo = "Dst register should use same physical register as preserved";
5350 if (
isImage(Opcode) && !
MI.mayStore()) {
5362 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5370 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5374 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5375 if (RegCount > DstSize) {
5376 ErrInfo =
"Image instruction returns too many registers for dst "
5385 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5386 unsigned ConstantBusCount = 0;
5387 bool UsesLiteral =
false;
5390 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5394 LiteralVal = &
MI.getOperand(ImmIdx);
5403 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5414 }
else if (!MO.
isFI()) {
5421 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5431 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5432 return !RI.regsOverlap(SGPRUsed, SGPR);
5441 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5442 Opcode != AMDGPU::V_WRITELANE_B32) {
5443 ErrInfo =
"VOP* instruction violates constant bus restriction";
5447 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5448 ErrInfo =
"VOP3 instruction uses literal";
5455 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5456 unsigned SGPRCount = 0;
5459 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5467 if (MO.
getReg() != SGPRUsed)
5472 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5473 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5480 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5481 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5488 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5498 ErrInfo =
"ABS not allowed in VOP3B instructions";
5511 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5518 if (
Desc.isBranch()) {
5520 ErrInfo =
"invalid branch target for SOPK instruction";
5527 ErrInfo =
"invalid immediate for SOPK instruction";
5532 ErrInfo =
"invalid immediate for SOPK instruction";
5539 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5540 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5541 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5542 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5543 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5544 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5546 const unsigned StaticNumOps =
5547 Desc.getNumOperands() +
Desc.implicit_uses().size();
5548 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5554 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5555 ErrInfo =
"missing implicit register operands";
5561 if (!Dst->isUse()) {
5562 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5567 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5568 UseOpIdx != StaticNumOps + 1) {
5569 ErrInfo =
"movrel implicit operands should be tied";
5576 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5578 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5579 ErrInfo =
"src0 should be subreg of implicit vector use";
5587 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5588 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5594 if (
MI.mayStore() &&
5599 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5600 ErrInfo =
"scalar stores must use m0 as offset register";
5606 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5608 if (
Offset->getImm() != 0) {
5609 ErrInfo =
"subtarget does not support offsets in flat instructions";
5614 if (
isDS(
MI) && !ST.hasGDS()) {
5616 if (GDSOp && GDSOp->
getImm() != 0) {
5617 ErrInfo =
"GDS is not supported on this subtarget";
      int VAddr0Idx =
          AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vaddr0);
      AMDGPU::OpName RSrcOpName =
          isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
      int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
        ErrInfo = "dim is out of range";
      if (ST.hasR128A16()) {
        IsA16 = R128A16->getImm() != 0;
      } else if (ST.hasA16()) {
        IsA16 = A16->getImm() != 0;
      bool IsNSA = RsrcIdx - VAddr0Idx > 1;
      unsigned AddrWords =
      unsigned VAddrWords;
        VAddrWords = RsrcIdx - VAddr0Idx;
        if (ST.hasPartialNSAEncoding() &&
          unsigned LastVAddrIdx = RsrcIdx - 1;
          VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
      if (VAddrWords != AddrWords) {
                          << " but got " << VAddrWords << "\n");
        ErrInfo = "bad vaddr size";
    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";
    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";
    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";
    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST && !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
      if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";
    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
5733 AMDGPU::OpName DataName =
5734 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5740 if (ST.hasGFX90AInsts()) {
5741 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5742 (RI.isAGPR(
MRI, Dst->getReg()) != RI.isAGPR(
MRI,
Data->getReg()))) {
5743 ErrInfo =
"Invalid register class: "
5744 "vdata and vdst should be both VGPR or AGPR";
5747 if (
Data && Data2 &&
5749 ErrInfo =
"Invalid register class: "
5750 "both data operands should be VGPR or AGPR";
5754 if ((Dst && RI.isAGPR(
MRI, Dst->getReg())) ||
5756 (Data2 && RI.isAGPR(
MRI, Data2->
getReg()))) {
5757 ErrInfo =
"Invalid register class: "
5758 "agpr loads and stores not supported on this GPU";
  if (ST.needsAlignedVGPRs()) {
    const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
      if (Reg.isPhysical())
        return !(RI.getHWRegIndex(Reg) & 1);
      return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
             !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
    if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
        Opcode == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";
    if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
      ErrInfo = "Subtarget requires even aligned vector registers "
                "for vaddr operand of image instructions";
5796 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5798 if (Src->isReg() && RI.isSGPRReg(
MRI, Src->getReg())) {
5799 ErrInfo =
"Invalid register class: "
5800 "v_accvgpr_write with an SGPR is not supported on this GPU";
5805 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5808 ErrInfo =
"pseudo expects only physical SGPRs";
5815 if (!ST.hasScaleOffset()) {
5816 ErrInfo =
"Subtarget does not support offset scaling";
5820 ErrInfo =
"Instruction does not support offset scaling";
5829 for (
unsigned I = 0;
I < 3; ++
I) {
5835 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
5836 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
5838 if ((Dst && RI.getRegClassForReg(
MRI, Dst->getReg()) ==
5839 &AMDGPU::SReg_64RegClass) ||
5840 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5841 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM: return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
           RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U64_PSEUDO:
    return AMDGPU::V_ADD_U64_PSEUDO;
  case AMDGPU::S_SUB_U64_PSEUDO:
    return AMDGPU::V_SUB_U64_PSEUDO;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
  case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
  case AMDGPU::S_CVT_F32_F16:
  case AMDGPU::S_CVT_HI_F32_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
                                   : AMDGPU::V_CVT_F32_F16_fake16_e64;
  case AMDGPU::S_CVT_F16_F32:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
                                   : AMDGPU::V_CVT_F16_F32_fake16_e64;
  case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
  case AMDGPU::S_CEIL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
                                   : AMDGPU::V_CEIL_F16_fake16_e64;
  case AMDGPU::S_FLOOR_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
                                   : AMDGPU::V_FLOOR_F16_fake16_e64;
  case AMDGPU::S_TRUNC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
                                   : AMDGPU::V_TRUNC_F16_fake16_e64;
  case AMDGPU::S_RNDNE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
                                   : AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
  case AMDGPU::S_ADD_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
                                   : AMDGPU::V_ADD_F16_fake16_e64;
  case AMDGPU::S_SUB_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
                                   : AMDGPU::V_SUB_F16_fake16_e64;
  case AMDGPU::S_MIN_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
                                   : AMDGPU::V_MIN_F16_fake16_e64;
  case AMDGPU::S_MAX_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
                                   : AMDGPU::V_MAX_F16_fake16_e64;
  case AMDGPU::S_MINIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
                                   : AMDGPU::V_MINIMUM_F16_fake16_e64;
  case AMDGPU::S_MAXIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
                                   : AMDGPU::V_MAXIMUM_F16_fake16_e64;
  case AMDGPU::S_MUL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
                                   : AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
  case AMDGPU::S_FMAC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
                                   : AMDGPU::V_FMAC_F16_fake16_e64;
  case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
  case AMDGPU::S_CMP_LT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
                                   : AMDGPU::V_CMP_LT_F16_fake16_e64;
  case AMDGPU::S_CMP_EQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
                                   : AMDGPU::V_CMP_EQ_F16_fake16_e64;
  case AMDGPU::S_CMP_LE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
                                   : AMDGPU::V_CMP_LE_F16_fake16_e64;
  case AMDGPU::S_CMP_GT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
                                   : AMDGPU::V_CMP_GT_F16_fake16_e64;
  case AMDGPU::S_CMP_LG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
                                   : AMDGPU::V_CMP_LG_F16_fake16_e64;
  case AMDGPU::S_CMP_GE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
                                   : AMDGPU::V_CMP_GE_F16_fake16_e64;
  case AMDGPU::S_CMP_O_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
                                   : AMDGPU::V_CMP_O_F16_fake16_e64;
  case AMDGPU::S_CMP_U_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
                                   : AMDGPU::V_CMP_U_F16_fake16_e64;
  case AMDGPU::S_CMP_NGE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
                                   : AMDGPU::V_CMP_NGE_F16_fake16_e64;
  case AMDGPU::S_CMP_NLG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
                                   : AMDGPU::V_CMP_NLG_F16_fake16_e64;
  case AMDGPU::S_CMP_NGT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
                                   : AMDGPU::V_CMP_NGT_F16_fake16_e64;
  case AMDGPU::S_CMP_NLE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
                                   : AMDGPU::V_CMP_NLE_F16_fake16_e64;
  case AMDGPU::S_CMP_NEQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
                                   : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
  case AMDGPU::S_CMP_NLT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
                                   : AMDGPU::V_CMP_NLT_F16_fake16_e64;
  case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
                                   : AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
                                   : AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
                                   : AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
                                   : AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
                                   : AMDGPU::V_SQRT_F16_fake16_e64;
         "Unexpected scalar opcode without corresponding vector one!");
6121 "Not a whole wave func");
6124 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6125 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6132 unsigned OpNo)
const {
6134 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6135 Desc.operands()[OpNo].RegClass == -1) {
6138 if (Reg.isVirtual()) {
6140 return MRI.getRegClass(Reg);
6142 return RI.getPhysRegBaseClass(Reg);
6145 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6146 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6154 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6156 unsigned Size = RI.getRegSizeInBits(*RC);
6157 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6158 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6159 : AMDGPU::V_MOV_B32_e32;
6161 Opcode = AMDGPU::COPY;
6162 else if (RI.isSGPRClass(RC))
6163 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6177 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6183 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6194 if (SubIdx == AMDGPU::sub0)
6196 if (SubIdx == AMDGPU::sub1)
6208void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6224 if (Reg.isPhysical())
6234 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6237 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6244 unsigned Opc =
MI.getOpcode();
6250 constexpr AMDGPU::OpName OpNames[] = {
6251 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6254 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6255 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6265 bool IsAGPR = RI.isAGPR(
MRI, MO.
getReg());
6266 if (IsAGPR && !ST.hasMAIInsts())
6268 if (IsAGPR && (!ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
6272 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6273 const int DataIdx = AMDGPU::getNamedOperandIdx(
6274 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6275 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6276 MI.getOperand(DataIdx).isReg() &&
6277 RI.isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6279 if ((
int)
OpIdx == DataIdx) {
6280 if (VDstIdx != -1 &&
6281 RI.isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6284 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6285 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6286 RI.isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6291 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6292 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6296 if (ST.hasFlatScratchHiInB64InstHazard() &&
6303 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6324 constexpr unsigned NumOps = 3;
6325 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6326 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6327 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6328 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6333 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6336 MO = &
MI.getOperand(SrcIdx);
6343 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6347 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6351 return !OpSel && !OpSelHi;
6360 int64_t RegClass = getOpRegClassID(OpInfo);
6362 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6371 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6372 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6376 if (!LiteralLimit--)
6386 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6394 if (--ConstantBusLimit <= 0)
6406 if (!LiteralLimit--)
6408 if (--ConstantBusLimit <= 0)
6414 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6418 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6420 !
Op.isIdenticalTo(*MO))
6430 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6444 bool Is64BitOp = Is64BitFPOp ||
6451 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6460 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6478 bool IsGFX950Only = ST.hasGFX950Insts();
6479 bool IsGFX940Only = ST.hasGFX940Insts();
6481 if (!IsGFX950Only && !IsGFX940Only)
6499 unsigned Opcode =
MI.getOpcode();
6501 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6502 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6503 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6504 case AMDGPU::V_MQSAD_U32_U8_e64:
6505 case AMDGPU::V_PK_ADD_F16:
6506 case AMDGPU::V_PK_ADD_F32:
6507 case AMDGPU::V_PK_ADD_I16:
6508 case AMDGPU::V_PK_ADD_U16:
6509 case AMDGPU::V_PK_ASHRREV_I16:
6510 case AMDGPU::V_PK_FMA_F16:
6511 case AMDGPU::V_PK_FMA_F32:
6512 case AMDGPU::V_PK_FMAC_F16_e32:
6513 case AMDGPU::V_PK_FMAC_F16_e64:
6514 case AMDGPU::V_PK_LSHLREV_B16:
6515 case AMDGPU::V_PK_LSHRREV_B16:
6516 case AMDGPU::V_PK_MAD_I16:
6517 case AMDGPU::V_PK_MAD_U16:
6518 case AMDGPU::V_PK_MAX_F16:
6519 case AMDGPU::V_PK_MAX_I16:
6520 case AMDGPU::V_PK_MAX_U16:
6521 case AMDGPU::V_PK_MIN_F16:
6522 case AMDGPU::V_PK_MIN_I16:
6523 case AMDGPU::V_PK_MIN_U16:
6524 case AMDGPU::V_PK_MOV_B32:
6525 case AMDGPU::V_PK_MUL_F16:
6526 case AMDGPU::V_PK_MUL_F32:
6527 case AMDGPU::V_PK_MUL_LO_U16:
6528 case AMDGPU::V_PK_SUB_I16:
6529 case AMDGPU::V_PK_SUB_U16:
6530 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6539 unsigned Opc =
MI.getOpcode();
6542 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6545 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6551 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6558 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6561 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6567 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6577 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6578 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6579 if (!RI.isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6591 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6593 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6605 if (HasImplicitSGPR || !
MI.isCommutable()) {
6622 if (CommutedOpc == -1) {
6627 MI.setDesc(
get(CommutedOpc));
6631 bool Src0Kill = Src0.
isKill();
6635 else if (Src1.
isReg()) {
6650 unsigned Opc =
MI.getOpcode();
6653 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6654 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6655 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6658 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6659 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6660 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6661 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6662 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6663 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6664 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6668 if (Src1.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()))) {
6669 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6674 if (VOP3Idx[2] != -1) {
6676 if (Src2.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src2.
getReg()))) {
6677 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6686 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6687 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6689 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6691 SGPRsUsed.
insert(SGPRReg);
6695 for (
int Idx : VOP3Idx) {
6704 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6716 if (!RI.isSGPRClass(RI.getRegClassForReg(
MRI, MO.
getReg())))
6723 if (ConstantBusLimit > 0) {
6735 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6736 !RI.isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6742 for (
unsigned I = 0;
I < 3; ++
I) {
6755 SRC = RI.getCommonSubClass(SRC, DstRC);
6758 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6760 if (RI.hasAGPRs(VRC)) {
6761 VRC = RI.getEquivalentVGPRClass(VRC);
6762 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6764 get(TargetOpcode::COPY), NewSrcReg)
6771 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6777 for (
unsigned i = 0; i < SubRegs; ++i) {
6778 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6780 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6781 .
addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
6787 get(AMDGPU::REG_SEQUENCE), DstReg);
6788 for (
unsigned i = 0; i < SubRegs; ++i) {
6790 MIB.
addImm(RI.getSubRegFromChannel(i));
6803 if (SBase && !RI.isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6805 SBase->setReg(SGPR);
6808 if (SOff && !RI.isSGPRReg(
MRI, SOff->
getReg())) {
6816 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6817 if (OldSAddrIdx < 0)
6833 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6834 if (NewVAddrIdx < 0)
6837 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6841 if (OldVAddrIdx >= 0) {
6843 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6855 if (OldVAddrIdx == NewVAddrIdx) {
6858 MRI.removeRegOperandFromUseList(&NewVAddr);
6859 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6863 MRI.removeRegOperandFromUseList(&NewVAddr);
6864 MRI.addRegOperandToUseList(&NewVAddr);
6866 assert(OldSAddrIdx == NewVAddrIdx);
6868 if (OldVAddrIdx >= 0) {
6869 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6870 AMDGPU::OpName::vdst_in);
6874 if (NewVDstIn != -1) {
6875 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6881 if (NewVDstIn != -1) {
6882 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6903 if (!SAddr || RI.isSGPRClass(
MRI.getRegClass(SAddr->
getReg())))
6923 unsigned OpSubReg =
Op.getSubReg();
6926 RI.getRegClassForReg(
MRI, OpReg), OpSubReg);
6932 Register DstReg =
MRI.createVirtualRegister(DstRC);
6942 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6945 bool ImpDef = Def->isImplicitDef();
6946 while (!ImpDef && Def && Def->isCopy()) {
6947 if (Def->getOperand(1).getReg().isPhysical())
6949 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6950 ImpDef = Def && Def->isImplicitDef();
6952 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
  unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
  unsigned NumSubRegs = RegSize / 32;
  Register VScalarOp = ScalarOp->getReg();
  if (NumSubRegs == 1) {
    Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
    Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
      CondReg = NewCondReg;
    Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
    ScalarOp->setReg(CurReg);
    ScalarOp->setIsKill();
    assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
           "Unhandled register size");
    for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
          .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
          .addReg(VScalarOp, VScalarOpUndef,
                  TRI->getSubRegFromChannel(Idx + 1));
      Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
      Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
      auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
      if (NumSubRegs <= 2)
        Cmp.addReg(VScalarOp);
        Cmp.addReg(VScalarOp, VScalarOpUndef,
                   TRI->getSubRegFromChannel(Idx, 2));
        CondReg = NewCondReg;
      Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
    const auto *SScalarOpRC =
        TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
    Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
        BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
    unsigned Channel = 0;
    for (Register Piece : ReadlanePieces) {
      Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));
    ScalarOp->setReg(SScalarOp);
    ScalarOp->setIsKill();
  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
  MRI.setSimpleHint(SaveExec, CondReg);
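// The code above emits one step of a "waterfall" loop: read a candidate value
// out of the divergent operand with V_READFIRSTLANE_B32, compare it against
// every lane's value, AND the result into the accumulated condition, and then
// run the instruction with EXEC restricted to the matching lanes. A hedged
// sketch of the overall shape the loop expands to (pseudocode, not emitted
// verbatim):
//
//   do {
//     s        = readfirstlane(v);     // uniform candidate value
//     mask     = (v == s) & exec;      // lanes sharing that value
//     saveexec = exec; exec = mask;    // execute only those lanes
//     <rewritten instruction using s>
//     exec = saveexec ^ mask;          // retire the handled lanes
//   } while (exec != 0);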
7111 if (!Begin.isValid())
7113 if (!End.isValid()) {
7119 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7127 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
7128 std::numeric_limits<unsigned>::max()) !=
7131 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7137 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
7146 for (
auto I = Begin;
I != AfterMI;
I++) {
7147 for (
auto &MO :
I->all_uses())
7148 MRI.clearKillFlags(MO.getReg());
7173 MBB.addSuccessor(LoopBB);
7183 for (
auto &Succ : RemainderBB->
successors()) {
static std::tuple<unsigned, unsigned>
      TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
                             AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
  Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
  uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
      .addImm(AMDGPU::sub0_sub1)
  return std::tuple(RsrcPtr, NewSRsrc);
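// Summary of the helper above (its name is not visible in this fragment, so
// "extractRsrcPtr" below is used purely illustratively): it splits a 128-bit
// buffer resource held in VGPRs into
//   * RsrcPtr  - the 64-bit base pointer (sub0_sub1), to be folded into VADDR,
//   * NewSRsrc - a fresh SGPR_128 descriptor with a zero pointer and the
//                default data format from getDefaultRsrcDataFormat(),
// which is what the ADDR64 MUBUF legalization below consumes, e.g.:
//
//   auto [RsrcPtr, NewSRsrc] = extractRsrcPtr(TII, MI, Rsrc);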
7283 if (
MI.getOpcode() == AMDGPU::PHI) {
7285 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
7286 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
7289 MRI.getRegClass(
MI.getOperand(i).getReg());
7290 if (RI.hasVectorRegisters(OpRC)) {
7304 VRC = &AMDGPU::VReg_1RegClass;
7307 ? RI.getEquivalentAGPRClass(SRC)
7308 : RI.getEquivalentVGPRClass(SRC);
7311 ? RI.getEquivalentAGPRClass(VRC)
7312 : RI.getEquivalentVGPRClass(VRC);
7320 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7322 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7338 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7341 if (RI.hasVGPRs(DstRC)) {
7345 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7347 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7365 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7370 if (DstRC != Src0RC) {
7379 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7381 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7387 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7388 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7389 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7390 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7391 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7392 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7393 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7395 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7408 ? AMDGPU::OpName::rsrc
7409 : AMDGPU::OpName::srsrc;
7411 if (SRsrc && !RI.isSGPRClass(
MRI.getRegClass(SRsrc->
getReg())))
7414 AMDGPU::OpName SampOpName =
7415 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7417 if (SSamp && !RI.isSGPRClass(
MRI.getRegClass(SSamp->
getReg())))
7424 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7426 if (!RI.isSGPRClass(
MRI.getRegClass(Dest->
getReg()))) {
7430 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7431 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7436 while (Start->getOpcode() != FrameSetupOpcode)
7439 while (End->getOpcode() != FrameDestroyOpcode)
7443 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7444 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7452 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7454 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7456 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7466 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7467 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7468 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7469 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7471 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7478 bool isSoffsetLegal =
true;
7480 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7481 if (SoffsetIdx != -1) {
7484 !RI.isSGPRClass(
MRI.getRegClass(Soffset->
getReg()))) {
7485 isSoffsetLegal =
false;
7489 bool isRsrcLegal =
true;
7491 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7492 if (RsrcIdx != -1) {
7495 isRsrcLegal =
false;
7499 if (isRsrcLegal && isSoffsetLegal)
7523 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7524 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7525 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7527 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7528 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7529 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7531 unsigned RsrcPtr, NewSRsrc;
7538 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7545 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7559 }
else if (!VAddr && ST.hasAddr64()) {
7563 "FIXME: Need to emit flat atomics here");
7565 unsigned RsrcPtr, NewSRsrc;
7568 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7591 MIB.
addImm(CPol->getImm());
7596 MIB.
addImm(TFE->getImm());
7616 MI.removeFromParent();
7621 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7623 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7627 if (!isSoffsetLegal) {
7639 if (!isSoffsetLegal) {
7651 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7652 if (RsrcIdx != -1) {
7653 DeferredList.insert(
MI);
7658 return DeferredList.contains(
MI);
7668 if (!ST.useRealTrue16Insts())
7671 unsigned Opcode =
MI.getOpcode();
7675 OpIdx >=
get(Opcode).getNumOperands() ||
7676 get(Opcode).operands()[
OpIdx].RegClass == -1)
7680 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7684 if (!RI.isVGPRClass(CurrRC))
7687 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7689 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7690 Op.setSubReg(AMDGPU::lo16);
7691 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7693 Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7694 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7701 Op.setReg(NewDstReg);
7713 while (!Worklist.
empty()) {
7727 "Deferred MachineInstr are not supposed to re-populate worklist");
7747 case AMDGPU::S_ADD_I32:
7748 case AMDGPU::S_SUB_I32: {
7752 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7760 case AMDGPU::S_MUL_U64:
7761 if (ST.hasVectorMulU64()) {
7762 NewOpcode = AMDGPU::V_MUL_U64_e64;
7766 splitScalarSMulU64(Worklist, Inst, MDT);
7770 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7771 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7774 splitScalarSMulPseudo(Worklist, Inst, MDT);
7778 case AMDGPU::S_AND_B64:
7779 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7783 case AMDGPU::S_OR_B64:
7784 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7788 case AMDGPU::S_XOR_B64:
7789 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7793 case AMDGPU::S_NAND_B64:
7794 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7798 case AMDGPU::S_NOR_B64:
7799 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7803 case AMDGPU::S_XNOR_B64:
7804 if (ST.hasDLInsts())
7805 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7807 splitScalar64BitXnor(Worklist, Inst, MDT);
7811 case AMDGPU::S_ANDN2_B64:
7812 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7816 case AMDGPU::S_ORN2_B64:
7817 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7821 case AMDGPU::S_BREV_B64:
7822 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7826 case AMDGPU::S_NOT_B64:
7827 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7831 case AMDGPU::S_BCNT1_I32_B64:
7832 splitScalar64BitBCNT(Worklist, Inst);
7836 case AMDGPU::S_BFE_I64:
7837 splitScalar64BitBFE(Worklist, Inst);
7841 case AMDGPU::S_FLBIT_I32_B64:
7842 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7845 case AMDGPU::S_FF1_I32_B64:
7846 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7850 case AMDGPU::S_LSHL_B32:
7851 if (ST.hasOnlyRevVALUShifts()) {
7852 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7856 case AMDGPU::S_ASHR_I32:
7857 if (ST.hasOnlyRevVALUShifts()) {
7858 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7862 case AMDGPU::S_LSHR_B32:
7863 if (ST.hasOnlyRevVALUShifts()) {
7864 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7868 case AMDGPU::S_LSHL_B64:
7869 if (ST.hasOnlyRevVALUShifts()) {
7871 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7872 : AMDGPU::V_LSHLREV_B64_e64;
7876 case AMDGPU::S_ASHR_I64:
7877 if (ST.hasOnlyRevVALUShifts()) {
7878 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7882 case AMDGPU::S_LSHR_B64:
7883 if (ST.hasOnlyRevVALUShifts()) {
7884 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7889 case AMDGPU::S_ABS_I32:
7890 lowerScalarAbs(Worklist, Inst);
7894 case AMDGPU::S_ABSDIFF_I32:
7895 lowerScalarAbsDiff(Worklist, Inst);
7899 case AMDGPU::S_CBRANCH_SCC0:
7900 case AMDGPU::S_CBRANCH_SCC1: {
7903 bool IsSCC = CondReg == AMDGPU::SCC;
7911 case AMDGPU::S_BFE_U64:
7912 case AMDGPU::S_BFM_B64:
7915 case AMDGPU::S_PACK_LL_B32_B16:
7916 case AMDGPU::S_PACK_LH_B32_B16:
7917 case AMDGPU::S_PACK_HL_B32_B16:
7918 case AMDGPU::S_PACK_HH_B32_B16:
7919 movePackToVALU(Worklist,
MRI, Inst);
7923 case AMDGPU::S_XNOR_B32:
7924 lowerScalarXnor(Worklist, Inst);
7928 case AMDGPU::S_NAND_B32:
7929 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7933 case AMDGPU::S_NOR_B32:
7934 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7938 case AMDGPU::S_ANDN2_B32:
7939 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7943 case AMDGPU::S_ORN2_B32:
7944 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7952 case AMDGPU::S_ADD_CO_PSEUDO:
7953 case AMDGPU::S_SUB_CO_PSEUDO: {
7954 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7955 ? AMDGPU::V_ADDC_U32_e64
7956 : AMDGPU::V_SUBB_U32_e64;
7957 const auto *CarryRC = RI.getWaveMaskRegClass();
7960 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7961 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7968 Register DestReg =
MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
7979 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7983 case AMDGPU::S_UADDO_PSEUDO:
7984 case AMDGPU::S_USUBO_PSEUDO: {
7990 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7991 ? AMDGPU::V_ADD_CO_U32_e64
7992 : AMDGPU::V_SUB_CO_U32_e64;
7994 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest0.
getReg()));
7995 Register DestReg =
MRI.createVirtualRegister(NewRC);
8003 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
8004 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
8008 case AMDGPU::S_LSHL1_ADD_U32:
8009 case AMDGPU::S_LSHL2_ADD_U32:
8010 case AMDGPU::S_LSHL3_ADD_U32:
8011 case AMDGPU::S_LSHL4_ADD_U32: {
8015 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8016 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8017 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8021 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest.
getReg()));
8022 Register DestReg =
MRI.createVirtualRegister(NewRC);
8030 MRI.replaceRegWith(Dest.
getReg(), DestReg);
8031 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
8035 case AMDGPU::S_CSELECT_B32:
8036 case AMDGPU::S_CSELECT_B64:
8037 lowerSelect(Worklist, Inst, MDT);
8040 case AMDGPU::S_CMP_EQ_I32:
8041 case AMDGPU::S_CMP_LG_I32:
8042 case AMDGPU::S_CMP_GT_I32:
8043 case AMDGPU::S_CMP_GE_I32:
8044 case AMDGPU::S_CMP_LT_I32:
8045 case AMDGPU::S_CMP_LE_I32:
8046 case AMDGPU::S_CMP_EQ_U32:
8047 case AMDGPU::S_CMP_LG_U32:
8048 case AMDGPU::S_CMP_GT_U32:
8049 case AMDGPU::S_CMP_GE_U32:
8050 case AMDGPU::S_CMP_LT_U32:
8051 case AMDGPU::S_CMP_LE_U32:
8052 case AMDGPU::S_CMP_EQ_U64:
8053 case AMDGPU::S_CMP_LG_U64:
8054 case AMDGPU::S_CMP_LT_F32:
8055 case AMDGPU::S_CMP_EQ_F32:
8056 case AMDGPU::S_CMP_LE_F32:
8057 case AMDGPU::S_CMP_GT_F32:
8058 case AMDGPU::S_CMP_LG_F32:
8059 case AMDGPU::S_CMP_GE_F32:
8060 case AMDGPU::S_CMP_O_F32:
8061 case AMDGPU::S_CMP_U_F32:
8062 case AMDGPU::S_CMP_NGE_F32:
8063 case AMDGPU::S_CMP_NLG_F32:
8064 case AMDGPU::S_CMP_NGT_F32:
8065 case AMDGPU::S_CMP_NLE_F32:
8066 case AMDGPU::S_CMP_NEQ_F32:
8067 case AMDGPU::S_CMP_NLT_F32: {
8068 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8072 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8086 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8090 case AMDGPU::S_CMP_LT_F16:
8091 case AMDGPU::S_CMP_EQ_F16:
8092 case AMDGPU::S_CMP_LE_F16:
8093 case AMDGPU::S_CMP_GT_F16:
8094 case AMDGPU::S_CMP_LG_F16:
8095 case AMDGPU::S_CMP_GE_F16:
8096 case AMDGPU::S_CMP_O_F16:
8097 case AMDGPU::S_CMP_U_F16:
8098 case AMDGPU::S_CMP_NGE_F16:
8099 case AMDGPU::S_CMP_NLG_F16:
8100 case AMDGPU::S_CMP_NGT_F16:
8101 case AMDGPU::S_CMP_NLE_F16:
8102 case AMDGPU::S_CMP_NEQ_F16:
8103 case AMDGPU::S_CMP_NLT_F16: {
8104 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8126 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8130 case AMDGPU::S_CVT_HI_F32_F16: {
8131 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8132 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8133 if (ST.useRealTrue16Insts()) {
8138 .
addReg(TmpReg, 0, AMDGPU::hi16)
8154 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8158 case AMDGPU::S_MINIMUM_F32:
8159 case AMDGPU::S_MAXIMUM_F32: {
8160 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8171 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8175 case AMDGPU::S_MINIMUM_F16:
8176 case AMDGPU::S_MAXIMUM_F16: {
8177 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8178 ? &AMDGPU::VGPR_16RegClass
8179 : &AMDGPU::VGPR_32RegClass);
8191 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8195 case AMDGPU::V_S_EXP_F16_e64:
8196 case AMDGPU::V_S_LOG_F16_e64:
8197 case AMDGPU::V_S_RCP_F16_e64:
8198 case AMDGPU::V_S_RSQ_F16_e64:
8199 case AMDGPU::V_S_SQRT_F16_e64: {
8200 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8201 ? &AMDGPU::VGPR_16RegClass
8202 : &AMDGPU::VGPR_32RegClass);
8214 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8220 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8228 if (NewOpcode == Opcode) {
8236 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8238 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8252 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8259 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
8260 MRI.replaceRegWith(DstReg, NewDstReg);
8261 MRI.clearKillFlags(NewDstReg);
8264 if (!
MRI.constrainRegClass(NewDstReg, CommonRC))
8281 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8285 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8286 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8287 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8289 get(AMDGPU::IMPLICIT_DEF), Undef);
8291 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8297 MRI.replaceRegWith(DstReg, NewDstReg);
8298 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8300 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8303 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8304 MRI.replaceRegWith(DstReg, NewDstReg);
8305 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8310 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8311 MRI.replaceRegWith(DstReg, NewDstReg);
8313 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8323 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8324 AMDGPU::OpName::src0_modifiers) >= 0)
8328 NewInstr->addOperand(Src);
8331 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8334 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8336 NewInstr.addImm(
Size);
8337 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8341 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8346 "Scalar BFE is only implemented for constant width and offset");
8354 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8355 AMDGPU::OpName::src1_modifiers) >= 0)
8357 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8359 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8360 AMDGPU::OpName::src2_modifiers) >= 0)
8362 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8364 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8366 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8368 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8374 NewInstr->addOperand(
Op);
8381 if (
Op.getReg() == AMDGPU::SCC) {
8383 if (
Op.isDef() && !
Op.isDead())
8384 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8386 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8391 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8392 Register DstReg = NewInstr->getOperand(0).getReg();
8397 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8398 MRI.replaceRegWith(DstReg, NewDstReg);
8407 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
std::pair<bool, MachineBasicBlock *>
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
  unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? AMDGPU::V_ADD_U32_e64
                                             : AMDGPU::V_SUB_U32_e64;
  MRI.replaceRegWith(OldDstReg, ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  return std::pair(true, NewBB);
  return std::pair(false, nullptr);
bool IsSCC = (CondReg == AMDGPU::SCC);
  MRI.replaceRegWith(Dest.getReg(), CondReg);
const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
NewCondReg = MRI.createVirtualRegister(TC);
bool CopyFound = false;
for (MachineInstr &CandI :
  if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
  if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
      .addReg(CandI.getOperand(1).getReg());
    ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
MachineInstr *NewInst;
if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
  NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
MRI.replaceRegWith(Dest.getReg(), NewDestReg);
addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
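// Aside (conceptual model only, not from the LLVM sources): the rewrite above
// turns a scalar select on SCC into V_CNDMASK_B32, i.e. a per-lane select
// driven by a wave-wide condition mask. Sketch of that semantics:
#include <cassert>
#include <cstdint>

static void cndmaskB32(uint32_t *Dst, const uint32_t *Src0,
                       const uint32_t *Src1, uint64_t CondMask,
                       unsigned Lanes) {
  for (unsigned Lane = 0; Lane < Lanes; ++Lane)
    Dst[Lane] = ((CondMask >> Lane) & 1) ? Src1[Lane] : Src0[Lane];
}

int main() {
  uint32_t Src0[4] = {10, 11, 12, 13};
  uint32_t Src1[4] = {20, 21, 22, 23};
  uint32_t Dst[4];
  cndmaskB32(Dst, Src0, Src1, 0b0101u, 4);
  assert(Dst[0] == 20 && Dst[1] == 11 && Dst[2] == 22 && Dst[3] == 13);
  return 0;
}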
Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned SubOp = ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32
                                    : AMDGPU::V_SUB_CO_U32_e32;
MRI.replaceRegWith(Dest.getReg(), ResultReg);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Register SubResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
MRI.replaceRegWith(Dest.getReg(), ResultReg);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
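// Aside (illustration only): the scalar absolute-value style expansions above
// subtract the operand from zero with V_SUB / V_SUB_CO before combining the
// two values; the identity they rest on is |x| == max(x, 0 - x) in two's
// complement (0 - x wraps for INT32_MIN exactly like the hardware subtract).
// Helper name below is made up:
#include <algorithm>
#include <cassert>
#include <cstdint>

static int32_t absViaSubMax(int32_t X) {
  int32_t Neg = static_cast<int32_t>(0u - static_cast<uint32_t>(X)); // wraps
  return std::max(X, Neg);
}

int main() {
  assert(absViaSubMax(5) == 5);
  assert(absViaSubMax(-7) == 7);
  assert(absViaSubMax(0) == 0);
  return 0;
}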
if (ST.hasDLInsts()) {
  Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MRI.replaceRegWith(Dest.getReg(), NewDest);
  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
bool Src0IsSGPR = Src0.isReg() &&
                  RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
bool Src1IsSGPR = Src1.isReg() &&
                  RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
} else if (Src1IsSGPR) {
MRI.replaceRegWith(Dest.getReg(), NewDest);
addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
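// Aside (illustration only): the XNOR lowering above either emits a native
// V_XNOR (when hasDLInsts) or rebuilds it from XOR and NOT, choosing which
// operand to invert based on which one lives in an SGPR. All of the rewrites
// rest on the same identities:
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x12345678u, B = 0x0F0F0F0Fu;
  uint32_t Xnor = ~(A ^ B);
  assert(Xnor == (~A ^ B)); // invert the first operand instead
  assert(Xnor == (A ^ ~B)); // or the second
  return 0;
}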
                                   unsigned Opcode) const {
Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
MRI.replaceRegWith(Dest.getReg(), NewDest);
addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
                                   unsigned Opcode) const {
Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
MRI.replaceRegWith(Dest.getReg(), NewDest);
addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
const MCInstrDesc &InstDesc = get(Opcode);
const TargetRegisterClass *Src0RC = Src0.isReg() ?
                                        &AMDGPU::SGPR_32RegClass;
const TargetRegisterClass *Src0SubRC =
    RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
    AMDGPU::sub0, Src0SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
const TargetRegisterClass *NewDestSubRC =
    RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
    AMDGPU::sub1, Src0SubRC);
Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
Worklist.insert(&LoHalf);
Worklist.insert(&HiHalf);
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
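// Aside (illustration only): the 64-bit unary expansion above runs the 32-bit
// opcode once per half (sub0, sub1) and stitches the results back together
// with a REG_SEQUENCE. For a bitwise operation such as NOT this is exact:
#include <cassert>
#include <cstdint>

static uint64_t not64ViaHalves(uint64_t X) {
  uint32_t Lo = static_cast<uint32_t>(X);
  uint32_t Hi = static_cast<uint32_t>(X >> 32);
  return (static_cast<uint64_t>(~Hi) << 32) | ~Lo; // REG_SEQUENCE(sub0, sub1)
}

int main() {
  assert(not64ViaHalves(0x0123456789ABCDEFull) == ~0x0123456789ABCDEFull);
  return 0;
}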
Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
const TargetRegisterClass *Src0SubRC =
    RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
if (RI.isSGPRClass(Src0SubRC))
  Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
const TargetRegisterClass *Src1SubRC =
    RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
if (RI.isSGPRClass(Src1SubRC))
  Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
MachineOperand Op0L =
MachineOperand Op1L =
MachineOperand Op0H =
MachineOperand Op1H =
Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MachineInstr *Op1L_Op0H =
Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MachineInstr *Op1H_Op0L =
Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MachineInstr *Carry =
MachineInstr *LoHalf =
Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MachineInstr *HiHalf =
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
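// Aside (illustration only): the S_MUL_U64 expansion above assembles the low
// 64 bits of the product from 32-bit multiplies (a low product, a high
// product, and the two cross terms); however the intermediate adds and the
// carry are scheduled, the arithmetic it relies on is the schoolbook
// decomposition below (helper name made up):
#include <cassert>
#include <cstdint>

static uint64_t mul64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t AL = static_cast<uint32_t>(A), AH = static_cast<uint32_t>(A >> 32);
  uint32_t BL = static_cast<uint32_t>(B), BH = static_cast<uint32_t>(B >> 32);
  uint64_t LoFull = static_cast<uint64_t>(AL) * BL; // low + high 32-bit parts
  uint32_t Lo = static_cast<uint32_t>(LoFull);
  uint32_t Hi = static_cast<uint32_t>(LoFull >> 32) + AL * BH + AH * BL;
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

int main() {
  uint64_t A = 0xDEADBEEFCAFEBABEull, B = 0x123456789ABCDEF0ull;
  assert(mul64ViaHalves(A, B) == A * B);
  return 0;
}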
Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
const TargetRegisterClass *Src0SubRC =
    RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
if (RI.isSGPRClass(Src0SubRC))
  Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
const TargetRegisterClass *Src1SubRC =
    RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
if (RI.isSGPRClass(Src1SubRC))
  Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
MachineOperand Op0L =
MachineOperand Op1L =
unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
                      ? AMDGPU::V_MUL_HI_U32_e64
                      : AMDGPU::V_MUL_HI_I32_e64;
MachineInstr *HiHalf =
MachineInstr *LoHalf =
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
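// Aside (illustration only): for the S_MUL_U64_U32 / S_MUL_I64_I32 pseudos
// both operands are known to fit in 32 bits, so the 64-bit result is just the
// low multiply in sub0 and one V_MUL_HI of the matching signedness in sub1.
// Sketch of the unsigned case (helper names made up):
#include <cassert>
#include <cstdint>

static uint32_t mulLoU32(uint32_t A, uint32_t B) {
  return A * B;                                               // V_MUL_LO_U32
}
static uint32_t mulHiU32(uint32_t A, uint32_t B) {
  return static_cast<uint32_t>((static_cast<uint64_t>(A) * B) >> 32); // V_MUL_HI_U32
}

int main() {
  uint32_t A = 0xFFFFFFFFu, B = 0x12345678u;
  uint64_t Packed =
      (static_cast<uint64_t>(mulHiU32(A, B)) << 32) | mulLoU32(A, B);
  assert(Packed == static_cast<uint64_t>(A) * B); // REG_SEQUENCE(sub0, sub1)
  return 0;
}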
8956 const MCInstrDesc &InstDesc =
get(Opcode);
8957 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
8959 &AMDGPU::SGPR_32RegClass;
8961 const TargetRegisterClass *Src0SubRC =
8962 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8963 const TargetRegisterClass *Src1RC = Src1.
isReg() ?
8965 &AMDGPU::SGPR_32RegClass;
8967 const TargetRegisterClass *Src1SubRC =
8968 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8971 AMDGPU::sub0, Src0SubRC);
8973 AMDGPU::sub0, Src1SubRC);
8975 AMDGPU::sub1, Src0SubRC);
8977 AMDGPU::sub1, Src1SubRC);
8979 const TargetRegisterClass *DestRC =
MRI.getRegClass(Dest.
getReg());
8980 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8981 const TargetRegisterClass *NewDestSubRC =
8982 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8984 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8985 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0)
8989 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8990 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1)
8994 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
9001 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
9003 Worklist.
insert(&LoHalf);
9004 Worklist.
insert(&HiHalf);
9007 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
MachineOperand *Op0;
MachineOperand *Op1;
Register NewDest = MRI.createVirtualRegister(DestRC);
MRI.replaceRegWith(Dest.getReg(), NewDest);
const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
const TargetRegisterClass *SrcRC = Src.isReg() ?
                                       MRI.getRegClass(Src.getReg()) :
                                       &AMDGPU::SGPR_32RegClass;
Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
const TargetRegisterClass *SrcSubRC =
    RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
    AMDGPU::sub0, SrcSubRC);
    AMDGPU::sub1, SrcSubRC);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
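// Aside (illustration only): the 64-bit S_BCNT1 expansion above lets the
// second V_BCNT_U32_B32 accumulate into the count produced for sub0, i.e. the
// popcount of a 64-bit value is the sum of the popcounts of its halves:
#include <bitset>
#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0xF0F0F0F00F0F0F0Full;
  unsigned Lo = std::bitset<32>(static_cast<uint32_t>(X)).count();
  unsigned Hi = std::bitset<32>(static_cast<uint32_t>(X >> 32)).count();
  assert(Lo + Hi == std::bitset<64>(X).count());
  return 0;
}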
       Offset == 0 && "Not implemented");
Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
    .addReg(Src.getReg(), 0, AMDGPU::sub0);
    .addReg(Src.getReg(), 0, AMDGPU::sub0)
MRI.replaceRegWith(Dest.getReg(), ResultReg);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
const MCInstrDesc &InstDesc = get(Opcode);
bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
unsigned OpcodeAdd =
    ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
const TargetRegisterClass *SrcRC =
    Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
const TargetRegisterClass *SrcSubRC =
    RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
MachineOperand SrcRegSub0 =
MachineOperand SrcRegSub1 =
Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    .addReg(IsCtlz ? MidReg1 : MidReg2)
    .addReg(IsCtlz ? MidReg2 : MidReg1);
MRI.replaceRegWith(Dest.getReg(), MidReg4);
addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
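// Aside (illustration only): the 64-bit FFBH/FFBL split above counts each
// 32-bit half separately, biases one count by 32, and takes the unsigned
// minimum; the 32-bit instruction returns -1 (all ones) for a zero half, so
// the other half wins the min. For non-zero inputs that selection matches the
// direct definition (helper names made up, leading-zero case shown):
#include <cassert>
#include <cstdint>

static unsigned clz32(uint32_t V) {
  assert(V != 0);
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; (V & Bit) == 0; Bit >>= 1)
    ++N;
  return N;
}

static unsigned clz64ViaHalves(uint64_t X) {
  assert(X != 0 && "zero input is a special case");
  uint32_t Hi = static_cast<uint32_t>(X >> 32);
  uint32_t Lo = static_cast<uint32_t>(X);
  return Hi ? clz32(Hi) : 32 + clz32(Lo);
}

int main() {
  assert(clz64ViaHalves(1) == 63);
  assert(clz64ViaHalves(0x0000000100000000ull) == 31);
  assert(clz64ViaHalves(0x8000000000000000ull) == 0);
  return 0;
}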
void SIInstrInfo::addUsersToMoveToVALUWorklist(
  MachineInstr &UseMI = *MO.getParent();
  switch (UseMI.getOpcode()) {
  case AMDGPU::SOFT_WQM:
  case AMDGPU::STRICT_WWM:
  case AMDGPU::STRICT_WQM:
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::INSERT_SUBREG:
    OpNo = MO.getOperandNo();
    MRI.constrainRegClass(DstReg, OpRC);
    if (!RI.hasVectorRegisters(OpRC))
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
if (ST.useRealTrue16Insts()) {
  SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
  bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
  auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
  case AMDGPU::S_PACK_LL_B32_B16:
               isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
        .addImm(AMDGPU::lo16)
               isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
        .addImm(AMDGPU::hi16);
  case AMDGPU::S_PACK_LH_B32_B16:
               isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
        .addImm(AMDGPU::lo16)
        .addReg(SrcReg1, 0, AMDGPU::hi16)
        .addImm(AMDGPU::hi16);
  case AMDGPU::S_PACK_HL_B32_B16:
    NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
        .addImm(AMDGPU::lo16)
               isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
        .addImm(AMDGPU::hi16);
  case AMDGPU::S_PACK_HH_B32_B16:
    NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
        .addImm(AMDGPU::lo16)
        .addReg(SrcReg1, 0, AMDGPU::hi16)
        .addImm(AMDGPU::hi16);
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
case AMDGPU::S_PACK_LL_B32_B16: {
  Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
case AMDGPU::S_PACK_LH_B32_B16: {
  Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
case AMDGPU::S_PACK_HL_B32_B16: {
  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
case AMDGPU::S_PACK_HH_B32_B16: {
  Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
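// Aside (illustration only): semantics of the four S_PACK_*_B32_B16 forms
// being expanded above; each result half is taken from either the low or the
// high 16 bits of an input (helper names made up):
#include <cassert>
#include <cstdint>

static uint32_t packLL(uint32_t A, uint32_t B) { return (A & 0xFFFFu) | (B << 16); }
static uint32_t packLH(uint32_t A, uint32_t B) { return (A & 0xFFFFu) | (B & 0xFFFF0000u); }
static uint32_t packHL(uint32_t A, uint32_t B) { return (A >> 16) | (B << 16); }
static uint32_t packHH(uint32_t A, uint32_t B) { return (A >> 16) | (B & 0xFFFF0000u); }

int main() {
  uint32_t A = 0xAAAA1111u, B = 0xBBBB2222u;
  assert(packLL(A, B) == 0x22221111u);
  assert(packLH(A, B) == 0xBBBB1111u);
  assert(packHL(A, B) == 0x2222AAAAu);
  assert(packHH(A, B) == 0xBBBBAAAAu);
  return 0;
}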
assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
       !Op.isDead() && Op.getParent() == &SCCDefInst);
SmallVector<MachineInstr *, 4> CopyToDelete;
for (MachineInstr &MI :
  int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
    MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
    Register DestReg = MI.getOperand(0).getReg();
    MRI.replaceRegWith(DestReg, NewCond);
    MI.getOperand(SCCIdx).setReg(NewCond);
  if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
for (auto &Copy : CopyToDelete)
  Copy->eraseFromParent();
void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
for (MachineInstr &MI :
  if (MI.modifiesRegister(AMDGPU::VCC, &RI))
  if (MI.definesRegister(AMDGPU::SCC, &RI)) {
const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
case AMDGPU::REG_SEQUENCE:
case AMDGPU::INSERT_SUBREG:
case AMDGPU::SOFT_WQM:
case AMDGPU::STRICT_WWM:
case AMDGPU::STRICT_WQM: {
  if (RI.isAGPRClass(SrcRC)) {
    if (RI.isAGPRClass(NewDstRC))
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::INSERT_SUBREG:
      NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9487 int OpIndices[3])
const {
9488 const MCInstrDesc &
Desc =
MI.getDesc();
9504 const MachineRegisterInfo &
MRI =
MI.getMF()->getRegInfo();
9506 for (
unsigned i = 0; i < 3; ++i) {
9507 int Idx = OpIndices[i];
9511 const MachineOperand &MO =
MI.getOperand(Idx);
9517 const TargetRegisterClass *OpRC =
9518 RI.getRegClass(getOpRegClassID(
Desc.operands()[Idx]));
9519 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9525 const TargetRegisterClass *RegRC =
MRI.getRegClass(
Reg);
9526 if (RI.isSGPRClass(RegRC))
9544 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9545 SGPRReg = UsedSGPRs[0];
9548 if (!SGPRReg && UsedSGPRs[1]) {
9549 if (UsedSGPRs[1] == UsedSGPRs[2])
9550 SGPRReg = UsedSGPRs[1];
9557 AMDGPU::OpName OperandName)
const {
9558 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9561 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9565 return &
MI.getOperand(Idx);
9579 if (ST.isAmdHsaOS()) {
9582 RsrcDataFormat |= (1ULL << 56);
9587 RsrcDataFormat |= (2ULL << 59);
9590 return RsrcDataFormat;
9600 uint64_t EltSizeValue =
Log2_32(ST.getMaxPrivateElementSize(
true)) - 1;
9605 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9612 Rsrc23 &=
~AMDGPU::RSRC_DATA_FORMAT;
unsigned Opc = MI.getOpcode();
return get(Opc).mayLoad() &&
                                            int &FrameIndex) const {
if (!Addr || !Addr->isFI())
                                            int &FrameIndex) const {
                                            int &FrameIndex) const {
                                            int &FrameIndex) const {
while (++I != E && I->isInsideBundle()) {
  assert(!I->isBundle() && "No nested bundle!");
unsigned Opc = MI.getOpcode();
unsigned DescSize = Desc.getSize();
unsigned Size = DescSize;
if (MI.isBranch() && ST.hasOffset3fBug())
bool HasLiteral = false;
unsigned LiteralSize = 4;
for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
  if (ST.has64BitLiterals()) {
    switch (OpInfo.OperandType) {
return HasLiteral ? DescSize + LiteralSize : DescSize;
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
case TargetOpcode::BUNDLE:
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR: {
  const char *AsmStr = MI.getOperand(0).getSymbolName();
if (MI.isMetaInstruction())
const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
unsigned LoInstOpcode = D16Info->LoOp;
DescSize = Desc.getSize();
if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
  DescSize = Desc.getSize();
if (MI.memoperands_empty())
9798 static const std::pair<int, const char *> TargetIndices[] = {
9836std::pair<unsigned, unsigned>
9843 static const std::pair<unsigned, const char *> TargetFlags[] = {
9861 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9876 return AMDGPU::WWM_COPY;
9878 return AMDGPU::COPY;
bool IsNullOrVectorRegister = true;
  IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
return IsNullOrVectorRegister &&
       (Opcode == AMDGPU::IMPLICIT_DEF &&
       (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
        MI.modifiesRegister(AMDGPU::EXEC, &RI)));
if (ST.hasAddNoCarry())
Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
if (ST.hasAddNoCarry())
Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
                           : RS.scavengeRegisterBackwards(
                                 *RI.getBoolRC(), I, false,
case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
case AMDGPU::SI_KILL_I1_TERMINATOR:
case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
  return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
case AMDGPU::SI_KILL_I1_PSEUDO:
  return get(AMDGPU::SI_KILL_I1_TERMINATOR);
const unsigned OffsetBits =
return (1 << OffsetBits) - 1;
if (MI.isInlineAsm())
for (auto &Op : MI.implicit_operands()) {
  if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
    Op.setReg(AMDGPU::VCC_LO);
int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
if (Imm > MaxImm) {
  if (Imm <= MaxImm + 64) {
    Overflow = Imm - MaxImm;
if (Overflow > 0) {
  if (ST.hasRestrictedSOffset())
  SOffset = Overflow;
if (!ST.hasFlatInstOffsets())
if (ST.hasNegativeUnalignedScratchOffsetBug() &&
std::pair<int64_t, int64_t>
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  if (AllowNegative) {
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;
    if (ST.hasNegativeUnalignedScratchOffsetBug() &&
        (ImmField % 4) != 0) {
      RemainderOffset += ImmField % 4;
      ImmField -= ImmField % 4;
  } else if (COffsetVal >= 0) {
    RemainderOffset = COffsetVal - ImmField;
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
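// Aside (simplified sketch, not the in-tree implementation): the split above
// keeps whatever part of the constant fits in the signed NumBits immediate
// field and returns the rest to be folded into the address. Because C++
// division truncates toward zero, the immediate keeps the sign of the
// original offset; the non-negative branch below assumes the elided step
// masks off the low NumBits. splitOffset is a made-up helper name.
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<int64_t, int64_t> splitOffset(int64_t COffsetVal,
                                               unsigned NumBits,
                                               bool AllowNegative) {
  int64_t ImmField = 0, RemainderOffset = COffsetVal;
  if (AllowNegative) {
    int64_t D = int64_t(1) << NumBits;
    RemainderOffset = (COffsetVal / D) * D; // multiple of D, truncated toward 0
    ImmField = COffsetVal - RemainderOffset;
  } else if (COffsetVal >= 0) {
    ImmField = COffsetVal & ((int64_t(1) << NumBits) - 1); // assumed masking
    RemainderOffset = COffsetVal - ImmField;
  }
  assert(ImmField + RemainderOffset == COffsetVal);
  return {ImmField, RemainderOffset};
}

int main() {
  auto [Imm, Rem] = splitOffset(-4100, 12, /*AllowNegative=*/true);
  assert(Imm == -4 && Rem == -4096 && Imm + Rem == -4100);
  return 0;
}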
10142 if (ST.hasNegativeScratchOffsetBug() &&
10150 switch (ST.getGeneration()) {
10176 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10177 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10178 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10179 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10180 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10181 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10182 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10183 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10190#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10191 case OPCODE##_dpp: \
10192 case OPCODE##_e32: \
10193 case OPCODE##_e64: \
10194 case OPCODE##_e64_dpp: \
10195 case OPCODE##_sdwa:
10209 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10210 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10211 case AMDGPU::V_FMA_F16_gfx9_e64:
10212 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10213 case AMDGPU::V_INTERP_P2_F16:
10214 case AMDGPU::V_MAD_F16_e64:
10215 case AMDGPU::V_MAD_U16_e64:
10216 case AMDGPU::V_MAD_I16_e64:
10238 switch (ST.getGeneration()) {
10251 if (
isMAI(Opcode)) {
10259 if (MCOp == (
uint16_t)-1 && ST.hasGFX1250Insts())
10266 if (ST.hasGFX90AInsts()) {
10268 if (ST.hasGFX940Insts())
10299 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
10300 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
10301 auto &RegOp =
MI.getOperand(1 + 2 *
I);
10313 switch (
MI.getOpcode()) {
10315 case AMDGPU::REG_SEQUENCE:
10319 case AMDGPU::INSERT_SUBREG:
10320 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
10337 if (!
P.Reg.isVirtual())
10341 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
10342 while (
auto *
MI = DefInst) {
10344 switch (
MI->getOpcode()) {
10346 case AMDGPU::V_MOV_B32_e32: {
10347 auto &Op1 =
MI->getOperand(1);
10352 DefInst =
MRI.getVRegDef(RSR.Reg);
10360 DefInst =
MRI.getVRegDef(RSR.Reg);
10373 assert(
MRI.isSSA() &&
"Must be run on SSA");
10375 auto *
TRI =
MRI.getTargetRegisterInfo();
10376 auto *DefBB =
DefMI.getParent();
10380 if (
UseMI.getParent() != DefBB)
10383 const int MaxInstScan = 20;
10387 auto E =
UseMI.getIterator();
10388 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
10389 if (
I->isDebugInstr())
10392 if (++NumInst > MaxInstScan)
10395 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
10405 assert(
MRI.isSSA() &&
"Must be run on SSA");
10407 auto *
TRI =
MRI.getTargetRegisterInfo();
10408 auto *DefBB =
DefMI.getParent();
10410 const int MaxUseScan = 10;
10413 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
10414 auto &UseInst = *
Use.getParent();
10417 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10420 if (++NumUse > MaxUseScan)
10427 const int MaxInstScan = 20;
10431 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
10434 if (
I->isDebugInstr())
10437 if (++NumInst > MaxInstScan)
10450 if (Reg == VReg && --NumUse == 0)
10452 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
10461 auto Cur =
MBB.begin();
10462 if (Cur !=
MBB.end())
10464 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
10467 }
while (Cur !=
MBB.end() && Cur != LastPHIIt);
10476 if (InsPt !=
MBB.end() &&
10477 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10478 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10479 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10480 InsPt->definesRegister(Src,
nullptr)) {
10484 .
addReg(Src, 0, SrcSubReg)
10509 if (isFullCopyInstr(
MI)) {
10510 Register DstReg =
MI.getOperand(0).getReg();
10511 Register SrcReg =
MI.getOperand(1).getReg();
10518 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10522 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10533 unsigned *PredCost)
const {
10534 if (
MI.isBundle()) {
10537 unsigned Lat = 0,
Count = 0;
10538 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
10540 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
10542 return Lat +
Count - 1;
10545 return SchedModel.computeInstrLatency(&
MI);
10551 unsigned Opcode =
MI.getOpcode();
10556 :
MI.getOperand(1).getReg();
10557 LLT DstTy =
MRI.getType(Dst);
10558 LLT SrcTy =
MRI.getType(Src);
10560 unsigned SrcAS = SrcTy.getAddressSpace();
10563 ST.hasGloballyAddressableScratch()
10571 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10572 return HandleAddrSpaceCast(
MI);
10575 auto IID = GI->getIntrinsicID();
10582 case Intrinsic::amdgcn_addrspacecast_nonnull:
10583 return HandleAddrSpaceCast(
MI);
10584 case Intrinsic::amdgcn_if:
10585 case Intrinsic::amdgcn_else:
10599 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10600 Opcode == AMDGPU::G_SEXTLOAD) {
10601 if (
MI.memoperands_empty())
10605 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10606 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10614 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10615 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10616 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10629 unsigned opcode =
MI.getOpcode();
10630 if (opcode == AMDGPU::V_READLANE_B32 ||
10631 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10632 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10635 if (isCopyInstr(
MI)) {
10639 RI.getPhysRegBaseClass(srcOp.
getReg());
10647 if (
MI.isPreISelOpcode())
10662 if (
MI.memoperands_empty())
10666 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10667 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10682 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
10684 if (!
SrcOp.isReg())
10688 if (!Reg || !
SrcOp.readsReg())
10694 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
10721 F,
"ds_ordered_count unsupported for this calling conv"));
10735 Register &SrcReg2, int64_t &CmpMask,
10736 int64_t &CmpValue)
const {
10737 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
10740 switch (
MI.getOpcode()) {
10743 case AMDGPU::S_CMP_EQ_U32:
10744 case AMDGPU::S_CMP_EQ_I32:
10745 case AMDGPU::S_CMP_LG_U32:
10746 case AMDGPU::S_CMP_LG_I32:
10747 case AMDGPU::S_CMP_LT_U32:
10748 case AMDGPU::S_CMP_LT_I32:
10749 case AMDGPU::S_CMP_GT_U32:
10750 case AMDGPU::S_CMP_GT_I32:
10751 case AMDGPU::S_CMP_LE_U32:
10752 case AMDGPU::S_CMP_LE_I32:
10753 case AMDGPU::S_CMP_GE_U32:
10754 case AMDGPU::S_CMP_GE_I32:
10755 case AMDGPU::S_CMP_EQ_U64:
10756 case AMDGPU::S_CMP_LG_U64:
  SrcReg = MI.getOperand(0).getReg();
  if (MI.getOperand(1).isReg()) {
    if (MI.getOperand(1).getSubReg())
    SrcReg2 = MI.getOperand(1).getReg();
  } else if (MI.getOperand(1).isImm()) {
    CmpValue = MI.getOperand(1).getImm();
10771 case AMDGPU::S_CMPK_EQ_U32:
10772 case AMDGPU::S_CMPK_EQ_I32:
10773 case AMDGPU::S_CMPK_LG_U32:
10774 case AMDGPU::S_CMPK_LG_I32:
10775 case AMDGPU::S_CMPK_LT_U32:
10776 case AMDGPU::S_CMPK_LT_I32:
10777 case AMDGPU::S_CMPK_GT_U32:
10778 case AMDGPU::S_CMPK_GT_I32:
10779 case AMDGPU::S_CMPK_LE_U32:
10780 case AMDGPU::S_CMPK_LE_I32:
10781 case AMDGPU::S_CMPK_GE_U32:
10782 case AMDGPU::S_CMPK_GE_I32:
  SrcReg = MI.getOperand(0).getReg();
  CmpValue = MI.getOperand(1).getImm();
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
if (MI.killsRegister(AMDGPU::SCC, &RI))
SccDef->setIsDead(false);
if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
    Def.getOpcode() != AMDGPU::S_CSELECT_B64)
bool Op1IsNonZeroImm =
    Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
bool Op2IsZeroImm =
    Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
if (!Op1IsNonZeroImm || !Op2IsZeroImm)
                                       Register SrcReg2, int64_t CmpMask,
const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
  if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
      MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
    if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
                             this](int64_t ExpectedValue, unsigned SrcSize,
                                   bool IsReversible, bool IsSigned) -> bool {
  if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
      Def->getOpcode() != AMDGPU::S_AND_B64)
  const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
    SrcOp = &Def->getOperand(2);
  else if (isMask(&Def->getOperand(2)))
    SrcOp = &Def->getOperand(1);
  if (IsSigned && BitNo == SrcSize - 1)
  ExpectedValue <<= BitNo;
  bool IsReversedCC = false;
  if (CmpValue != ExpectedValue) {
    IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
  Register DefReg = Def->getOperand(0).getReg();
  if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
  if (!MRI->use_nodbg_empty(DefReg)) {
  unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
                                                   : AMDGPU::S_BITCMP1_B32
                                    : IsReversedCC ? AMDGPU::S_BITCMP0_B64
                                                   : AMDGPU::S_BITCMP1_B64;
  Def->eraseFromParent();
case AMDGPU::S_CMP_EQ_U32:
case AMDGPU::S_CMP_EQ_I32:
case AMDGPU::S_CMPK_EQ_U32:
case AMDGPU::S_CMPK_EQ_I32:
  return optimizeCmpAnd(1, 32, true, false);
case AMDGPU::S_CMP_GE_U32:
case AMDGPU::S_CMPK_GE_U32:
  return optimizeCmpAnd(1, 32, false, false);
case AMDGPU::S_CMP_GE_I32:
case AMDGPU::S_CMPK_GE_I32:
  return optimizeCmpAnd(1, 32, false, true);
case AMDGPU::S_CMP_EQ_U64:
  return optimizeCmpAnd(1, 64, true, false);
case AMDGPU::S_CMP_LG_U32:
case AMDGPU::S_CMP_LG_I32:
case AMDGPU::S_CMPK_LG_U32:
case AMDGPU::S_CMPK_LG_I32:
  return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect();
case AMDGPU::S_CMP_GT_U32:
case AMDGPU::S_CMPK_GT_U32:
  return optimizeCmpAnd(0, 32, false, false);
case AMDGPU::S_CMP_GT_I32:
case AMDGPU::S_CMPK_GT_I32:
  return optimizeCmpAnd(0, 32, false, true);
case AMDGPU::S_CMP_LG_U64:
  return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect();
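// Aside (illustration only): optimizeCmpAnd above rewrites a pattern of the
// shape "s_and t, X, (1 << n); s_cmp_{eq,lg} t, <constant>" into a single
// s_bitcmp1/s_bitcmp0, switching to the "0" form when the compared constant
// is the opposite expectation (ExpectedValue ^ Mask). The equivalences it
// relies on:
#include <cassert>
#include <cstdint>

int main() {
  const unsigned Bit = 5;                    // arbitrary example bit
  const uint32_t Mask = 1u << Bit;
  for (uint32_t X : {0u, Mask, 0xFFFFFFFFu, 0x12345678u}) {
    bool BitSet = (X >> Bit) & 1;            // s_bitcmp1_b32 X, Bit
    assert(((X & Mask) == Mask) == BitSet);  // s_cmp_eq (X & Mask), Mask
    assert(((X & Mask) != 0) == BitSet);     // s_cmp_lg (X & Mask), 0
    assert(((X & Mask) == 0) == !BitSet);    // reversed form: s_bitcmp0
  }
  return 0;
}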
                                            AMDGPU::OpName OpName) const {
if (!ST.needsAlignedVGPRs())
int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
bool IsAGPR = RI.isAGPR(MRI, DataReg);
    IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
    MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
                                     : &AMDGPU::VReg_64_Align2RegClass);
    .addReg(DataReg, 0, Op.getSubReg())
Op.setSubReg(AMDGPU::sub0);
unsigned Opcode = MI.getOpcode();
    Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
    Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, const SIRegisterInfo &RI)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
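A hedged sketch of the typical pattern around this hook, assuming TII and a placeholder COffsetVal are in scope and that SIInstrFlags::FlatGlobal is the appropriate FlatVariant for a global FLAT access:
  int64_t COffsetVal = 0x12345;   // placeholder combined offset
  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                              SIInstrFlags::FlatGlobal)) {
    // Keep what fits in the immediate field; the remainder has to be added
    // to the address register separately.
    auto [ImmField, RemainderOffset] = TII->splitFlatOffset(
        COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
  }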
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to another...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix up operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool setsSCCifResultIsNonZero(const MachineInstr &MI)
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns true if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the given FlatVariant.
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand index.
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
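The analyzeBranch / removeBranch / insertBranch entries above follow the standard TargetInstrInfo contract; a minimal retargeting sketch, assuming TII, MBB, and DL are in scope (illustrative, not taken from this file):
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  // analyzeBranch returns false when it could understand the terminators.
  if (!TII->analyzeBranch(MBB, TBB, FBB, Cond) && TBB) {
    TII->removeBranch(MBB);
    // Re-emit the same control flow, e.g. after editing TBB/FBB/Cond.
    TII->insertBranch(MBB, TBB, FBB, Cond, DL);
  }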
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified pseudo or native opcode.
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
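For example, a pass can query an operand by name instead of by index (a sketch; the operand may be absent on some encodings, hence the null check):
  if (const MachineOperand *SOffset =
          TII->getNamedOperand(MI, AMDGPU::OpName::soffset)) {
    if (SOffset->isImm() && SOffset->getImm() == 0) {
      // The scalar offset contributes nothing here.
    }
  }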
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand of MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
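A hedged spill-and-reload sketch using this hook together with loadRegFromStackSlot listed earlier; MF, MBB, the insertion points, and SpillReg are assumed to be provided by the caller:
  int FI = MF.getFrameInfo().CreateSpillStackObject(/*Size=*/4, Align(4));
  const TargetRegisterClass *RC = &AMDGPU::SGPR_32RegClass;
  TII->storeRegToStackSlot(MBB, InsPt, SpillReg, /*isKill=*/true, FI, RC,
                           Register());
  // Later, reload the value into the same register:
  TII->loadRegFromStackSlot(MBB, RestorePt, SpillReg, FI, RC, Register());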
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructions.
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination register.
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination register.
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination register.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unmodeled side effects).
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
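A sketch of the legality check this implies, assuming Offset and AllowsNegative are supplied by the caller; the authoritative logic is SIInstrInfo::isLegalFLATOffset together with allowNegativeFlatOffset:
  unsigned N = AMDGPU::getNumFlatOffsetBits(ST);
  bool Legal = AllowsNegative ? llvm::isIntN(N, Offset)    // signed field
                              : llvm::isUIntN(N, Offset);  // unsigned field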
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions starting from FirstMI to LastMI (exclusive).
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
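BuildMI is the usual entry point for emitting machine IR in this file; a one-line illustrative use (opcode and registers are placeholders):
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), DstReg)
      .addImm(0);   // materialize zero into an SGPR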
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
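These integer helpers (together with alignDown listed above) back much of the offset and size math in this file; two worked values, assuming both are constexpr as declared:
  static_assert(llvm::divideCeil(10u, 4u) == 3u, "ceil(10/4) == 3");
  static_assert(llvm::alignDown(13u, 4u) == 12u, "13 aligned down to 4 is 12");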
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
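A small worked example of decoding an N-bit signed immediate field with these two helpers (purely illustrative, not taken from this file):
  uint64_t Encoded = 0x1FFF;                                    // all 13 bits set
  uint64_t Field = Encoded & llvm::maskTrailingOnes<uint64_t>(13);
  int64_t SignedOff = llvm::SignExtend64<13>(Field);            // == -1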
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
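A hedged sketch of attaching a frame-index memory operand to a newly built spill instruction; NewMI and FrameIndex are placeholders, and the size/alignment overload of MachineFunction::getMachineMemOperand is assumed:
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, /*Size=*/4, Align(4));
  NewMI->addMemOperand(MF, MMO);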
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.