33#include "llvm/IR/IntrinsicsAMDGPU.h"
40#define DEBUG_TYPE "si-instr-info"
42#define GET_INSTRINFO_CTOR_DTOR
43#include "AMDGPUGenInstrInfo.inc"
46#define GET_D16ImageDimIntrinsics_IMPL
47#define GET_ImageDimIntrinsicTable_IMPL
48#define GET_RsrcIntrinsics_IMPL
49#include "AMDGPUGenSearchableTables.inc"
                 cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
         all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
         });

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
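// Whether the value produced by this instruction depends on the active lane
// mask in EXEC; such results cannot be treated as invariant across EXEC
// changes.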
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {

  if (MI.isCompare()) {

      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:

      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
241 int64_t &Offset1)
const {
249 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
253 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
269 int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
270 int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
271 if (Offset0Idx == -1 || Offset1Idx == -1)
278 Offset0Idx -=
get(Opc0).NumDefs;
279 Offset1Idx -=
get(Opc1).NumDefs;
309 if (!Load0Offset || !Load1Offset)
326 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
327 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
329 if (OffIdx0 == -1 || OffIdx1 == -1)
335 OffIdx0 -=
get(Opc0).NumDefs;
336 OffIdx1 -=
get(Opc1).NumDefs;
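  // The ST64 forms of DS read2/write2 address their two elements with a
  // stride of 64 elements, so the encoded offsets are scaled accordingly.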
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);

    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
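  // Estimate each access in the cluster as LoadSize bytes rounded up to whole
  // dwords, and only keep clustering while the total stays within the
  // subtarget's memory-cluster budget.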
  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

    for (auto Def = MI, E = MBB.begin(); Def != E;) {

      if (!Def->modifiesRegister(SrcReg, &RI))

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

  if (ImpUseSuperReg) {
    Builder.addReg(ImpUseSuperReg,

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {

  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if ((Size == 16) != (SrcSize == 16)) {

    assert(ST.useRealTrue16Insts());

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg)
                       ? AMDGPU::V_ACCVGPR_READ_B32_e64
                       : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {

    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

         AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
         AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
  if (IsAGPRDst || IsAGPRSrc) {
    if (!DstLow || !SrcLow) {
                        "Cannot use hi16 subreg with an AGPR!");

  if (ST.useRealTrue16Insts()) {

  if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
      (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

  if (IsSGPRSrc && !ST.hasSDWAScalar()) {
    if (!DstLow || !SrcLow) {
                        "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {

    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
    else
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;
1102 std::unique_ptr<RegScavenger> RS;
1103 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1104 RS = std::make_unique<RegScavenger>();
1110 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1111 const bool CanKillSuperReg = KillSrc && !Overlap;
1113 for (
unsigned Idx = 0; Idx < SubIndices.
size(); ++Idx) {
1116 SubIdx = SubIndices[Idx];
1118 SubIdx = SubIndices[SubIndices.
size() - Idx - 1];
1119 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1120 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1121 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1123 bool IsFirstSubreg = Idx == 0;
1124 bool UseKill = CanKillSuperReg && Idx == SubIndices.
size() - 1;
1126 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1130 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1131 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1177 return &AMDGPU::VGPR_32RegClass;
1189 assert(
MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
1190 "Not a VGPR32 reg");
1192 if (
Cond.size() == 1) {
1193 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1202 }
else if (
Cond.size() == 2) {
1203 assert(
Cond[0].isImm() &&
"Cond[0] is not an immediate");
1205 case SIInstrInfo::SCC_TRUE: {
1206 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1216 case SIInstrInfo::SCC_FALSE: {
1217 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1227 case SIInstrInfo::VCCNZ: {
1230 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1241 case SIInstrInfo::VCCZ: {
1244 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1255 case SIInstrInfo::EXECNZ: {
1256 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1257 Register SReg2 =
MRI.createVirtualRegister(RI.getBoolRC());
1268 case SIInstrInfo::EXECZ: {
1269 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1270 Register SReg2 =
MRI.createVirtualRegister(RI.getBoolRC());
1295 Register Reg =
MRI.createVirtualRegister(RI.getBoolRC());
1308 Register Reg =
MRI.createVirtualRegister(RI.getBoolRC());
1318 int64_t &ImmVal)
const {
1319 switch (
MI.getOpcode()) {
1320 case AMDGPU::V_MOV_B32_e32:
1321 case AMDGPU::S_MOV_B32:
1322 case AMDGPU::S_MOVK_I32:
1323 case AMDGPU::S_MOV_B64:
1324 case AMDGPU::V_MOV_B64_e32:
1325 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1326 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1327 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1328 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1329 case AMDGPU::V_MOV_B64_PSEUDO: {
1333 return MI.getOperand(0).getReg() == Reg;
1338 case AMDGPU::S_BREV_B32:
1339 case AMDGPU::V_BFREV_B32_e32:
1340 case AMDGPU::V_BFREV_B32_e64: {
1344 return MI.getOperand(0).getReg() == Reg;
1349 case AMDGPU::S_NOT_B32:
1350 case AMDGPU::V_NOT_B32_e32:
1351 case AMDGPU::V_NOT_B32_e64: {
1354 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1355 return MI.getOperand(0).getReg() == Reg;
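// Pick a move opcode matching the destination register class: S_MOV for
// SGPRs, V_MOV for VGPRs (16/32/64-bit variants), and a generic COPY for
// classes with no direct move instruction.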
  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  }
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;

                                              bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
  return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                        bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");

    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;

                                         bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

                    FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = RI.getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {

    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                            bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = RI.getSpillSize(*RC);

                    FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {

    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
                              unsigned Quantity) const {

  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;
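  // Abort path: mask the doorbell ID with DoorbellIDMask, OR in the
  // ECQueueWaveAbort bit, and write the result to M0 for the abort message.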
  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
  Register DoorbellRegMasked =
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
  Register SetWaveAbortBit =
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  switch (MI.getOpcode()) {

    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
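// The *_term pseudos are EXEC-mask updates that must stay terminators until
// control flow is finalized; here they are rewritten back to the ordinary
// S_MOV/S_XOR/S_OR/S_ANDN2/S_AND(_SAVEEXEC) encodings.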
  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));

  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
    MI.setDesc(
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {

    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {

    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
    } else {
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;
    }

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
      BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

      BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {

    Op.setOffset(Op.getOffset() + 4);
    BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {

  case AMDGPU::ENTER_STRICT_WQM: {

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {

  case AMDGPU::SI_RETURN: {

    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
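  // A wide scalar load whose only user reads a 256- or 128-bit subregister is
  // shrunk to the matching S_LOAD_DWORDX8/X4 with the byte offset folded in,
  // so the unused halves are never loaded.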
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
2604std::pair<MachineInstr*, MachineInstr*>
2606 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2608 if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2611 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2612 return std::pair(&
MI,
nullptr);
2623 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2625 if (Dst.isPhysical()) {
2626 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2629 auto Tmp =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2633 for (
unsigned I = 1;
I <= 2; ++
I) {
2636 if (
SrcOp.isImm()) {
2638 Imm.ashrInPlace(Part * 32);
2639 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2643 if (Src.isPhysical())
2644 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2651 MovDPP.addImm(MO.getImm());
2653 Split[Part] = MovDPP;
2657 if (Dst.isVirtual())
2664 MI.eraseFromParent();
2665 return std::pair(Split[0], Split[1]);
2668std::optional<DestSourcePair>
2670 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2673 return std::nullopt;
2677 AMDGPU::OpName Src0OpName,
2679 AMDGPU::OpName Src1OpName)
const {
2686 "All commutable instructions have both src0 and src1 modifiers");
2688 int Src0ModsVal = Src0Mods->
getImm();
2689 int Src1ModsVal = Src1Mods->
getImm();
2691 Src1Mods->
setImm(Src0ModsVal);
2692 Src0Mods->
setImm(Src1ModsVal);
2701 bool IsKill = RegOp.
isKill();
2703 bool IsUndef = RegOp.
isUndef();
2704 bool IsDebug = RegOp.
isDebug();
2706 if (NonRegOp.
isImm())
2708 else if (NonRegOp.
isFI())
2729 int64_t NonRegVal = NonRegOp1.
getImm();
2732 NonRegOp2.
setImm(NonRegVal);
2739 unsigned OpIdx1)
const {
2744 unsigned Opc =
MI.getOpcode();
2745 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2755 if ((
int)OpIdx0 == Src0Idx && !MO0.
isReg() &&
2758 if ((
int)OpIdx1 == Src0Idx && !MO1.
isReg() &&
2763 if ((
int)OpIdx1 != Src0Idx && MO0.
isReg()) {
2769 if ((
int)OpIdx0 != Src0Idx && MO1.
isReg()) {
2784 unsigned Src1Idx)
const {
2785 assert(!NewMI &&
"this should never be used");
2787 unsigned Opc =
MI.getOpcode();
2789 if (CommutedOpcode == -1)
2792 if (Src0Idx > Src1Idx)
2795 assert(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) ==
2796 static_cast<int>(Src0Idx) &&
2797 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1) ==
2798 static_cast<int>(Src1Idx) &&
2799 "inconsistency with findCommutedOpIndices");
2824 Src1, AMDGPU::OpName::src1_modifiers);
2827 AMDGPU::OpName::src1_sel);
2839 unsigned &SrcOpIdx0,
2840 unsigned &SrcOpIdx1)
const {
2845 unsigned &SrcOpIdx0,
2846 unsigned &SrcOpIdx1)
const {
2847 if (!
Desc.isCommutable())
2850 unsigned Opc =
Desc.getOpcode();
2851 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2855 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2859 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
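// Branches whose displacement exceeds the S_CBRANCH/S_BRANCH immediate range
// are expanded into an indirect branch: materialize the PC (S_GETPC_B64, or an
// add-to-PC instruction where the subtarget has one), add the distance to the
// destination expressed through temporary MC symbols, and set the PC, using a
// scavenged or reserved SGPR pair to hold the computed target.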
                                     int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.hasAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);

    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;

    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(

    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  switch (Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {

    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {

    FBB = I->getOperand(0).getMBB();
                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {

  unsigned RemovedSize = 0;

    if (MI.isBranch() || MI.isReturn()) {

      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {

  if (Cond[0].isImm()) {

                               Register FalseReg, int &CondCycles,
                               int &TrueCycles, int &FalseCycles) const {

  if (MRI.getRegClass(FalseReg) != RC)

  CondCycles = TrueCycles = FalseCycles = NumInsts;

  return RI.hasVGPRs(RC) && NumInsts <= 6;

  if (MRI.getRegClass(FalseReg) != RC)

  if (NumInsts % 2 == 0)

  CondCycles = TrueCycles = FalseCycles = NumInsts;
  return RI.isSGPRClass(RC);
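// insertSelect: a select on a wide register is expanded into a REG_SEQUENCE of
// per-subregister V_CNDMASK_B32 selects (or S_CSELECT_B32/B64 when the
// condition lives in SCC), processed 32 or 64 bits at a time.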
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {

    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
      AMDGPU::sub0,  AMDGPU::sub1,  AMDGPU::sub2,  AMDGPU::sub3,
      AMDGPU::sub4,  AMDGPU::sub5,  AMDGPU::sub6,  AMDGPU::sub7,
      AMDGPU::sub8,  AMDGPU::sub9,  AMDGPU::sub10, AMDGPU::sub11,
      AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
      AMDGPU::sub0_sub1,   AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5,   AMDGPU::sub6_sub7,
      AMDGPU::sub8_sub9,   AMDGPU::sub10_sub11,
      AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;

  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {

      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;

      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:

  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();

    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);

      MI.removeOperand(Idx);

                                                unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:

  case AMDGPU::sub1_lo16:

  case AMDGPU::sub1_hi16:

  return std::nullopt;
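// Map MAC/FMA opcodes to their literal-operand forms: FMAAK/MADAK carry the
// literal K in src2, FMAMK/MADMK carry it in src1. These are used when an
// immediate operand can be folded directly into the multiply-add.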
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
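// foldImmediate: try to fold the immediate defined by DefMI straight into
// UseMI. A plain COPY of the constant becomes an S_MOV/V_MOV (or
// V_ACCVGPR_WRITE) of the literal, and MAC/FMA users are rewritten to their
// FMAMK/FMAAK forms when the constant-bus and literal-encoding rules allow it.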
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {

      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (RI.hasVGPRs(DstRC))

    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);

  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

    if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {

        if (Def && Def->isMoveImmediate() &&

        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {

        if (Def && Def->isMoveImmediate() &&
            MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,

  int64_t Offset0, Offset1;

  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())

  auto *Def = MRI.getUniqueVRegDef(Reg);

    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();

    if (Op.isReg() && Op.isKill())
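// Map the two-address MAC/FMAC forms to the three-address MAD/FMA opcodes used
// by convertToThreeAddress; the f16 result additionally depends on whether the
// subtarget uses real or fake true16 instructions.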
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;

  if (Def.isEarlyClobber() && Def.isReg() &&

  auto UpdateDefIndex = [&](LiveRange &LR) {
    auto *S = LR.find(OldIndex);
    if (S != LR.end() && S->start == OldIndex) {
      assert(S->valno && S->valno->def == OldIndex);
      S->start = NewIndex;
      S->valno->def = NewIndex;

  for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {

    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();

    if (MRI.hasOneNonDBGUse(DefReg)) {

      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);

        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);
4117 ThreeAddressUpdates &U)
const {
4119 unsigned Opc =
MI.getOpcode();
4123 if (NewMFMAOpc != -1) {
4126 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I)
4127 MIB.
add(
MI.getOperand(
I));
4135 for (
unsigned I = 0,
E =
MI.getNumOperands();
I !=
E; ++
I)
4140 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4141 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4142 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4146 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4147 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4148 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4149 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4150 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4151 bool Src0Literal =
false;
4156 case AMDGPU::V_MAC_F16_e64:
4157 case AMDGPU::V_FMAC_F16_e64:
4158 case AMDGPU::V_FMAC_F16_t16_e64:
4159 case AMDGPU::V_FMAC_F16_fake16_e64:
4160 case AMDGPU::V_MAC_F32_e64:
4161 case AMDGPU::V_MAC_LEGACY_F32_e64:
4162 case AMDGPU::V_FMAC_F32_e64:
4163 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4164 case AMDGPU::V_FMAC_F64_e64:
4166 case AMDGPU::V_MAC_F16_e32:
4167 case AMDGPU::V_FMAC_F16_e32:
4168 case AMDGPU::V_MAC_F32_e32:
4169 case AMDGPU::V_MAC_LEGACY_F32_e32:
4170 case AMDGPU::V_FMAC_F32_e32:
4171 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4172 case AMDGPU::V_FMAC_F64_e32: {
4173 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4174 AMDGPU::OpName::src0);
4175 const MachineOperand *Src0 = &
MI.getOperand(Src0Idx);
4186 MachineInstrBuilder MIB;
4189 const MachineOperand *Src0Mods =
4192 const MachineOperand *Src1Mods =
4195 const MachineOperand *Src2Mods =
4201 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4202 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4204 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4206 MachineInstr *
DefMI;
4242 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4258 if (Src0Literal && !ST.hasVOP3Literal())
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);

  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  if (ST.isTgSplitEnabled())

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {
                                        ST.hasInv2PiInlineImm());
                                        ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                                            ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();

  bool HasInv2Pi = ST.hasInv2PiInlineImm();

    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&

  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))

  return ST.hasVOP3Literal();

                                   int64_t ImmVal) const {

  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))

  return RI.opCanUseInlineConstant(OpInfo.OperandType);

         "unexpected imm-like operand kind");
4692 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4710 AMDGPU::OpName
OpName)
const {
4712 return Mods && Mods->
getImm();
4725 switch (
MI.getOpcode()) {
4726 default:
return false;
4728 case AMDGPU::V_ADDC_U32_e64:
4729 case AMDGPU::V_SUBB_U32_e64:
4730 case AMDGPU::V_SUBBREV_U32_e64: {
4738 case AMDGPU::V_MAC_F16_e64:
4739 case AMDGPU::V_MAC_F32_e64:
4740 case AMDGPU::V_MAC_LEGACY_F32_e64:
4741 case AMDGPU::V_FMAC_F16_e64:
4742 case AMDGPU::V_FMAC_F16_t16_e64:
4743 case AMDGPU::V_FMAC_F16_fake16_e64:
4744 case AMDGPU::V_FMAC_F32_e64:
4745 case AMDGPU::V_FMAC_F64_e64:
4746 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4752 case AMDGPU::V_CNDMASK_B32_e64:
4758     if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
4788         (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
4797                                            unsigned Op32) const {
4811       Inst32.add(MI.getOperand(I));
4815   int Idx = MI.getNumExplicitDefs();
4817     int OpTy = MI.getDesc().operands()[Idx++].OperandType;
4822 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
4844 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
4852 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
4855 return AMDGPU::SReg_32RegClass.contains(Reg) ||
4856 AMDGPU::SReg_64RegClass.contains(Reg);
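// Illustrative sketch (not part of the upstream file): the checks above can be
// read as one predicate over a physical register. The helper name
// isScalarPhysReg is hypothetical; the register lists mirror the cases handled
// in the surrounding code.
//
//   static bool isScalarPhysReg(MCRegister Reg) {
//     if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
//       return true;
//     if (Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0)
//       return true;
//     return AMDGPU::SReg_32RegClass.contains(Reg) ||
//            AMDGPU::SReg_64RegClass.contains(Reg);
//   }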
4862   return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
4874   return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
4884 switch (MO.getReg()) {
4886 case AMDGPU::VCC_LO:
4887 case AMDGPU::VCC_HI:
4889 case AMDGPU::FLAT_SCR:
4902   switch (MI.getOpcode()) {
4903 case AMDGPU::V_READLANE_B32:
4904 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4905 case AMDGPU::V_WRITELANE_B32:
4906 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4913   if (MI.isPreISelOpcode() ||
4914       SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
4929   if (SubReg.getReg().isPhysical())
4932   return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
4943   if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
4944     ErrInfo = "illegal copy from vector register to SGPR";
4962   if (!MRI.isSSA() && MI.isCopy())
4963     return verifyCopy(MI, MRI, ErrInfo);
4965 if (SIInstrInfo::isGenericOpcode(Opcode))
4968 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4969 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4970 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4972 if (Src0Idx == -1) {
4974 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
4975 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
4976 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
4977 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
4982   if (!Desc.isVariadic() &&
4983       Desc.getNumOperands() != MI.getNumExplicitOperands()) {
4984     ErrInfo = "Instruction has wrong number of operands.";
4988   if (MI.isInlineAsm()) {
5001       if (!Reg.isVirtual() && !RC->contains(Reg)) {
5002         ErrInfo = "inlineasm operand has incorrect register class.";
5010   if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
5011     ErrInfo = "missing memory operand from image instruction.";
5016   for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
5019       ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
5020                 "all fp values to integers.";
5025 int16_t RegClass = getOpRegClassID(OpInfo);
5027 switch (OpInfo.OperandType) {
5029       if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
5030         ErrInfo = "Illegal immediate value for operand.";
5064         ErrInfo = "Illegal immediate value for operand.";
5071         ErrInfo = "Expected inline constant for operand.";
5086       if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
5087         ErrInfo = "Expected immediate, but got non-immediate";
5096 if (OpInfo.isGenericType())
5111 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
5113       if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
5115                 RI.getSubRegisterClass(RC, MO.getSubReg())) {
5116           RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
5123       if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5124         ErrInfo = "Subtarget requires even aligned vector registers";
5129     if (RegClass != -1) {
5130       if (Reg.isVirtual())
5135         ErrInfo = "Operand has incorrect register class.";
5143     if (!ST.hasSDWA()) {
5144       ErrInfo = "SDWA is not supported on this target";
5148     for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5149                     AMDGPU::OpName::dst_sel}) {
5153         int64_t Imm = MO->getImm();
5155           ErrInfo = "Invalid SDWA selection";
5160 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5162     for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5167       if (!ST.hasSDWAScalar()) {
5169         if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
5170           ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
5177             "Only reg allowed as operands in SDWA instructions on GFX9+";
5183     if (!ST.hasSDWAOmod()) {
5186       if (OMod != nullptr &&
5188         ErrInfo = "OMod not allowed in SDWA instructions on VI";
5193     if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5194         Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5195         Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5196         Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5199       unsigned Mods = Src0ModsMO->getImm();
5202         ErrInfo = "sext, abs and neg are not allowed on this instruction";
5208     if (isVOPC(BasicOpcode)) {
5209       if (!ST.hasSDWASdst() && DstIdx != -1) {
5212         if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5213           ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
5216       } else if (!ST.hasSDWAOutModsVOPC()) {
5219         if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
5220           ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
5226         if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
5227           ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
5234     if (DstUnused && DstUnused->isImm() &&
5237       if (!Dst.isReg() || !Dst.isTied()) {
5238         ErrInfo = "Dst register should have tied register";
5243           MI.getOperand(MI.findTiedOperandIdx(DstIdx));
5246             "Dst register should be tied to implicit use of preserved register";
5250         ErrInfo = "Dst register should use same physical register as preserved";
5257   if (isImage(Opcode) && !MI.mayStore()) {
5269     if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5277 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5281 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5282 if (RegCount > DstSize) {
5283       ErrInfo = "Image instruction returns too many registers for dst "
5292   if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5293     unsigned ConstantBusCount = 0;
5294     bool UsesLiteral = false;
5297     int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5301       LiteralVal = &MI.getOperand(ImmIdx);
5310     for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5321         } else if (!MO.isFI()) {
5328             ErrInfo = "VOP2/VOP3 instruction uses more than one literal";
5338           if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
5339                 return !RI.regsOverlap(SGPRUsed, SGPR);
5348     if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5349         Opcode != AMDGPU::V_WRITELANE_B32) {
5350       ErrInfo = "VOP* instruction violates constant bus restriction";
5354     if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5355       ErrInfo = "VOP3 instruction uses literal";
5362   if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5363     unsigned SGPRCount = 0;
5366     for (int OpIdx : {Src0Idx, Src1Idx}) {
5374         if (MO.getReg() != SGPRUsed)
5379     if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5380       ErrInfo = "WRITELANE instruction violates constant bus restriction";
5387   if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5388       Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5395       ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
5405       ErrInfo = "ABS not allowed in VOP3B instructions";
5418     ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
5425   if (Desc.isBranch()) {
5427       ErrInfo = "invalid branch target for SOPK instruction";
5434         ErrInfo = "invalid immediate for SOPK instruction";
5439         ErrInfo = "invalid immediate for SOPK instruction";
5446   if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5447       Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5448       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5449       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5450     const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5451                        Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5453     const unsigned StaticNumOps =
5454         Desc.getNumOperands() + Desc.implicit_uses().size();
5455     const unsigned NumImplicitOps = IsDst ? 2 : 1;
5460     if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5461       ErrInfo = "missing implicit register operands";
5467       if (!Dst->isUse()) {
5468         ErrInfo = "v_movreld_b32 vdst should be a use operand";
5473       if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5474           UseOpIdx != StaticNumOps + 1) {
5475         ErrInfo = "movrel implicit operands should be tied";
5482         = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5484         !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5485       ErrInfo = "src0 should be subreg of implicit vector use";
5493     if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5494       ErrInfo = "VALU instruction does not implicitly read exec mask";
5500   if (MI.mayStore() &&
5505     if (Soff && Soff->getReg() != AMDGPU::M0) {
5506       ErrInfo = "scalar stores must use m0 as offset register";
5512   if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
5514     if (Offset->getImm() != 0) {
5515       ErrInfo = "subtarget does not support offsets in flat instructions";
5520   if (isDS(MI) && !ST.hasGDS()) {
5522     if (GDSOp && GDSOp->getImm() != 0) {
5523       ErrInfo = "GDS is not supported on this subtarget";
5531     int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5532                                                AMDGPU::OpName::vaddr0);
5533     AMDGPU::OpName RSrcOpName =
5534         isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5535     int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5543       ErrInfo = "dim is out of range";
5548     if (ST.hasR128A16()) {
5550       IsA16 = R128A16->getImm() != 0;
5551     } else if (ST.hasA16()) {
5553       IsA16 = A16->getImm() != 0;
5556     bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5558     unsigned AddrWords =
5561     unsigned VAddrWords;
5563       VAddrWords = RsrcIdx - VAddr0Idx;
5564       if (ST.hasPartialNSAEncoding() &&
5566         unsigned LastVAddrIdx = RsrcIdx - 1;
5567         VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
5575     if (VAddrWords != AddrWords) {
5577              << " but got " << VAddrWords << "\n");
5578       ErrInfo = "bad vaddr size";
5588     unsigned DC = DppCt->getImm();
5589 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5590 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5591 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5592 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5593 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5594 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5595 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5596       ErrInfo = "Invalid dpp_ctrl value";
5599     if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5601       ErrInfo = "Invalid dpp_ctrl value: "
5602                 "wavefront shifts are not supported on GFX10+";
5605     if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5607       ErrInfo = "Invalid dpp_ctrl value: "
5608                 "broadcasts are not supported on GFX10+";
5611     if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5613       if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5614           DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5615           !ST.hasGFX90AInsts()) {
5616         ErrInfo = "Invalid dpp_ctrl value: "
5617                   "row_newbroadcast/row_share is not supported before "
5621       if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5622         ErrInfo = "Invalid dpp_ctrl value: "
5623                   "row_share and row_xmask are not supported before GFX10";
5628     if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5631       ErrInfo = "Invalid dpp_ctrl value: "
5632                 "DP ALU dpp only support row_newbcast";
5639     AMDGPU::OpName DataName =
5640         isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5646     if (ST.hasGFX90AInsts()) {
5647       if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
5648           (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
5649         ErrInfo = "Invalid register class: "
5650                   "vdata and vdst should be both VGPR or AGPR";
5653       if (Data && Data2 &&
5655         ErrInfo = "Invalid register class: "
5656                   "both data operands should be VGPR or AGPR";
5660       if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5662           (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
5663         ErrInfo = "Invalid register class: "
5664                   "agpr loads and stores not supported on this GPU";
5670   if (ST.needsAlignedVGPRs()) {
5671     const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
5676       if (Reg.isPhysical())
5677         return !(RI.getHWRegIndex(Reg) & 1);
5679       return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5680              !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
5683     if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5684         Opcode == AMDGPU::DS_GWS_BARRIER) {
5686       if (!isAlignedReg(AMDGPU::OpName::data0)) {
5687         ErrInfo = "Subtarget requires even aligned vector registers "
5688                   "for DS_GWS instructions";
5694       if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5695         ErrInfo = "Subtarget requires even aligned vector registers "
5696                   "for vaddr operand of image instructions";
5702   if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5704     if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5705       ErrInfo = "Invalid register class: "
5706                 "v_accvgpr_write with an SGPR is not supported on this GPU";
5711   if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5714       ErrInfo = "pseudo expects only physical SGPRs";
5721     if (!ST.hasScaleOffset()) {
5722       ErrInfo = "Subtarget does not support offset scaling";
5726       ErrInfo = "Instruction does not support offset scaling";
5735   for (unsigned I = 0; I < 3; ++I) {
5748   switch (MI.getOpcode()) {
5749   default: return AMDGPU::INSTRUCTION_LIST_END;
5750   case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
5751   case AMDGPU::COPY: return AMDGPU::COPY;
5752   case AMDGPU::PHI: return AMDGPU::PHI;
5753   case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
5754   case AMDGPU::WQM: return AMDGPU::WQM;
5755   case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
5756   case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
5757   case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
5758   case AMDGPU::S_MOV_B32: {
5760     return MI.getOperand(1).isReg() ||
5761            RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
5762            AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5764 case AMDGPU::S_ADD_I32:
5765 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5766 case AMDGPU::S_ADDC_U32:
5767 return AMDGPU::V_ADDC_U32_e32;
5768 case AMDGPU::S_SUB_I32:
5769 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5772 case AMDGPU::S_ADD_U32:
5773 return AMDGPU::V_ADD_CO_U32_e32;
5774 case AMDGPU::S_SUB_U32:
5775 return AMDGPU::V_SUB_CO_U32_e32;
5776 case AMDGPU::S_ADD_U64_PSEUDO:
5777 return AMDGPU::V_ADD_U64_PSEUDO;
5778 case AMDGPU::S_SUB_U64_PSEUDO:
5779 return AMDGPU::V_SUB_U64_PSEUDO;
5780   case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
5781   case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
5782   case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
5783   case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
5784   case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
5785   case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
5786   case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
5787   case AMDGPU::S_XNOR_B32:
5788     return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5789   case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
5790   case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
5791   case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
5792   case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
5793   case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
5794   case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
5795   case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
5796   case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
5797   case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
5798   case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
5799   case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
5800   case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
5801   case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
5802   case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
5803   case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
5804   case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
5805   case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
5806   case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
5807   case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
5808   case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
5809   case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
5810   case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
5811   case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
5812   case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
5813   case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
5814   case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
5815   case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
5816   case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
5817   case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
5818   case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
5819   case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
5820   case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
5821   case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
5822   case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
5823   case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
5824   case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
5825   case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
5826   case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
5827   case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
5828   case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
5829   case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
5830   case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
5831 case AMDGPU::S_CVT_F32_F16:
5832 case AMDGPU::S_CVT_HI_F32_F16:
5833 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5834 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5835 case AMDGPU::S_CVT_F16_F32:
5836 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5837 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5838   case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
5839   case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
5840   case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
5841   case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
5842 case AMDGPU::S_CEIL_F16:
5843 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
5844 : AMDGPU::V_CEIL_F16_fake16_e64;
5845 case AMDGPU::S_FLOOR_F16:
5846 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
5847 : AMDGPU::V_FLOOR_F16_fake16_e64;
5848 case AMDGPU::S_TRUNC_F16:
5849 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
5850 : AMDGPU::V_TRUNC_F16_fake16_e64;
5851 case AMDGPU::S_RNDNE_F16:
5852 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
5853 : AMDGPU::V_RNDNE_F16_fake16_e64;
5854   case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
5855   case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
5856   case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
5857   case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
5858   case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
5859   case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
5860   case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
5861 case AMDGPU::S_ADD_F16:
5862 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
5863 : AMDGPU::V_ADD_F16_fake16_e64;
5864 case AMDGPU::S_SUB_F16:
5865 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
5866 : AMDGPU::V_SUB_F16_fake16_e64;
5867 case AMDGPU::S_MIN_F16:
5868 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
5869 : AMDGPU::V_MIN_F16_fake16_e64;
5870 case AMDGPU::S_MAX_F16:
5871 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
5872 : AMDGPU::V_MAX_F16_fake16_e64;
5873 case AMDGPU::S_MINIMUM_F16:
5874 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
5875 : AMDGPU::V_MINIMUM_F16_fake16_e64;
5876 case AMDGPU::S_MAXIMUM_F16:
5877 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
5878 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5879 case AMDGPU::S_MUL_F16:
5880 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
5881 : AMDGPU::V_MUL_F16_fake16_e64;
5882   case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5883   case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5884   case AMDGPU::S_FMAC_F16:
5885     return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
5886                                    : AMDGPU::V_FMAC_F16_fake16_e64;
5887   case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
5888   case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
5889   case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
5890   case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
5891   case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
5892   case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
5893   case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
5894   case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
5895   case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
5896   case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
5897   case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
5898   case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
5899   case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
5900   case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
5901   case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
5902   case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
5903 case AMDGPU::S_CMP_LT_F16:
5904 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
5905 : AMDGPU::V_CMP_LT_F16_fake16_e64;
5906 case AMDGPU::S_CMP_EQ_F16:
5907 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
5908 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
5909 case AMDGPU::S_CMP_LE_F16:
5910 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
5911 : AMDGPU::V_CMP_LE_F16_fake16_e64;
5912 case AMDGPU::S_CMP_GT_F16:
5913 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
5914 : AMDGPU::V_CMP_GT_F16_fake16_e64;
5915 case AMDGPU::S_CMP_LG_F16:
5916 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
5917 : AMDGPU::V_CMP_LG_F16_fake16_e64;
5918 case AMDGPU::S_CMP_GE_F16:
5919 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
5920 : AMDGPU::V_CMP_GE_F16_fake16_e64;
5921 case AMDGPU::S_CMP_O_F16:
5922 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
5923 : AMDGPU::V_CMP_O_F16_fake16_e64;
5924 case AMDGPU::S_CMP_U_F16:
5925 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
5926 : AMDGPU::V_CMP_U_F16_fake16_e64;
5927 case AMDGPU::S_CMP_NGE_F16:
5928 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
5929 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
5930 case AMDGPU::S_CMP_NLG_F16:
5931 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
5932 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
5933 case AMDGPU::S_CMP_NGT_F16:
5934 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
5935 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
5936 case AMDGPU::S_CMP_NLE_F16:
5937 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
5938 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
5939 case AMDGPU::S_CMP_NEQ_F16:
5940 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
5941 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
5942 case AMDGPU::S_CMP_NLT_F16:
5943 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
5944 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
5945   case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
5946   case AMDGPU::V_S_EXP_F16_e64:
5947     return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
5948                                    : AMDGPU::V_EXP_F16_fake16_e64;
5949   case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
5950   case AMDGPU::V_S_LOG_F16_e64:
5951     return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
5952                                    : AMDGPU::V_LOG_F16_fake16_e64;
5953   case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
5954   case AMDGPU::V_S_RCP_F16_e64:
5955     return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
5956                                    : AMDGPU::V_RCP_F16_fake16_e64;
5957   case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
5958   case AMDGPU::V_S_RSQ_F16_e64:
5959     return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
5960                                    : AMDGPU::V_RSQ_F16_fake16_e64;
5961   case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
5962   case AMDGPU::V_S_SQRT_F16_e64:
5963     return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
5964                                    : AMDGPU::V_SQRT_F16_fake16_e64;
5967 "Unexpected scalar opcode without corresponding vector one!");
6016 "Not a whole wave func");
6019   if (MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6020       MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6027                                               unsigned OpNo) const {
6029   if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
6030       Desc.operands()[OpNo].RegClass == -1) {
6033     if (Reg.isVirtual()) {
6035           MI.getParent()->getParent()->getRegInfo();
6036       return MRI.getRegClass(Reg);
6038     return RI.getPhysRegBaseClass(Reg);
6041   int16_t RegClass = getOpRegClassID(Desc.operands()[OpNo]);
6042   return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6050   unsigned RCID = getOpRegClassID(get(MI.getOpcode()).operands()[OpIdx]);
6052   unsigned Size = RI.getRegSizeInBits(*RC);
6053   unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6054                     : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6055                                  : AMDGPU::V_MOV_B32_e32;
6057     Opcode = AMDGPU::COPY;
6058   else if (RI.isSGPRClass(RC))
6059     Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
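// Illustrative sketch (not part of the upstream file): the same size- and
// class-driven selection written as a standalone helper. The helper name is
// hypothetical; the opcode choices mirror the code above.
//
//   static unsigned pickMovOpcode(const SIRegisterInfo &RI,
//                                 const TargetRegisterClass *RC) {
//     unsigned Size = RI.getRegSizeInBits(*RC);
//     if (RI.isSGPRClass(RC))
//       return Size == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
//     return Size == 64   ? AMDGPU::V_MOV_B64_PSEUDO
//            : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
//                         : AMDGPU::V_MOV_B32_e32;
//   }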
6073     return RI.getSubReg(SuperReg.getReg(), SubIdx);
6079   unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx);
6090   if (SubIdx == AMDGPU::sub0)
6092   if (SubIdx == AMDGPU::sub1)
6104 void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
6120   if (Reg.isPhysical())
6130     return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()) != nullptr;
6133   return RI.getCommonSubClass(DRC, RC) != nullptr;
6140   unsigned Opc = MI.getOpcode();
6146   constexpr AMDGPU::OpName OpNames[] = {
6147       AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6150     int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
6151     if (static_cast<unsigned>(SrcIdx) == OpIdx &&
6161 bool IsAGPR = RI.isAGPR(
MRI, MO.
getReg());
6162 if (IsAGPR && !ST.hasMAIInsts())
6164 if (IsAGPR && (!ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
6168 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6169 const int DataIdx = AMDGPU::getNamedOperandIdx(
6170 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6171 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6172 MI.getOperand(DataIdx).isReg() &&
6173 RI.isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6175 if ((
int)
OpIdx == DataIdx) {
6176 if (VDstIdx != -1 &&
6177 RI.isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6180 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6181 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6182 RI.isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6187 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6188 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6208 constexpr unsigned NumOps = 3;
6209   constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
6210       AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6211       AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6212       AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6217     int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
6220     MO = &MI.getOperand(SrcIdx);
6227         AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
6231     unsigned Mods = MI.getOperand(ModsIdx).getImm();
6235 return !OpSel && !OpSelHi;
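// Illustrative sketch (not part of the upstream file): reading the packed-op
// modifier bits for one source operand. Variable names are hypothetical; the
// flag masks (SISrcMods::OP_SEL_0 / OP_SEL_1) are the ones this backend uses
// for op_sel / op_sel_hi.
//
//   int ModsIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
//                                            AMDGPU::OpName::src0_modifiers);
//   unsigned Mods = MI.getOperand(ModsIdx).getImm();
//   bool OpSelLo = Mods & SISrcMods::OP_SEL_0;   // low-half select
//   bool OpSelHi = Mods & SISrcMods::OP_SEL_1;   // high-half select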
6244 int64_t RegClass = getOpRegClassID(OpInfo);
6246 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6255 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6256 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6260 if (!LiteralLimit--)
6270 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6278 if (--ConstantBusLimit <= 0)
6290 if (!LiteralLimit--)
6292 if (--ConstantBusLimit <= 0)
6298 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6302 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6304 !
Op.isIdenticalTo(*MO))
6314 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6328 bool Is64BitOp = Is64BitFPOp ||
6335 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6344 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6362 bool IsGFX950Only = ST.hasGFX950Insts();
6363 bool IsGFX940Only = ST.hasGFX940Insts();
6365 if (!IsGFX950Only && !IsGFX940Only)
6383 unsigned Opcode =
MI.getOpcode();
6385 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6386 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6387 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6388 case AMDGPU::V_MQSAD_U32_U8_e64:
6389 case AMDGPU::V_PK_ADD_F16:
6390 case AMDGPU::V_PK_ADD_F32:
6391 case AMDGPU::V_PK_ADD_I16:
6392 case AMDGPU::V_PK_ADD_U16:
6393 case AMDGPU::V_PK_ASHRREV_I16:
6394 case AMDGPU::V_PK_FMA_F16:
6395 case AMDGPU::V_PK_FMA_F32:
6396 case AMDGPU::V_PK_FMAC_F16_e32:
6397 case AMDGPU::V_PK_FMAC_F16_e64:
6398 case AMDGPU::V_PK_LSHLREV_B16:
6399 case AMDGPU::V_PK_LSHRREV_B16:
6400 case AMDGPU::V_PK_MAD_I16:
6401 case AMDGPU::V_PK_MAD_U16:
6402 case AMDGPU::V_PK_MAX_F16:
6403 case AMDGPU::V_PK_MAX_I16:
6404 case AMDGPU::V_PK_MAX_U16:
6405 case AMDGPU::V_PK_MIN_F16:
6406 case AMDGPU::V_PK_MIN_I16:
6407 case AMDGPU::V_PK_MIN_U16:
6408 case AMDGPU::V_PK_MOV_B32:
6409 case AMDGPU::V_PK_MUL_F16:
6410 case AMDGPU::V_PK_MUL_F32:
6411 case AMDGPU::V_PK_MUL_LO_U16:
6412 case AMDGPU::V_PK_SUB_I16:
6413 case AMDGPU::V_PK_SUB_U16:
6414 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6423 unsigned Opc =
MI.getOpcode();
6426 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6429 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6435 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6442 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6445 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6451 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6461 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6462 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6463 if (!RI.isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6475 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6477 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6489 if (HasImplicitSGPR || !
MI.isCommutable()) {
6506 if (CommutedOpc == -1) {
6511 MI.setDesc(
get(CommutedOpc));
6515 bool Src0Kill = Src0.
isKill();
6519 else if (Src1.
isReg()) {
6534 unsigned Opc =
MI.getOpcode();
6537 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6538 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6539 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6542 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6543 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6544 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6545 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6546 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6547 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6548 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6552 if (Src1.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()))) {
6553 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6558 if (VOP3Idx[2] != -1) {
6560 if (Src2.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src2.
getReg()))) {
6561 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6570 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6571 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6573 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6575 SGPRsUsed.
insert(SGPRReg);
6579 for (
int Idx : VOP3Idx) {
6588 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6600 if (!RI.isSGPRClass(RI.getRegClassForReg(
MRI, MO.
getReg())))
6607 if (ConstantBusLimit > 0) {
6619 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6620 !RI.isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6626 for (
unsigned I = 0;
I < 3; ++
I) {
6639 SRC = RI.getCommonSubClass(SRC, DstRC);
6642 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6644 if (RI.hasAGPRs(VRC)) {
6645 VRC = RI.getEquivalentVGPRClass(VRC);
6646 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6648 get(TargetOpcode::COPY), NewSrcReg)
6655 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6661 for (
unsigned i = 0; i < SubRegs; ++i) {
6662 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6664 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6665 .
addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
6671 get(AMDGPU::REG_SEQUENCE), DstReg);
6672 for (
unsigned i = 0; i < SubRegs; ++i) {
6674 MIB.
addImm(RI.getSubRegFromChannel(i));
6687 if (SBase && !RI.isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6689 SBase->setReg(SGPR);
6692 if (SOff && !RI.isSGPRReg(
MRI, SOff->
getReg())) {
6700 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6701 if (OldSAddrIdx < 0)
6717 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6718 if (NewVAddrIdx < 0)
6721 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6725 if (OldVAddrIdx >= 0) {
6727 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6739 if (OldVAddrIdx == NewVAddrIdx) {
6742 MRI.removeRegOperandFromUseList(&NewVAddr);
6743 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6747 MRI.removeRegOperandFromUseList(&NewVAddr);
6748 MRI.addRegOperandToUseList(&NewVAddr);
6750 assert(OldSAddrIdx == NewVAddrIdx);
6752 if (OldVAddrIdx >= 0) {
6753 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6754 AMDGPU::OpName::vdst_in);
6758 if (NewVDstIn != -1) {
6759 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6765 if (NewVDstIn != -1) {
6766 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6787 if (!SAddr || RI.isSGPRClass(
MRI.getRegClass(SAddr->
getReg())))
6807 unsigned OpSubReg =
Op.getSubReg();
6810 RI.getRegClassForReg(
MRI, OpReg), OpSubReg);
6816 Register DstReg =
MRI.createVirtualRegister(DstRC);
6826 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6829 bool ImpDef = Def->isImplicitDef();
6830 while (!ImpDef && Def && Def->isCopy()) {
6831 if (Def->getOperand(1).getReg().isPhysical())
6833 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6834 ImpDef = Def && Def->isImplicitDef();
6836 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6855 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6861 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6862 unsigned NumSubRegs =
RegSize / 32;
6863 Register VScalarOp = ScalarOp->getReg();
6865 if (NumSubRegs == 1) {
6866 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6868 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6871 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6873 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6879 CondReg = NewCondReg;
6881 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6889 ScalarOp->setReg(CurReg);
6890 ScalarOp->setIsKill();
6894 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6895 "Unhandled register size");
6897 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
6899 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6901 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6904 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6905 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
6908 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6909 .
addReg(VScalarOp, VScalarOpUndef,
6910 TRI->getSubRegFromChannel(Idx + 1));
6916 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6917 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6923 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6924 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6927 if (NumSubRegs <= 2)
6928 Cmp.addReg(VScalarOp);
6930 Cmp.addReg(VScalarOp, VScalarOpUndef,
6931 TRI->getSubRegFromChannel(Idx, 2));
6935 CondReg = NewCondReg;
6937 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6945 const auto *SScalarOpRC =
6946 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6947 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6951 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6952 unsigned Channel = 0;
6953 for (
Register Piece : ReadlanePieces) {
6954 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6958 ScalarOp->setReg(SScalarOp);
6959 ScalarOp->setIsKill();
6963 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6964 MRI.setSimpleHint(SaveExec, CondReg);
6995 if (!Begin.isValid())
6997 if (!End.isValid()) {
7003 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7011 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
7012 std::numeric_limits<unsigned>::max()) !=
7015 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7021 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
7030 for (
auto I = Begin;
I != AfterMI;
I++) {
7031 for (
auto &MO :
I->all_uses())
7032 MRI.clearKillFlags(MO.getReg());
7057 MBB.addSuccessor(LoopBB);
7067 for (
auto &Succ : RemainderBB->
successors()) {
7091 static std::tuple<unsigned, unsigned>
7099       TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7100                              AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7103   Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7104   Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7105   Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7106   Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7107   uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
7124       .addImm(AMDGPU::sub0_sub1)
7130   return std::tuple(RsrcPtr, NewSRsrc);
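// Illustrative sketch (not part of the upstream file): the tuple returned
// above is consumed at the call site roughly like this; the first element is
// the 64-bit pointer pulled out of the VGPR rsrc, the second the rebuilt
// scalar rsrc. The exact argument list is approximate.
//
//   unsigned RsrcPtr, NewSRsrc;
//   std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
//   // RsrcPtr feeds the 64-bit vaddr; NewSRsrc replaces the srsrc operand.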
7167 if (
MI.getOpcode() == AMDGPU::PHI) {
7169 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
7170 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
7173 MRI.getRegClass(
MI.getOperand(i).getReg());
7174 if (RI.hasVectorRegisters(OpRC)) {
7188 VRC = &AMDGPU::VReg_1RegClass;
7191 ? RI.getEquivalentAGPRClass(SRC)
7192 : RI.getEquivalentVGPRClass(SRC);
7195 ? RI.getEquivalentAGPRClass(VRC)
7196 : RI.getEquivalentVGPRClass(VRC);
7204 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7206 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7222 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7225 if (RI.hasVGPRs(DstRC)) {
7229 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7231 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7249 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7254 if (DstRC != Src0RC) {
7263 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7265 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7271 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7272 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7273 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7274 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7275 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7276 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7277 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7279 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7292 ? AMDGPU::OpName::rsrc
7293 : AMDGPU::OpName::srsrc;
7295 if (SRsrc && !RI.isSGPRClass(
MRI.getRegClass(SRsrc->
getReg())))
7298 AMDGPU::OpName SampOpName =
7299 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7301 if (SSamp && !RI.isSGPRClass(
MRI.getRegClass(SSamp->
getReg())))
7308 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7310 if (!RI.isSGPRClass(
MRI.getRegClass(Dest->
getReg()))) {
7314 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7315 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7320 while (Start->getOpcode() != FrameSetupOpcode)
7323 while (End->getOpcode() != FrameDestroyOpcode)
7327 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7328 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7336 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7338 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7340 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7350 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7351 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7352 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7353 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7355 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7362 bool isSoffsetLegal =
true;
7364 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7365 if (SoffsetIdx != -1) {
7368 !RI.isSGPRClass(
MRI.getRegClass(Soffset->
getReg()))) {
7369 isSoffsetLegal =
false;
7373 bool isRsrcLegal =
true;
7375 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7376 if (RsrcIdx != -1) {
7379 isRsrcLegal =
false;
7383 if (isRsrcLegal && isSoffsetLegal)
7407 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7408 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7409 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7411 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7412 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7413 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7415 unsigned RsrcPtr, NewSRsrc;
7422 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7429 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7443 }
else if (!VAddr && ST.hasAddr64()) {
7447 "FIXME: Need to emit flat atomics here");
7449 unsigned RsrcPtr, NewSRsrc;
7452 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7475 MIB.
addImm(CPol->getImm());
7480 MIB.
addImm(TFE->getImm());
7500 MI.removeFromParent();
7505 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7507 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7511 if (!isSoffsetLegal) {
7523 if (!isSoffsetLegal) {
7535 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7536 if (RsrcIdx != -1) {
7537 DeferredList.insert(
MI);
7542 return DeferredList.contains(
MI);
7552 if (!ST.useRealTrue16Insts())
7555 unsigned Opcode =
MI.getOpcode();
7559 OpIdx >=
get(Opcode).getNumOperands() ||
7560 get(Opcode).operands()[
OpIdx].RegClass == -1)
7564 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7568 if (!RI.isVGPRClass(CurrRC))
7571 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7573 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7574 Op.setSubReg(AMDGPU::lo16);
7575 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7577 Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7578 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7585 Op.setReg(NewDstReg);
7597 while (!Worklist.
empty()) {
7611 "Deferred MachineInstr are not supposed to re-populate worklist");
7631 case AMDGPU::S_ADD_I32:
7632 case AMDGPU::S_SUB_I32: {
7636 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7644 case AMDGPU::S_MUL_U64:
7645 if (ST.hasVectorMulU64()) {
7646 NewOpcode = AMDGPU::V_MUL_U64_e64;
7650 splitScalarSMulU64(Worklist, Inst, MDT);
7654 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7655 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7658 splitScalarSMulPseudo(Worklist, Inst, MDT);
7662 case AMDGPU::S_AND_B64:
7663 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7667 case AMDGPU::S_OR_B64:
7668 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7672 case AMDGPU::S_XOR_B64:
7673 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7677 case AMDGPU::S_NAND_B64:
7678 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7682 case AMDGPU::S_NOR_B64:
7683 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7687 case AMDGPU::S_XNOR_B64:
7688 if (ST.hasDLInsts())
7689 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7691 splitScalar64BitXnor(Worklist, Inst, MDT);
7695 case AMDGPU::S_ANDN2_B64:
7696 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7700 case AMDGPU::S_ORN2_B64:
7701 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7705 case AMDGPU::S_BREV_B64:
7706 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7710 case AMDGPU::S_NOT_B64:
7711 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7715 case AMDGPU::S_BCNT1_I32_B64:
7716 splitScalar64BitBCNT(Worklist, Inst);
7720 case AMDGPU::S_BFE_I64:
7721 splitScalar64BitBFE(Worklist, Inst);
7725 case AMDGPU::S_FLBIT_I32_B64:
7726 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7729 case AMDGPU::S_FF1_I32_B64:
7730 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7734 case AMDGPU::S_LSHL_B32:
7735 if (ST.hasOnlyRevVALUShifts()) {
7736 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7740 case AMDGPU::S_ASHR_I32:
7741 if (ST.hasOnlyRevVALUShifts()) {
7742 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7746 case AMDGPU::S_LSHR_B32:
7747 if (ST.hasOnlyRevVALUShifts()) {
7748 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7752 case AMDGPU::S_LSHL_B64:
7753 if (ST.hasOnlyRevVALUShifts()) {
7755 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7756 : AMDGPU::V_LSHLREV_B64_e64;
7760 case AMDGPU::S_ASHR_I64:
7761 if (ST.hasOnlyRevVALUShifts()) {
7762 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7766 case AMDGPU::S_LSHR_B64:
7767 if (ST.hasOnlyRevVALUShifts()) {
7768 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7773 case AMDGPU::S_ABS_I32:
7774 lowerScalarAbs(Worklist, Inst);
7778 case AMDGPU::S_CBRANCH_SCC0:
7779 case AMDGPU::S_CBRANCH_SCC1: {
7782 bool IsSCC = CondReg == AMDGPU::SCC;
7790 case AMDGPU::S_BFE_U64:
7791 case AMDGPU::S_BFM_B64:
7794 case AMDGPU::S_PACK_LL_B32_B16:
7795 case AMDGPU::S_PACK_LH_B32_B16:
7796 case AMDGPU::S_PACK_HL_B32_B16:
7797 case AMDGPU::S_PACK_HH_B32_B16:
7798 movePackToVALU(Worklist,
MRI, Inst);
7802 case AMDGPU::S_XNOR_B32:
7803 lowerScalarXnor(Worklist, Inst);
7807 case AMDGPU::S_NAND_B32:
7808 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7812 case AMDGPU::S_NOR_B32:
7813 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7817 case AMDGPU::S_ANDN2_B32:
7818 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7822 case AMDGPU::S_ORN2_B32:
7823 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7831 case AMDGPU::S_ADD_CO_PSEUDO:
7832 case AMDGPU::S_SUB_CO_PSEUDO: {
7833     unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7834                        ? AMDGPU::V_ADDC_U32_e64
7835                        : AMDGPU::V_SUBB_U32_e64;
7836 const auto *CarryRC = RI.getWaveMaskRegClass();
7839 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7840 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7847 Register DestReg =
MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
7858 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7862 case AMDGPU::S_UADDO_PSEUDO:
7863 case AMDGPU::S_USUBO_PSEUDO: {
7869 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7870 ? AMDGPU::V_ADD_CO_U32_e64
7871 : AMDGPU::V_SUB_CO_U32_e64;
7873 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest0.
getReg()));
7874 Register DestReg =
MRI.createVirtualRegister(NewRC);
7882 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7883 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7887 case AMDGPU::S_LSHL1_ADD_U32:
7888 case AMDGPU::S_LSHL2_ADD_U32:
7889 case AMDGPU::S_LSHL3_ADD_U32:
7890 case AMDGPU::S_LSHL4_ADD_U32: {
7894 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
7895 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
7896 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
7900 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest.
getReg()));
7901 Register DestReg =
MRI.createVirtualRegister(NewRC);
7909 MRI.replaceRegWith(Dest.
getReg(), DestReg);
7910 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7914 case AMDGPU::S_CSELECT_B32:
7915 case AMDGPU::S_CSELECT_B64:
7916 lowerSelect(Worklist, Inst, MDT);
7919 case AMDGPU::S_CMP_EQ_I32:
7920 case AMDGPU::S_CMP_LG_I32:
7921 case AMDGPU::S_CMP_GT_I32:
7922 case AMDGPU::S_CMP_GE_I32:
7923 case AMDGPU::S_CMP_LT_I32:
7924 case AMDGPU::S_CMP_LE_I32:
7925 case AMDGPU::S_CMP_EQ_U32:
7926 case AMDGPU::S_CMP_LG_U32:
7927 case AMDGPU::S_CMP_GT_U32:
7928 case AMDGPU::S_CMP_GE_U32:
7929 case AMDGPU::S_CMP_LT_U32:
7930 case AMDGPU::S_CMP_LE_U32:
7931 case AMDGPU::S_CMP_EQ_U64:
7932 case AMDGPU::S_CMP_LG_U64:
7933 case AMDGPU::S_CMP_LT_F32:
7934 case AMDGPU::S_CMP_EQ_F32:
7935 case AMDGPU::S_CMP_LE_F32:
7936 case AMDGPU::S_CMP_GT_F32:
7937 case AMDGPU::S_CMP_LG_F32:
7938 case AMDGPU::S_CMP_GE_F32:
7939 case AMDGPU::S_CMP_O_F32:
7940 case AMDGPU::S_CMP_U_F32:
7941 case AMDGPU::S_CMP_NGE_F32:
7942 case AMDGPU::S_CMP_NLG_F32:
7943 case AMDGPU::S_CMP_NGT_F32:
7944 case AMDGPU::S_CMP_NLE_F32:
7945 case AMDGPU::S_CMP_NEQ_F32:
7946 case AMDGPU::S_CMP_NLT_F32: {
7947 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
7951 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
7965 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7969 case AMDGPU::S_CMP_LT_F16:
7970 case AMDGPU::S_CMP_EQ_F16:
7971 case AMDGPU::S_CMP_LE_F16:
7972 case AMDGPU::S_CMP_GT_F16:
7973 case AMDGPU::S_CMP_LG_F16:
7974 case AMDGPU::S_CMP_GE_F16:
7975 case AMDGPU::S_CMP_O_F16:
7976 case AMDGPU::S_CMP_U_F16:
7977 case AMDGPU::S_CMP_NGE_F16:
7978 case AMDGPU::S_CMP_NLG_F16:
7979 case AMDGPU::S_CMP_NGT_F16:
7980 case AMDGPU::S_CMP_NLE_F16:
7981 case AMDGPU::S_CMP_NEQ_F16:
7982 case AMDGPU::S_CMP_NLT_F16: {
7983 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8005 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8009 case AMDGPU::S_CVT_HI_F32_F16: {
8010 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8011 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8012 if (ST.useRealTrue16Insts()) {
8017 .
addReg(TmpReg, 0, AMDGPU::hi16)
8033 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8037 case AMDGPU::S_MINIMUM_F32:
8038 case AMDGPU::S_MAXIMUM_F32: {
8039 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8050 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8054 case AMDGPU::S_MINIMUM_F16:
8055 case AMDGPU::S_MAXIMUM_F16: {
8056 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8057 ? &AMDGPU::VGPR_16RegClass
8058 : &AMDGPU::VGPR_32RegClass);
8070 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8074 case AMDGPU::V_S_EXP_F16_e64:
8075 case AMDGPU::V_S_LOG_F16_e64:
8076 case AMDGPU::V_S_RCP_F16_e64:
8077 case AMDGPU::V_S_RSQ_F16_e64:
8078 case AMDGPU::V_S_SQRT_F16_e64: {
8079 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8080 ? &AMDGPU::VGPR_16RegClass
8081 : &AMDGPU::VGPR_32RegClass);
8093 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8099 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8107 if (NewOpcode == Opcode) {
8115 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8117 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8135 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
8137 MRI.replaceRegWith(DstReg, NewDstReg);
8138 MRI.clearKillFlags(NewDstReg);
8152 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8156 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8157 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8158 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8160 get(AMDGPU::IMPLICIT_DEF), Undef);
8162 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8168 MRI.replaceRegWith(DstReg, NewDstReg);
8169 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8171 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8174 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8175 MRI.replaceRegWith(DstReg, NewDstReg);
8176 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8181 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8182 MRI.replaceRegWith(DstReg, NewDstReg);
8184 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8194 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8195 AMDGPU::OpName::src0_modifiers) >= 0)
8199 NewInstr->addOperand(Src);
8202 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8205 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8207 NewInstr.addImm(
Size);
8208 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8212 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8217 "Scalar BFE is only implemented for constant width and offset");
8225 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8226 AMDGPU::OpName::src1_modifiers) >= 0)
8228 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8230 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8231 AMDGPU::OpName::src2_modifiers) >= 0)
8233 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8235 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8237 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8239 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8245 NewInstr->addOperand(
Op);
8252 if (
Op.getReg() == AMDGPU::SCC) {
8254 if (
Op.isDef() && !
Op.isDead())
8255 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8257 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8262 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8263 Register DstReg = NewInstr->getOperand(0).getReg();
8268 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8269 MRI.replaceRegWith(DstReg, NewDstReg);
8278 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8282 std::pair<bool, MachineBasicBlock *>
8294     Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8297     assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
8299     unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
8300         AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8308     MRI.replaceRegWith(OldDstReg, ResultReg);
8311     addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8312     return std::pair(true, NewBB);
8315   return std::pair(false, nullptr);
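// Illustrative sketch (not part of the upstream file): the (Changed, NewBB)
// pair returned above is unpacked by the caller roughly as shown; when the
// scalar add/sub could be rewritten directly to V_ADD/V_SUB, no further
// lowering of this instruction is needed. Variable names are hypothetical.
//
//   bool Changed;
//   MachineBasicBlock *CreatedBB;
//   std::tie(Changed, CreatedBB) = moveScalarAddSub(Worklist, Inst, MDT);
//   if (Changed)
//     return;   // instruction already replaced; users queued on the worklist
//   // otherwise fall through to the generic VALU lowering path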
8332 bool IsSCC = (CondReg == AMDGPU::SCC);
8340 MRI.replaceRegWith(Dest.
getReg(), CondReg);
8346 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8347 NewCondReg =
MRI.createVirtualRegister(TC);
8351 bool CopyFound =
false;
8352 for (MachineInstr &CandI :
8355 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8357 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8359 .
addReg(CandI.getOperand(1).getReg());
8371 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8379 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest.
getReg())));
8380 MachineInstr *NewInst;
8381 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8382 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8395 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
8397 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
8409 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8410 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8412 unsigned SubOp = ST.hasAddNoCarry() ?
8413 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8423 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8424 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8438 if (ST.hasDLInsts()) {
8439 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8447 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8448 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8454 bool Src0IsSGPR = Src0.
isReg() &&
8455 RI.isSGPRClass(
MRI.getRegClass(Src0.
getReg()));
8456 bool Src1IsSGPR = Src1.
isReg() &&
8457 RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()));
8459 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8460 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8470 }
else if (Src1IsSGPR) {
8484 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8488 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8494 unsigned Opcode)
const {
8504 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8505 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8517 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8518 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8523 unsigned Opcode) const {
8533 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8534 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8546 MRI.replaceRegWith(Dest.getReg(), NewDest);
8547 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8562 const MCInstrDesc &InstDesc = get(Opcode);
8563 const TargetRegisterClass *Src0RC = Src0.isReg() ?
8565     &AMDGPU::SGPR_32RegClass;
8567 const TargetRegisterClass *Src0SubRC =
8568     RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8571     AMDGPU::sub0, Src0SubRC);
8573 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8574 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8575 const TargetRegisterClass *NewDestSubRC =
8576     RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8578 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8579 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
8582     AMDGPU::sub1, Src0SubRC);
8584 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8585 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
8590 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8597 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8599 Worklist.insert(&LoHalf);
8600 Worklist.insert(&HiHalf);
8606 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
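// Hedged sketch (assumed shape of the elided builder call): the two halves are
// recombined into FullDestReg with a REG_SEQUENCE before the old destination
// is replaced, roughly:
//   BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
//       .addReg(DestSub0)
//       .addImm(AMDGPU::sub0)
//       .addReg(DestSub1)
//       .addImm(AMDGPU::sub1);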
8617 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8618 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8619 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8627 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8628 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8629 const TargetRegisterClass *Src0SubRC =
8630     RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8631 if (RI.isSGPRClass(Src0SubRC))
8632 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8633 const TargetRegisterClass *Src1SubRC =
8634     RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8635 if (RI.isSGPRClass(Src1SubRC))
8636 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8640 MachineOperand Op0L =
8642 MachineOperand Op1L =
8644 MachineOperand Op0H =
8646 MachineOperand Op1H =
8664 Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8665 MachineInstr *Op1L_Op0H =
8670 Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8671 MachineInstr *Op1H_Op0L =
8676 Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8677 MachineInstr *Carry =
8682 MachineInstr *LoHalf =
8687 Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8692 MachineInstr *HiHalf =
8703 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8715 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
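// Note (summary, not from this listing): with Src0 = {Op0L, Op0H} and
// Src1 = {Op1L, Op1H} as 32-bit halves, the 64-bit product is
//   lo32 = Op0L * Op1L
//   hi32 = mulhi_u32(Op0L, Op1L) + Op0L * Op1H + Op1L * Op0H   (mod 2^32)
// which is why the elided builders above form Op1L*Op0H, Op1H*Op0L, the high
// part of Op0L*Op1L (CarryReg), and sum them into AddReg for the high half.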
8726 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8727 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8728 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8736 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8737 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8738 const TargetRegisterClass *Src0SubRC =
8739     RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8740 if (RI.isSGPRClass(Src0SubRC))
8741 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8742 const TargetRegisterClass *Src1SubRC =
8743     RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8744 if (RI.isSGPRClass(Src1SubRC))
8745 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8749 MachineOperand Op0L =
8751 MachineOperand Op1L =
8755 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8756     ? AMDGPU::V_MUL_HI_U32_e64
8757     : AMDGPU::V_MUL_HI_I32_e64;
8758 MachineInstr *HiHalf =
8761 MachineInstr *LoHalf =
8772 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8780 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8796 const MCInstrDesc &InstDesc = get(Opcode);
8797 const TargetRegisterClass *Src0RC = Src0.isReg() ?
8799     &AMDGPU::SGPR_32RegClass;
8801 const TargetRegisterClass *Src0SubRC =
8802     RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8803 const TargetRegisterClass *Src1RC = Src1.isReg() ?
8805     &AMDGPU::SGPR_32RegClass;
8807 const TargetRegisterClass *Src1SubRC =
8808     RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8811     AMDGPU::sub0, Src0SubRC);
8813     AMDGPU::sub0, Src1SubRC);
8815     AMDGPU::sub1, Src0SubRC);
8817     AMDGPU::sub1, Src1SubRC);
8819 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8820 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8821 const TargetRegisterClass *NewDestSubRC =
8822     RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8824 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8825 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
8829 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8830 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
8834 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8841 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8843 Worklist.insert(&LoHalf);
8844 Worklist.insert(&HiHalf);
8847 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8863 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8865 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8867 MachineOperand* Op0;
8868 MachineOperand* Op1;
8881 Register NewDest = MRI.createVirtualRegister(DestRC);
8887 MRI.replaceRegWith(Dest.getReg(), NewDest);
8903 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
8904 const TargetRegisterClass *SrcRC = Src.isReg() ?
8905     MRI.getRegClass(Src.getReg()) :
8906     &AMDGPU::SGPR_32RegClass;
8908 Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8909 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8911 const TargetRegisterClass *SrcSubRC =
8912     RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8915     AMDGPU::sub0, SrcSubRC);
8917     AMDGPU::sub1, SrcSubRC);
8923 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8927 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8946     Offset == 0 && "Not implemented");
8949 Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8950 Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8951 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8968 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8969 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8974 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8975 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8979     .addReg(Src.getReg(), 0, AMDGPU::sub0);
8982     .addReg(Src.getReg(), 0, AMDGPU::sub0)
8987 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8988 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
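// Hedged note: in the 32-bit-wide case above the low half of the 64-bit result
// is the extracted field itself and the high half carries only its sign,
// roughly an arithmetic shift right of the low half by 31 into TmpReg/MidRegHi
// before the two halves are packed into ResultReg.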
9007 const MCInstrDesc &InstDesc = get(Opcode);
9009 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9010 unsigned OpcodeAdd =
9011     ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
9013 const TargetRegisterClass *SrcRC =
9014     Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9015 const TargetRegisterClass *SrcSubRC =
9016     RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9018 MachineOperand SrcRegSub0 =
9020 MachineOperand SrcRegSub1 =
9023 Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9024 Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9025 Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9026 Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9033     .addReg(IsCtlz ? MidReg1 : MidReg2)
9039     .addReg(IsCtlz ? MidReg2 : MidReg1);
9041 MRI.replaceRegWith(Dest.getReg(), MidReg4);
9043 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
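// Note (summary, not from this listing): the 64-bit count is combined from two
// 32-bit per-half counts. One half's count gets 32 added (MidReg1 for ctlz,
// MidReg2 for cttz, per the operands above) and an unsigned min merges it with
// the other half's count, relying on the 32-bit FFBH/FFBL result being -1
// (unsigned max) for an all-zero half.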
9046void SIInstrInfo::addUsersToMoveToVALUWorklist(
9050 MachineInstr &UseMI = *MO.getParent();
9054 switch (UseMI.getOpcode()) {
9057 case AMDGPU::SOFT_WQM:
9058 case AMDGPU::STRICT_WWM:
9059 case AMDGPU::STRICT_WQM:
9060 case AMDGPU::REG_SEQUENCE:
9062 case AMDGPU::INSERT_SUBREG:
9065 OpNo = MO.getOperandNo();
9070 MRI.constrainRegClass(DstReg, OpRC);
9072 if (!RI.hasVectorRegisters(OpRC))
9083 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9089 if (ST.useRealTrue16Insts()) {
9092 SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9099 SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9105 bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
9106 bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
9108 auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
9110 case AMDGPU::S_PACK_LL_B32_B16:
9113 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9114 .addImm(AMDGPU::lo16)
9116 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9117 .addImm(AMDGPU::hi16);
9119 case AMDGPU::S_PACK_LH_B32_B16:
9122 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9123 .addImm(AMDGPU::lo16)
9124 .addReg(SrcReg1, 0, AMDGPU::hi16)
9125 .addImm(AMDGPU::hi16);
9127 case AMDGPU::S_PACK_HL_B32_B16:
9128 NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
9129 .addImm(AMDGPU::lo16)
9131 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9132 .addImm(AMDGPU::hi16);
9134 case AMDGPU::S_PACK_HH_B32_B16:
9135 NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
9136 .addImm(AMDGPU::lo16)
9137 .addReg(SrcReg1, 0, AMDGPU::hi16)
9138 .addImm(AMDGPU::hi16);
9145 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9146 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9151 case AMDGPU::S_PACK_LL_B32_B16: {
9152 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9153 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9170 case AMDGPU::S_PACK_LH_B32_B16: {
9171 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9180 case AMDGPU::S_PACK_HL_B32_B16: {
9181 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9191 case AMDGPU::S_PACK_HH_B32_B16: {
9192 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9193 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9210 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9211 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
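// Hedged sketch of the elided builder calls for the S_PACK_LL_B32_B16 case
// above (assumed shapes; the exact opcodes are not visible in this listing):
//   BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
//       .addImm(0x0000ffff);                      // low-half mask
//   BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
//       .addReg(ImmReg)
//       .add(Src0);                               // keep lo16 of Src0
//   BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32_e64), ResultReg)
//       .add(Src1)
//       .addImm(16)
//       .addReg(TmpReg);                          // (Src1 << 16) | lo16(Src0)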
9220 assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9221        !Op.isDead() && Op.getParent() == &SCCDefInst);
9222 SmallVector<MachineInstr *, 4> CopyToDelete;
9225 for (MachineInstr &MI :
9229 int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9232 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9233 Register DestReg = MI.getOperand(0).getReg();
9235 MRI.replaceRegWith(DestReg, NewCond);
9240 MI.getOperand(SCCIdx).setReg(NewCond);
9246 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9249 for (auto &Copy : CopyToDelete)
9250 Copy->eraseFromParent();
9258void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9264 for (MachineInstr &MI :
9267 if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9269 if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9278 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9286 case AMDGPU::REG_SEQUENCE:
9287 case AMDGPU::INSERT_SUBREG:
9289 case AMDGPU::SOFT_WQM:
9290 case AMDGPU::STRICT_WWM:
9291 case AMDGPU::STRICT_WQM: {
9293 if (RI.isAGPRClass(SrcRC)) {
9294 if (RI.isAGPRClass(NewDstRC))
9299 case AMDGPU::REG_SEQUENCE:
9300 case AMDGPU::INSERT_SUBREG:
9301 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9304 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9310 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9313 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9327 int OpIndices[3]) const {
9328 const MCInstrDesc &Desc = MI.getDesc();
9344 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9346 for (unsigned i = 0; i < 3; ++i) {
9347 int Idx = OpIndices[i];
9351 const MachineOperand &MO = MI.getOperand(Idx);
9357 const TargetRegisterClass *OpRC =
9358     RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
9359 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9365 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9366 if (RI.isSGPRClass(RegRC))
9384 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9385 SGPRReg = UsedSGPRs[0];
9388 if (!SGPRReg && UsedSGPRs[1]) {
9389 if (UsedSGPRs[1] == UsedSGPRs[2])
9390 SGPRReg = UsedSGPRs[1];
9397 AMDGPU::OpName OperandName) const {
9398 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9401 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9405 return &MI.getOperand(Idx);
9419 if (ST.isAmdHsaOS()) {
9422 RsrcDataFormat |= (1ULL << 56);
9427 RsrcDataFormat |= (2ULL << 59);
9430 return RsrcDataFormat;
9440 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9445 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9452 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9458 unsigned Opc = MI.getOpcode();
9464 return get(Opc).mayLoad() &&
9469     int &FrameIndex) const {
9471 if (!Addr || !Addr->isFI())
9482     int &FrameIndex) const {
9490     int &FrameIndex) const {
9504     int &FrameIndex) const {
9521 while (++I != E && I->isInsideBundle()) {
9522 assert(!I->isBundle() && "No nested bundle!");
9530 unsigned Opc = MI.getOpcode();
9532 unsigned DescSize = Desc.getSize();
9537 unsigned Size = DescSize;
9541 if (MI.isBranch() && ST.hasOffset3fBug())
9552 bool HasLiteral = false;
9553 unsigned LiteralSize = 4;
9554 for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9559 if (ST.has64BitLiterals()) {
9560 switch (OpInfo.OperandType) {
9576 return HasLiteral ? DescSize + LiteralSize : DescSize;
9581 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9585 int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9586 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9590 case TargetOpcode::BUNDLE:
9592 case TargetOpcode::INLINEASM:
9593 case TargetOpcode::INLINEASM_BR: {
9595 const char *AsmStr = MI.getOperand(0).getSymbolName();
9599 if (MI.isMetaInstruction())
9603 const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9606 unsigned LoInstOpcode = D16Info->LoOp;
9608 DescSize = Desc.getSize();
9612 if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9615 DescSize = Desc.getSize();
9626 if (MI.memoperands_empty())
9638 static const std::pair<int, const char *> TargetIndices[] = {
9676std::pair<unsigned, unsigned>
9683 static const std::pair<unsigned, const char *> TargetFlags[] = {
9701 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9716 return AMDGPU::WWM_COPY;
9718 return AMDGPU::COPY;
9730 bool IsNullOrVectorRegister = true;
9733 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9738 return IsNullOrVectorRegister &&
9740     (Opcode == AMDGPU::IMPLICIT_DEF &&
9742     (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
9743     MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9751 if (ST.hasAddNoCarry())
9755 Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9756 MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9767 if (ST.hasAddNoCarry())
9771 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
9773     : RS.scavengeRegisterBackwards(
9774           *RI.getBoolRC(), I, false,
9787 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9788 case AMDGPU::SI_KILL_I1_TERMINATOR:
9797 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9798 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9799 case AMDGPU::SI_KILL_I1_PSEUDO:
9800 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9812 const unsigned OffsetBits =
9814 return (1 << OffsetBits) - 1;
9821 if (MI.isInlineAsm())
9824 for (auto &Op : MI.implicit_operands()) {
9825 if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
9826 Op.setReg(AMDGPU::VCC_LO);
9835 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
9839 const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
9840 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
9857 if (Imm <= MaxImm + 64) {
9859 Overflow = Imm - MaxImm;
9886 if (ST.hasRestrictedSOffset())
9929 if (!ST.hasFlatInstOffsets())
9937 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9949std::pair<int64_t, int64_t>
9952 int64_t RemainderOffset = COffsetVal;
9953 int64_t ImmField = 0;
9958 if (AllowNegative) {
9960 int64_t D = 1LL << NumBits;
9961 RemainderOffset = (COffsetVal / D) * D;
9962 ImmField = COffsetVal - RemainderOffset;
9964 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9966     (ImmField % 4) != 0) {
9968 RemainderOffset += ImmField % 4;
9969 ImmField -= ImmField % 4;
9971 } else if (COffsetVal >= 0) {
9973 RemainderOffset = COffsetVal - ImmField;
9977 assert(RemainderOffset + ImmField == COffsetVal);
9978 return {ImmField, RemainderOffset};
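// Worked example (illustrative, not from this listing): with NumBits == 12,
// D == 4096, so COffsetVal == 4100 splits as
//   RemainderOffset == (4100 / 4096) * 4096 == 4096, ImmField == 4
// and COffsetVal == -4100 splits as RemainderOffset == -4096, ImmField == -4
// (truncating division keeps ImmField + RemainderOffset == COffsetVal, which
// the assert above checks).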
9982 if (ST.hasNegativeScratchOffsetBug() &&
9990 switch (ST.getGeneration()) {
10016 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10017 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10018 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10019 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10020 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10021 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10022 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10023 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10030#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10031 case OPCODE##_dpp: \
10032 case OPCODE##_e32: \
10033 case OPCODE##_e64: \
10034 case OPCODE##_e64_dpp: \
10035 case OPCODE##_sdwa:
10049 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10050 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10051 case AMDGPU::V_FMA_F16_gfx9_e64:
10052 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10053 case AMDGPU::V_INTERP_P2_F16:
10054 case AMDGPU::V_MAD_F16_e64:
10055 case AMDGPU::V_MAD_U16_e64:
10056 case AMDGPU::V_MAD_I16_e64:
10078 switch (ST.getGeneration()) {
10091 if (isMAI(Opcode)) {
10099 if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
10106 if (ST.hasGFX90AInsts()) {
10108 if (ST.hasGFX940Insts())
10139 for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
10140 if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10141 auto &RegOp = MI.getOperand(1 + 2 * I);
10153 switch (MI.getOpcode()) {
10155 case AMDGPU::REG_SEQUENCE:
10159 case AMDGPU::INSERT_SUBREG:
10160 if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10177 if (!P.Reg.isVirtual())
10181 auto *DefInst = MRI.getVRegDef(RSR.Reg);
10182 while (auto *MI = DefInst) {
10184 switch (MI->getOpcode()) {
10186 case AMDGPU::V_MOV_B32_e32: {
10187 auto &Op1 = MI->getOperand(1);
10192 DefInst = MRI.getVRegDef(RSR.Reg);
10200 DefInst = MRI.getVRegDef(RSR.Reg);
10213 assert(MRI.isSSA() && "Must be run on SSA");
10215 auto *TRI = MRI.getTargetRegisterInfo();
10216 auto *DefBB = DefMI.getParent();
10220 if (UseMI.getParent() != DefBB)
10223 const int MaxInstScan = 20;
10227 auto E = UseMI.getIterator();
10228 for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10229 if (I->isDebugInstr())
10232 if (++NumInst > MaxInstScan)
10235 if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10245 assert(MRI.isSSA() && "Must be run on SSA");
10247 auto *TRI = MRI.getTargetRegisterInfo();
10248 auto *DefBB = DefMI.getParent();
10250 const int MaxUseScan = 10;
10253 for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10254 auto &UseInst = *Use.getParent();
10257 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10260 if (++NumUse > MaxUseScan)
10267 const int MaxInstScan = 20;
10271 for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10274 if (I->isDebugInstr())
10277 if (++NumInst > MaxInstScan)
10290 if (Reg == VReg && --NumUse == 0)
10292 } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10301 auto Cur = MBB.begin();
10302 if (Cur != MBB.end())
10304 if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10307 } while (Cur != MBB.end() && Cur != LastPHIIt);
10316 if (InsPt != MBB.end() &&
10317     (InsPt->getOpcode() == AMDGPU::SI_IF ||
10318      InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10319      InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10320     InsPt->definesRegister(Src, nullptr)) {
10324     .addReg(Src, 0, SrcSubReg)
10349 if (isFullCopyInstr(MI)) {
10350 Register DstReg = MI.getOperand(0).getReg();
10351 Register SrcReg = MI.getOperand(1).getReg();
10358 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10362 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10373     unsigned *PredCost) const {
10374 if (MI.isBundle()) {
10377 unsigned Lat = 0, Count = 0;
10378 for (++I; I != E && I->isBundledWithPred(); ++I) {
10380 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10382 return Lat + Count - 1;
10385 return SchedModel.computeInstrLatency(&MI);
10391 unsigned Opcode = MI.getOpcode();
10396     : MI.getOperand(1).getReg();
10397 LLT DstTy = MRI.getType(Dst);
10398 LLT SrcTy = MRI.getType(Src);
10400 unsigned SrcAS = SrcTy.getAddressSpace();
10403 ST.hasGloballyAddressableScratch()
10411 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10412 return HandleAddrSpaceCast(MI);
10415 auto IID = GI->getIntrinsicID();
10422 case Intrinsic::amdgcn_addrspacecast_nonnull:
10423 return HandleAddrSpaceCast(MI);
10424 case Intrinsic::amdgcn_if:
10425 case Intrinsic::amdgcn_else:
10439 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10440     Opcode == AMDGPU::G_SEXTLOAD) {
10441 if (MI.memoperands_empty())
10445 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10446 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10454 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10455 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10456 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10469 unsigned opcode = MI.getOpcode();
10470 if (opcode == AMDGPU::V_READLANE_B32 ||
10471     opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10472     opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10475 if (isCopyInstr(MI)) {
10479 RI.getPhysRegBaseClass(srcOp.getReg());
10487 if (MI.isPreISelOpcode())
10502 if (MI.memoperands_empty())
10506 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10507        mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10522 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10524 if (!SrcOp.isReg())
10528 if (!Reg || !SrcOp.readsReg())
10534 if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10561     F, "ds_ordered_count unsupported for this calling conv"));
10575     Register &SrcReg2, int64_t &CmpMask,
10576     int64_t &CmpValue) const {
10577 if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10580 switch (MI.getOpcode()) {
10583 case AMDGPU::S_CMP_EQ_U32:
10584 case AMDGPU::S_CMP_EQ_I32:
10585 case AMDGPU::S_CMP_LG_U32:
10586 case AMDGPU::S_CMP_LG_I32:
10587 case AMDGPU::S_CMP_LT_U32:
10588 case AMDGPU::S_CMP_LT_I32:
10589 case AMDGPU::S_CMP_GT_U32:
10590 case AMDGPU::S_CMP_GT_I32:
10591 case AMDGPU::S_CMP_LE_U32:
10592 case AMDGPU::S_CMP_LE_I32:
10593 case AMDGPU::S_CMP_GE_U32:
10594 case AMDGPU::S_CMP_GE_I32:
10595 case AMDGPU::S_CMP_EQ_U64:
10596 case AMDGPU::S_CMP_LG_U64:
10597 SrcReg = MI.getOperand(0).getReg();
10598 if (MI.getOperand(1).isReg()) {
10599 if (MI.getOperand(1).getSubReg())
10601 SrcReg2 = MI.getOperand(1).getReg();
10603 } else if (MI.getOperand(1).isImm()) {
10605 CmpValue = MI.getOperand(1).getImm();
10611 case AMDGPU::S_CMPK_EQ_U32:
10612 case AMDGPU::S_CMPK_EQ_I32:
10613 case AMDGPU::S_CMPK_LG_U32:
10614 case AMDGPU::S_CMPK_LG_I32:
10615 case AMDGPU::S_CMPK_LT_U32:
10616 case AMDGPU::S_CMPK_LT_I32:
10617 case AMDGPU::S_CMPK_GT_U32:
10618 case AMDGPU::S_CMPK_GT_I32:
10619 case AMDGPU::S_CMPK_LE_U32:
10620 case AMDGPU::S_CMPK_LE_I32:
10621 case AMDGPU::S_CMPK_GE_U32:
10622 case AMDGPU::S_CMPK_GE_I32:
10623 SrcReg = MI.getOperand(0).getReg();
10625 CmpValue = MI.getOperand(1).getImm();
10644 if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10646 if (MI.killsRegister(AMDGPU::SCC, &RI))
10651 SccDef->setIsDead(false);
10659 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
10660 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
10662 bool Op1IsNonZeroImm =
10663 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
10664 bool Op2IsZeroImm =
10665 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
10666 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
10672 Register SrcReg2, int64_t CmpMask,
10681 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10713 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
10714     MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
10720 if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
10735 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10736     this](int64_t ExpectedValue, unsigned SrcSize,
10737     bool IsReversible, bool IsSigned) -> bool {
10765 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
10766     Def->getOpcode() != AMDGPU::S_AND_B64)
10770 const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
10781 SrcOp = &Def->getOperand(2);
10782 else if (isMask(&Def->getOperand(2)))
10783 SrcOp = &Def->getOperand(1);
10791 if (IsSigned && BitNo == SrcSize - 1)
10794 ExpectedValue <<= BitNo;
10796 bool IsReversedCC = false;
10797 if (CmpValue != ExpectedValue) {
10800 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10805 Register DefReg = Def->getOperand(0).getReg();
10806 if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10812 if (!MRI->use_nodbg_empty(DefReg)) {
10820 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10821 : AMDGPU::S_BITCMP1_B32
10822 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10823 : AMDGPU::S_BITCMP1_B64;
10828 Def->eraseFromParent();
10836 case AMDGPU::S_CMP_EQ_U32:
10837 case AMDGPU::S_CMP_EQ_I32:
10838 case AMDGPU::S_CMPK_EQ_U32:
10839 case AMDGPU::S_CMPK_EQ_I32:
10840   return optimizeCmpAnd(1, 32, true, false);
10841 case AMDGPU::S_CMP_GE_U32:
10842 case AMDGPU::S_CMPK_GE_U32:
10843   return optimizeCmpAnd(1, 32, false, false);
10844 case AMDGPU::S_CMP_GE_I32:
10845 case AMDGPU::S_CMPK_GE_I32:
10846   return optimizeCmpAnd(1, 32, false, true);
10847 case AMDGPU::S_CMP_EQ_U64:
10848   return optimizeCmpAnd(1, 64, true, false);
10849 case AMDGPU::S_CMP_LG_U32:
10850 case AMDGPU::S_CMP_LG_I32:
10851 case AMDGPU::S_CMPK_LG_U32:
10852 case AMDGPU::S_CMPK_LG_I32:
10853   return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect();
10854 case AMDGPU::S_CMP_GT_U32:
10855 case AMDGPU::S_CMPK_GT_U32:
10856   return optimizeCmpAnd(0, 32, false, false);
10857 case AMDGPU::S_CMP_GT_I32:
10858 case AMDGPU::S_CMPK_GT_I32:
10859   return optimizeCmpAnd(0, 32, false, true);
10860 case AMDGPU::S_CMP_LG_U64:
10861   return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect();
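// Illustrative example (not part of this file) of the optimizeCmpAnd fold
// dispatched above, in MIR terms for a single-bit mask:
//   %1:sreg_32 = S_AND_B32 %0, 4, implicit-def $scc
//   S_CMP_LG_U32 %1, 0, implicit-def $scc
// can be rewritten as
//   S_BITCMP1_B32 %0, 2, implicit-def $scc
// since bit 2 being set is equivalent to (%0 & 4) != 0; the S_AND is erased
// when its result has no remaining uses.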
10868     AMDGPU::OpName OpName) const {
10869 if (!ST.needsAlignedVGPRs())
10872 int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
10884 bool IsAGPR = RI.isAGPR(MRI, DataReg);
10886     IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10889     MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10890                                      : &AMDGPU::VReg_64_Align2RegClass);
10892     .addReg(DataReg, 0, Op.getSubReg())
10897 Op.setSubReg(AMDGPU::sub0);
10919 unsigned Opcode = MI.getOpcode();
10925 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
10926 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
10929 if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, const SIRegisterInfo &RI)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool setsSCCifResultIsNonZero(const MachineInstr &MI)
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
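A minimal sketch of the underlying idea, assuming the VGPR value is wave-uniform: insert a V_READFIRSTLANE_B32 in front of the use and hand back the fresh SGPR. The helper name and the choice of SReg_32RegClass are illustrative, not the exact behavior of readlaneVGPRToSGPR; the snippet assumes it is compiled in-tree next to the AMDGPU backend headers.
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Hedged sketch: materialize a wave-uniform VGPR value into a new 32-bit SGPR
// right before an existing use of that value.
static Register readFirstLaneSketch(const SIInstrInfo &TII,
                                    MachineRegisterInfo &MRI,
                                    MachineInstr &UseMI, Register VGPRSrc) {
  MachineBasicBlock &MBB = *UseMI.getParent();
  Register SGPRDst = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(MBB, UseMI, UseMI.getDebugLoc(),
          TII.get(AMDGPU::V_READFIRSTLANE_B32), SGPRDst)
      .addReg(VGPRSrc);
  return SGPRDst;
}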
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named OperandName, or nullptr if the instruction has no such operand.
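A typical usage pattern, sketched against the signature listed above; the choice of AMDGPU::OpName::offset and the fallback value are assumptions for illustration.
#include "SIInstrInfo.h"
using namespace llvm;

// Hedged usage sketch: fetch an optional named operand and read its
// immediate, falling back to 0 when the opcode has no such operand.
static int64_t getOffsetOrZero(const SIInstrInfo &TII, MachineInstr &MI) {
  MachineOperand *Off = TII.getNamedOperand(MI, AMDGPU::OpName::offset);
  return (Off && Off->isImm()) ? Off->getImm() : 0;
}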
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
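A hedged arithmetic sketch of that note: when the MSB of an N-bit offset field is forced to zero, only N-1 bits carry value, so the largest encodable offset is 2^(N-1) - 1. The function name is hypothetical.
#include <cstdint>
// e.g. NumBits = 12 gives a usable range of [0, 2047].
static int64_t maxPositiveFlatOffsetSketch(unsigned NumBits) {
  return (int64_t(1) << (NumBits - 1)) - 1;
}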
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the inline values intended for floating-point operands.
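A minimal sketch of the integer case, assuming the usual AMDGPU integer inline-constant range of -16..64; the floating-point encodings handled by the other isInlinableLiteral* helpers are deliberately left out.
#include <cstdint>
// Hedged sketch: true for the small integers that encode as inline constants.
static bool isInlinableIntLiteralSketch(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}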
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
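A worked sketch of that contract, under the assumption that Value is at least Skew once Skew has been reduced modulo Align; the function name is hypothetical.
#include <cstdint>
// Hedged sketch: largest result <= Value with result % Align == Skew % Align.
// e.g. (30, 8, 3) -> 27, since 27 <= 30 and 27 % 8 == 3.
static uint64_t alignDownSketch(uint64_t Value, uint64_t Align,
                                uint64_t Skew = 0) {
  Skew %= Align;
  return (Value - Skew) / Align * Align + Skew;
}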
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
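A small sketch of the same operation using shifts; the template parameter B mirrors the bit count named in the description, and the helper name is the only other assumption.
#include <cstdint>
// Hedged sketch: replicate bit B-1 of x into bits B..63.
// e.g. B = 13, x = 0x1000 (bit 12 set) -> -4096.
template <unsigned B> static int64_t signExtend64Sketch(uint64_t x) {
  static_assert(B > 0 && B <= 64, "bit width must be in [1, 64]");
  return static_cast<int64_t>(x << (64 - B)) >> (64 - B);
}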
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store a worklist of machine instructions.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.