#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
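// Note: the GET_* macros above select which TableGen-generated tables
// (image-dim intrinsic and resource-intrinsic lookup tables) are instantiated
// from AMDGPUGenSearchableTables.inc for use in this file.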
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
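// Helper used below: an operand is treated as common to two opcodes only when
// getNamedOperandIdx finds it in both (or in neither) of them.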
  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.memoperands_empty() &&
         all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
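// resultDependsOnExec: conservatively decide whether MI's result depends on
// the EXEC mask; compares whose only users are EXEC-masking S_AND*/SAVEEXEC
// instructions are treated as independent of it.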
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;
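// The DS_READ2ST64/DS_WRITE2ST64 forms handled below carry two 8-bit offsets;
// the pair is treated as a single contiguous access only when
// Offset0 + 1 == Offset1.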
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    if (DataOpIdx == -1) {
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);

    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;
    int64_t Offset1, bool OffsetIsScalable1,
    int64_t Offset2, bool OffsetIsScalable2,
    unsigned ClusterSize, unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
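// shouldScheduleLoadsNear: only keep loads together when there are at most 16
// of them and their offsets lie within 64 bytes of each other.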
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");
  for (auto Def = MI, E = MBB.begin(); Def != E; ) {

    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);
    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

    if (ImpUseSuperReg) {
      UseBuilder.addReg(ImpUseSuperReg,
  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);
    LastMI->addRegisterKilled(SrcReg, &RI);
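// copyPhysReg: emit the cheapest legal copy between two physical registers,
// covering SGPR/VGPR/AGPR classes, 16-bit subregisters, and 64-bit packed
// moves where the subtarget provides them.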
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {
    assert(ST.useRealTrue16Insts());
    if (DestReg == SrcReg) {
    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {
  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
          "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
          "Cannot use hi16 subreg on VI!");
  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {

    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                               *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
    return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");
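// Materialize a one- or two-operand select condition (SCC/VCC/EXEC based
// predicates) into a freshly created boolean register for the select lowering
// that follows.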
  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
                                     int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;
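// Pick the canonical move opcode for a destination register class: COPY for
// AGPRs and 16-bit SGPRs, S_MOV_B32/B64 for scalar registers, and
// V_MOV_B32_e32 / V_MOV_B64_PSEUDO (or V_MOV_B16 with true16) for vector
// registers.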
  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
                                                        bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
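// The spill helpers below map a spill size in bytes to the matching
// SI_SPILL_*_SAVE (and later *_RESTORE) pseudo for the SGPR, VGPR, and AGPR/AV
// register classes.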
                                            bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;
    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
                                             bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = RI.getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
    if (RI.spillSGPRToVGPR())

    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;
    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                                bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = RI.getSpillSize(*RC);
      FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
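// insertNoops: emit the requested number of no-ops as a chain of S_NOP
// instructions, each covering at most MaxSNopCount (1 << ST.getSNopBits()).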
                             unsigned Quantity) const {
  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();
  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {
  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
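// expandPostRAPseudo: the *_term copies of EXEC-manipulating instructions are
// rewritten back to their real opcodes once control flow has been lowered; the
// remaining pseudos (64-bit immediate moves, indirect register access,
// PC-relative offsets, WWM/WQM markers, ...) are expanded here.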
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
    Op.setOffset(Op.getOffset() + 4);

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
  case AMDGPU::ENTER_STRICT_WQM: {
    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
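// expandMovDPP64: V_MOV_B64_DPP_PSEUDO stays a single 64-bit DPP move when the
// subtarget supports it; otherwise it is split into two 32-bit DPP moves
// operating on the sub0/sub1 halves.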
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));

      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());

        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                        AMDGPU::OpName Src0OpName,
                                        AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();

  NonRegOp2.setImm(NonRegVal);
                                                  unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&

  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&

  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {

  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                     unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                         Src1, AMDGPU::OpName::src1_modifiers);

                         AMDGPU::OpName::src1_sel);
                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);

                                        int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");
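// Long-branch expansion: when a branch target is out of range, the destination
// address is computed into an SGPR pair (S_GETPC_B64 plus a symbol-difference
// offset, or a single add-PC instruction on subtargets that have one) and
// reached through an indirect branch; a scratch register may have to be
// scavenged or spilled for this.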
  if (ST.hasAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;
    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(
    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
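// Translate between the BranchPredicate enum and the corresponding
// S_CBRANCH_* opcodes, in both directions.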
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {

  if (Cond[0].isImm()) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.isSGPRClass(RC);
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

                                            unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
  return std::nullopt;
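// Opcode tables used when folding an immediate into a MAC/FMA: the first maps
// to the MADAK/FMAAK form (immediate as the addend), the second to the
// MADMK/FMAMK form (immediate as a multiplicand), with t16/fake16 variants
// selected from the subtarget.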
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (RI.hasVGPRs(DstRC))

    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);

  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())
    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

    if (ST.getConstantBusLimit(Opc) < 2) {

    bool Src0Inlined = false;
    if (Src0->isReg()) {

      if (Def && Def->isMoveImmediate() &&

      } else if (ST.getConstantBusLimit(Opc) <= 1 &&

    if (Src1->isReg() && !Src0Inlined) {
      if (Def && Def->isMoveImmediate() &&
          MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
      else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
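// convertToThreeAddress: rewrite two-address MAC/FMAC instructions into their
// three-address MAD/FMA equivalents (including bundled MFMA handling and
// LiveIntervals updates) so the destination no longer has to be tied to src2.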
  if (MI.isBundle()) {
    if (MI.getBundleSize() != 1)
    CandidateMI = MI.getNextNode();

  MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);

  if (MI.isBundle()) {
      MI.untieRegOperand(MO.getOperandNo());

  if (Def.isEarlyClobber() && Def.isReg() &&

  auto UpdateDefIndex = [&](LiveRange &LR) {
    auto *S = LR.find(OldIndex);
    if (S != LR.end() && S->start == OldIndex) {
      assert(S->valno && S->valno->def == OldIndex);
      S->start = NewIndex;
      S->valno->def = NewIndex;

  for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {
    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
    if (MRI.hasOneNonDBGUse(DefReg)) {
      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      if (MI.isBundle()) {
          if (MO.isReg() && MO.getReg() == DefReg) {
            assert(MO.getSubReg() == 0 &&
                   "tied sub-registers in bundles currently not supported");
            MI.removeOperand(MO.getOperandNo());

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

      if (MI.isBundle()) {
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

  return MI.isBundle() ? &MI : NewMI;
                                                  ThreeAddressUpdates &U) const {
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);

  MachineInstrBuilder MIB;

  const MachineOperand *Src0Mods =
  const MachineOperand *Src1Mods =
  const MachineOperand *Src2Mods =

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
    MachineInstr *DefMI;

        MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);

  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  if (ST.isTgSplitEnabled())

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {
                                        ST.hasInv2PiInlineImm());
                                        ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                                          ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();
  bool HasInv2Pi = ST.hasInv2PiInlineImm();
    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&

  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))

  return ST.hasVOP3Literal();

                                      int64_t ImmVal) const {
  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))
  return RI.opCanUseInlineConstant(OpInfo.OperandType);

         "unexpected imm-like operand kind");
4756 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4774 AMDGPU::OpName OpName) const {
4776 return Mods && Mods->getImm();
4789 switch (MI.getOpcode()) {
4790 default: return false;
4792 case AMDGPU::V_ADDC_U32_e64:
4793 case AMDGPU::V_SUBB_U32_e64:
4794 case AMDGPU::V_SUBBREV_U32_e64: {
4802 case AMDGPU::V_MAC_F16_e64:
4803 case AMDGPU::V_MAC_F32_e64:
4804 case AMDGPU::V_MAC_LEGACY_F32_e64:
4805 case AMDGPU::V_FMAC_F16_e64:
4806 case AMDGPU::V_FMAC_F16_t16_e64:
4807 case AMDGPU::V_FMAC_F16_fake16_e64:
4808 case AMDGPU::V_FMAC_F32_e64:
4809 case AMDGPU::V_FMAC_F64_e64:
4810 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4816 case AMDGPU::V_CNDMASK_B32_e64:
4822 if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
4852 (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
4861 unsigned Op32) const {
4875 Inst32.add(MI.getOperand(I));
4879 int Idx = MI.getNumExplicitDefs();
4881 int OpTy = MI.getDesc().operands()[Idx++].OperandType;
4886 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
4908 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
4916 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
4919 return AMDGPU::SReg_32RegClass.contains(Reg) ||
4920 AMDGPU::SReg_64RegClass.contains(Reg);
4926 return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
4938 return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
4948 switch (MO.getReg()) {
4950 case AMDGPU::VCC_LO:
4951 case AMDGPU::VCC_HI:
4953 case AMDGPU::FLAT_SCR:
4966 switch (MI.getOpcode()) {
4967 case AMDGPU::V_READLANE_B32:
4968 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4969 case AMDGPU::V_WRITELANE_B32:
4970 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4977 if (MI.isPreISelOpcode() ||
4978 SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
4993 if (SubReg.getReg().isPhysical())
4996 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
5007 if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5008 ErrInfo = "illegal copy from vector register to SGPR";
5026 if (!MRI.isSSA() && MI.isCopy())
5027 return verifyCopy(MI, MRI, ErrInfo);
5029 if (SIInstrInfo::isGenericOpcode(Opcode))
5032 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5033 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5034 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5036 if (Src0Idx == -1) {
5038 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5039 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5040 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5041 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5046 if (!Desc.isVariadic() &&
5047 Desc.getNumOperands() != MI.getNumExplicitOperands()) {
5048 ErrInfo = "Instruction has wrong number of operands.";
5052 if (MI.isInlineAsm()) {
5065 if (!Reg.isVirtual() && !RC->contains(Reg)) {
5066 ErrInfo = "inlineasm operand has incorrect register class.";
5074 if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
5075 ErrInfo = "missing memory operand from image instruction.";
5080 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
5083 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
5084 "all fp values to integers.";
5089 int16_t RegClass = getOpRegClassID(OpInfo);
5091 switch (OpInfo.OperandType) {
5093 if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
5094 ErrInfo = "Illegal immediate value for operand.";
5128 ErrInfo = "Illegal immediate value for operand.";
5135 ErrInfo = "Expected inline constant for operand.";
5150 if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
5151 ErrInfo = "Expected immediate, but got non-immediate";
5160 if (OpInfo.isGenericType())
5175 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
5177 if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
5179 RI.getSubRegisterClass(RC, MO.getSubReg())) {
5180 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
5187 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5188 ErrInfo = "Subtarget requires even aligned vector registers";
5193 if (RegClass != -1) {
5194 if (Reg.isVirtual())
5199 ErrInfo = "Operand has incorrect register class.";
5207 if (!ST.hasSDWA()) {
5208 ErrInfo = "SDWA is not supported on this target";
5212 for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5213 AMDGPU::OpName::dst_sel}) {
5217 int64_t Imm = MO->getImm();
5219 ErrInfo = "Invalid SDWA selection";
5224 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5226 for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5231 if (!ST.hasSDWAScalar()) {
5233 if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
5234 ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
5241 "Only reg allowed as operands in SDWA instructions on GFX9+";
5247 if (!ST.hasSDWAOmod()) {
5250 if (OMod != nullptr &&
5252 ErrInfo = "OMod not allowed in SDWA instructions on VI";
5257 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5258 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5259 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5260 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5263 unsigned Mods = Src0ModsMO->getImm();
5266 ErrInfo = "sext, abs and neg are not allowed on this instruction";
5272 if (isVOPC(BasicOpcode)) {
5273 if (!ST.hasSDWASdst() && DstIdx != -1) {
5276 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5277 ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
5280 } else if (!ST.hasSDWAOutModsVOPC()) {
5283 if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
5284 ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
5290 if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
5291 ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
5298 if (DstUnused && DstUnused->isImm() &&
5301 if (!Dst.isReg() || !Dst.isTied()) {
5302 ErrInfo = "Dst register should have tied register";
5307 MI.getOperand(MI.findTiedOperandIdx(DstIdx));
5310 "Dst register should be tied to implicit use of preserved register";
5314 ErrInfo = "Dst register should use same physical register as preserved";
5321 if (isImage(Opcode) && !MI.mayStore()) {
5333 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5341 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5345 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5346 if (RegCount > DstSize) {
5347 ErrInfo = "Image instruction returns too many registers for dst "
5356 if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5357 unsigned ConstantBusCount = 0;
5358 bool UsesLiteral = false;
5361 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5365 LiteralVal = &MI.getOperand(ImmIdx);
5374 for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5385 } else if (!MO.isFI()) {
5392 ErrInfo = "VOP2/VOP3 instruction uses more than one literal";
5402 if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
5403 return !RI.regsOverlap(SGPRUsed, SGPR);
5412 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5413 Opcode != AMDGPU::V_WRITELANE_B32) {
5414 ErrInfo = "VOP* instruction violates constant bus restriction";
5418 if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5419 ErrInfo = "VOP3 instruction uses literal";
5426 if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5427 unsigned SGPRCount = 0;
5430 for (int OpIdx : {Src0Idx, Src1Idx}) {
5438 if (MO.getReg() != SGPRUsed)
5443 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5444 ErrInfo = "WRITELANE instruction violates constant bus restriction";
5451 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5452 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5459 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
5469 ErrInfo = "ABS not allowed in VOP3B instructions";
5482 ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
5489 if (Desc.isBranch()) {
5491 ErrInfo = "invalid branch target for SOPK instruction";
5498 ErrInfo = "invalid immediate for SOPK instruction";
5503 ErrInfo = "invalid immediate for SOPK instruction";
5510 if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5511 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5512 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5513 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5514 const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5515 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5517 const unsigned StaticNumOps =
5518 Desc.getNumOperands() + Desc.implicit_uses().size();
5519 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5524 if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5525 ErrInfo = "missing implicit register operands";
5531 if (!Dst->isUse()) {
5532 ErrInfo = "v_movreld_b32 vdst should be a use operand";
5537 if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5538 UseOpIdx != StaticNumOps + 1) {
5539 ErrInfo = "movrel implicit operands should be tied";
5546 = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5548 !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5549 ErrInfo = "src0 should be subreg of implicit vector use";
5557 if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5558 ErrInfo = "VALU instruction does not implicitly read exec mask";
5564 if (MI.mayStore() &&
5569 if (Soff && Soff->getReg() != AMDGPU::M0) {
5570 ErrInfo = "scalar stores must use m0 as offset register";
5576 if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
5578 if (Offset->getImm() != 0) {
5579 ErrInfo = "subtarget does not support offsets in flat instructions";
5584 if (isDS(MI) && !ST.hasGDS()) {
5586 if (GDSOp && GDSOp->getImm() != 0) {
5587 ErrInfo = "GDS is not supported on this subtarget";
5595 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5596 AMDGPU::OpName::vaddr0);
5597 AMDGPU::OpName RSrcOpName =
5598 isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5599 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5607 ErrInfo = "dim is out of range";
5612 if (ST.hasR128A16()) {
5614 IsA16 = R128A16->getImm() != 0;
5615 } else if (ST.hasA16()) {
5617 IsA16 = A16->getImm() != 0;
5620 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5622 unsigned AddrWords =
5625 unsigned VAddrWords;
5627 VAddrWords = RsrcIdx - VAddr0Idx;
5628 if (ST.hasPartialNSAEncoding() &&
5630 unsigned LastVAddrIdx = RsrcIdx - 1;
5631 VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
5639 if (VAddrWords != AddrWords) {
5641 << " but got " << VAddrWords << "\n");
5642 ErrInfo = "bad vaddr size";
5652 unsigned DC = DppCt->getImm();
5653 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5654 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5655 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5656 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5657 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5658 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5659 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5660 ErrInfo = "Invalid dpp_ctrl value";
5663 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5665 ErrInfo = "Invalid dpp_ctrl value: "
5666 "wavefront shifts are not supported on GFX10+";
5669 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5671 ErrInfo = "Invalid dpp_ctrl value: "
5672 "broadcasts are not supported on GFX10+";
5675 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5677 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5678 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5679 !ST.hasGFX90AInsts()) {
5680 ErrInfo = "Invalid dpp_ctrl value: "
5681 "row_newbroadcast/row_share is not supported before "
5685 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5686 ErrInfo = "Invalid dpp_ctrl value: "
5687 "row_share and row_xmask are not supported before GFX10";
5692 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5695 ErrInfo = "Invalid dpp_ctrl value: "
5696 "DP ALU dpp only support row_newbcast";
5703 AMDGPU::OpName DataName =
5704 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5710 if (ST.hasGFX90AInsts()) {
5711 if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
5712 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
5713 ErrInfo = "Invalid register class: "
5714 "vdata and vdst should be both VGPR or AGPR";
5717 if (Data && Data2 &&
5719 ErrInfo = "Invalid register class: "
5720 "both data operands should be VGPR or AGPR";
5724 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5726 (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
5727 ErrInfo = "Invalid register class: "
5728 "agpr loads and stores not supported on this GPU";
5734 if (ST.needsAlignedVGPRs()) {
5735 const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
5740 if (Reg.isPhysical())
5741 return !(RI.getHWRegIndex(Reg) & 1);
5743 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5744 !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
5747 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5748 Opcode == AMDGPU::DS_GWS_BARRIER) {
5750 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5751 ErrInfo = "Subtarget requires even aligned vector registers "
5752 "for DS_GWS instructions";
5758 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5759 ErrInfo = "Subtarget requires even aligned vector registers "
5760 "for vaddr operand of image instructions";
5766 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5768 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5769 ErrInfo = "Invalid register class: "
5770 "v_accvgpr_write with an SGPR is not supported on this GPU";
5775 if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5778 ErrInfo = "pseudo expects only physical SGPRs";
5785 if (!ST.hasScaleOffset()) {
5786 ErrInfo = "Subtarget does not support offset scaling";
5790 ErrInfo = "Instruction does not support offset scaling";
5799 for (unsigned I = 0; I < 3; ++I) {
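// The switch below maps a scalar (SALU) opcode to the VALU opcode used when
// the instruction has to be moved to the vector pipeline;
// AMDGPU::INSTRUCTION_LIST_END means there is no direct equivalent.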
5812 switch (MI.getOpcode()) {
5813 default: return AMDGPU::INSTRUCTION_LIST_END;
5814 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
5815 case AMDGPU::COPY: return AMDGPU::COPY;
5816 case AMDGPU::PHI: return AMDGPU::PHI;
5817 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
5818 case AMDGPU::WQM: return AMDGPU::WQM;
5819 case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
5820 case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
5821 case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
5822 case AMDGPU::S_MOV_B32: {
5824 return MI.getOperand(1).isReg() ||
5825 RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
5826 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5828 case AMDGPU::S_ADD_I32:
5829 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5830 case AMDGPU::S_ADDC_U32:
5831 return AMDGPU::V_ADDC_U32_e32;
5832 case AMDGPU::S_SUB_I32:
5833 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5836 case AMDGPU::S_ADD_U32:
5837 return AMDGPU::V_ADD_CO_U32_e32;
5838 case AMDGPU::S_SUB_U32:
5839 return AMDGPU::V_SUB_CO_U32_e32;
5840 case AMDGPU::S_ADD_U64_PSEUDO:
5841 return AMDGPU::V_ADD_U64_PSEUDO;
5842 case AMDGPU::S_SUB_U64_PSEUDO:
5843 return AMDGPU::V_SUB_U64_PSEUDO;
5844 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
5845 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
5846 case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
5847 case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
5848 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
5849 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
5850 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
5851 case AMDGPU::S_XNOR_B32:
5852 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5853 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
5854 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
5855 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
5856 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
5857 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
5858 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
5859 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
5860 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
5861 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
5862 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
5863 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
5864 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
5865 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
5866 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
5867 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
5868 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
5869 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
5870 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
5871 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
5872 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
5873 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
5874 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
5875 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
5876 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
5877 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
5878 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
5879 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
5880 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
5881 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
5882 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
5883 case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
5884 case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
5885 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
5886 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
5887 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
5888 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
5889 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
5890 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
5891 case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
5892 case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
5893 case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
5894 case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
5895 case AMDGPU::S_CVT_F32_F16:
5896 case AMDGPU::S_CVT_HI_F32_F16:
5897 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5898 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5899 case AMDGPU::S_CVT_F16_F32:
5900 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5901 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5902 case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
5903 case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
5904 case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
5905 case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
5906 case AMDGPU::S_CEIL_F16:
5907 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
5908 : AMDGPU::V_CEIL_F16_fake16_e64;
5909 case AMDGPU::S_FLOOR_F16:
5910 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
5911 : AMDGPU::V_FLOOR_F16_fake16_e64;
5912 case AMDGPU::S_TRUNC_F16:
5913 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
5914 : AMDGPU::V_TRUNC_F16_fake16_e64;
5915 case AMDGPU::S_RNDNE_F16:
5916 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
5917 : AMDGPU::V_RNDNE_F16_fake16_e64;
5918 case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
5919 case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
5920 case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
5921 case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
5922 case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
5923 case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
5924 case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
5925 case AMDGPU::S_ADD_F16:
5926 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
5927 : AMDGPU::V_ADD_F16_fake16_e64;
5928 case AMDGPU::S_SUB_F16:
5929 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
5930 : AMDGPU::V_SUB_F16_fake16_e64;
5931 case AMDGPU::S_MIN_F16:
5932 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
5933 : AMDGPU::V_MIN_F16_fake16_e64;
5934 case AMDGPU::S_MAX_F16:
5935 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
5936 : AMDGPU::V_MAX_F16_fake16_e64;
5937 case AMDGPU::S_MINIMUM_F16:
5938 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
5939 : AMDGPU::V_MINIMUM_F16_fake16_e64;
5940 case AMDGPU::S_MAXIMUM_F16:
5941 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
5942 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5943 case AMDGPU::S_MUL_F16:
5944 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
5945 : AMDGPU::V_MUL_F16_fake16_e64;
5946 case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5947 case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5948 case AMDGPU::S_FMAC_F16:
5949 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
5950 : AMDGPU::V_FMAC_F16_fake16_e64;
5951 case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
5952 case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
5953 case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
5954 case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
5955 case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
5956 case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
5957 case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
5958 case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
5959 case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
5960 case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
5961 case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
5962 case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
5963 case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
5964 case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
5965 case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
5966 case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
5967 case AMDGPU::S_CMP_LT_F16:
5968 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
5969 : AMDGPU::V_CMP_LT_F16_fake16_e64;
5970 case AMDGPU::S_CMP_EQ_F16:
5971 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
5972 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
5973 case AMDGPU::S_CMP_LE_F16:
5974 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
5975 : AMDGPU::V_CMP_LE_F16_fake16_e64;
5976 case AMDGPU::S_CMP_GT_F16:
5977 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
5978 : AMDGPU::V_CMP_GT_F16_fake16_e64;
5979 case AMDGPU::S_CMP_LG_F16:
5980 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
5981 : AMDGPU::V_CMP_LG_F16_fake16_e64;
5982 case AMDGPU::S_CMP_GE_F16:
5983 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
5984 : AMDGPU::V_CMP_GE_F16_fake16_e64;
5985 case AMDGPU::S_CMP_O_F16:
5986 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
5987 : AMDGPU::V_CMP_O_F16_fake16_e64;
5988 case AMDGPU::S_CMP_U_F16:
5989 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
5990 : AMDGPU::V_CMP_U_F16_fake16_e64;
5991 case AMDGPU::S_CMP_NGE_F16:
5992 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
5993 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
5994 case AMDGPU::S_CMP_NLG_F16:
5995 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
5996 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
5997 case AMDGPU::S_CMP_NGT_F16:
5998 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
5999 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6000 case AMDGPU::S_CMP_NLE_F16:
6001 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6002 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6003 case AMDGPU::S_CMP_NEQ_F16:
6004 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6005 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6006 case AMDGPU::S_CMP_NLT_F16:
6007 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6008 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6009 case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
6010 case AMDGPU::V_S_EXP_F16_e64:
6011 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6012 : AMDGPU::V_EXP_F16_fake16_e64;
6013 case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
6014 case AMDGPU::V_S_LOG_F16_e64:
6015 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6016 : AMDGPU::V_LOG_F16_fake16_e64;
6017 case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
6018 case AMDGPU::V_S_RCP_F16_e64:
6019 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6020 : AMDGPU::V_RCP_F16_fake16_e64;
6021 case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
6022 case AMDGPU::V_S_RSQ_F16_e64:
6023 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6024 : AMDGPU::V_RSQ_F16_fake16_e64;
6025 case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
6026 case AMDGPU::V_S_SQRT_F16_e64:
6027 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6028 : AMDGPU::V_SQRT_F16_fake16_e64;
6031 "Unexpected scalar opcode without corresponding vector one!");
6080 "Not a whole wave func");
6083 if (MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6084 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6091 unsigned OpNo) const {
6093 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
6094 Desc.operands()[OpNo].RegClass == -1) {
6097 if (Reg.isVirtual()) {
6099 return MRI.getRegClass(Reg);
6101 return RI.getPhysRegBaseClass(Reg);
6104 int16_t RegClass = getOpRegClassID(Desc.operands()[OpNo]);
6105 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6113 unsigned RCID = getOpRegClassID(get(MI.getOpcode()).operands()[OpIdx]);
6115 unsigned Size = RI.getRegSizeInBits(*RC);
6116 unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6117 : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6118 : AMDGPU::V_MOV_B32_e32;
6120 Opcode = AMDGPU::COPY;
6121 else if (RI.isSGPRClass(RC))
6122 Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6136 return RI.getSubReg(SuperReg.getReg(), SubIdx);
6142 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx);
6153 if (SubIdx == AMDGPU::sub0)
6155 if (SubIdx == AMDGPU::sub1)
6167 void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
6183 if (Reg.isPhysical())
6193 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()) != nullptr;
6196 return RI.getCommonSubClass(DRC, RC) != nullptr;
6203 unsigned Opc = MI.getOpcode();
6209 constexpr AMDGPU::OpName OpNames[] = {
6210 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6213 int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
6214 if (static_cast<unsigned>(SrcIdx) == OpIdx &&
6224 bool IsAGPR = RI.isAGPR(MRI, MO.getReg());
6225 if (IsAGPR && !ST.hasMAIInsts())
6227 if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
6231 const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
6232 const int DataIdx = AMDGPU::getNamedOperandIdx(
6233 Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6234 if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
6235 MI.getOperand(DataIdx).isReg() &&
6236 RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
6238 if ((int)OpIdx == DataIdx) {
6239 if (VDstIdx != -1 &&
6240 RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6243 const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
6244 if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
6245 RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6250 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6251 (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
6271 constexpr unsigned NumOps = 3;
6272 constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
6273 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6274 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6275 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6280 int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
6283 MO = &MI.getOperand(SrcIdx);
6290 AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
6294 unsigned Mods = MI.getOperand(ModsIdx).getImm();
6298 return !OpSel && !OpSelHi;
6307 int64_t RegClass = getOpRegClassID(OpInfo);
6309 RegClass != -1 ? RI.getRegClass(RegClass) : nullptr;
6318 int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
6319 int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
6323 if (!LiteralLimit--)
6333 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6341 if (--ConstantBusLimit <= 0)
6353 if (!LiteralLimit--)
6355 if (--ConstantBusLimit <= 0)
6361 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6365 if (!Op.isReg() && !Op.isFI() && !Op.isRegMask() &&
6367 !Op.isIdenticalTo(*MO))
6377 } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6391 bool Is64BitOp = Is64BitFPOp ||
6398 (!ST.has64BitLiterals() || InstDesc.getSize() != 4))
6407 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6425 bool IsGFX950Only = ST.hasGFX950Insts();
6426 bool IsGFX940Only = ST.hasGFX940Insts();
6428 if (!IsGFX950Only && !IsGFX940Only)
6446 unsigned Opcode = MI.getOpcode();
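// Only GFX940/GFX950 subtargets reach the packed-math / (M)QSAD opcode list
// below; every other subtarget takes the early return above.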
6448 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6449 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6450 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6451 case AMDGPU::V_MQSAD_U32_U8_e64:
6452 case AMDGPU::V_PK_ADD_F16:
6453 case AMDGPU::V_PK_ADD_F32:
6454 case AMDGPU::V_PK_ADD_I16:
6455 case AMDGPU::V_PK_ADD_U16:
6456 case AMDGPU::V_PK_ASHRREV_I16:
6457 case AMDGPU::V_PK_FMA_F16:
6458 case AMDGPU::V_PK_FMA_F32:
6459 case AMDGPU::V_PK_FMAC_F16_e32:
6460 case AMDGPU::V_PK_FMAC_F16_e64:
6461 case AMDGPU::V_PK_LSHLREV_B16:
6462 case AMDGPU::V_PK_LSHRREV_B16:
6463 case AMDGPU::V_PK_MAD_I16:
6464 case AMDGPU::V_PK_MAD_U16:
6465 case AMDGPU::V_PK_MAX_F16:
6466 case AMDGPU::V_PK_MAX_I16:
6467 case AMDGPU::V_PK_MAX_U16:
6468 case AMDGPU::V_PK_MIN_F16:
6469 case AMDGPU::V_PK_MIN_I16:
6470 case AMDGPU::V_PK_MIN_U16:
6471 case AMDGPU::V_PK_MOV_B32:
6472 case AMDGPU::V_PK_MUL_F16:
6473 case AMDGPU::V_PK_MUL_F32:
6474 case AMDGPU::V_PK_MUL_LO_U16:
6475 case AMDGPU::V_PK_SUB_I16:
6476 case AMDGPU::V_PK_SUB_U16:
6477 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6486 unsigned Opc = MI.getOpcode();
6489 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
6492 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
6498 if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() &&
6505 if (Opc == AMDGPU::V_WRITELANE_B32) {
6508 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6514 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6524 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
6525 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
6526 if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))
6538 if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
6540 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6552 if (HasImplicitSGPR || !MI.isCommutable()) {
6569 if (CommutedOpc == -1) {
6574 MI.setDesc(get(CommutedOpc));
6578 bool Src0Kill = Src0.isKill();
6582 else if (Src1.isReg()) {
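// VOP3 operand legalization (below): operands that would exceed the
// constant-bus / literal limits are rewritten, either by reading them into an
// SGPR (for operands that must stay scalar) or by moving them into a VGPR.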
6597 unsigned Opc = MI.getOpcode();
6600 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
6601 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
6602 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
6605 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6606 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6607 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6608 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6609 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6610 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6611 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6615 if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
6616 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6621 if (VOP3Idx[2] != -1) {
6623 if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
6624 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6633 int ConstantBusLimit = ST.getConstantBusLimit(Opc);
6634 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6636 Register SGPRReg = findUsedSGPR(MI, VOP3Idx);
6638 SGPRsUsed.insert(SGPRReg);
6642 for (int Idx : VOP3Idx) {
6651 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6663 if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg())))
6670 if (ConstantBusLimit > 0) {
6682 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6683 !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
6689 for (unsigned I = 0; I < 3; ++I) {
6702 SRC = RI.getCommonSubClass(SRC, DstRC);
6705 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6707 if (RI.hasAGPRs(VRC)) {
6708 VRC = RI.getEquivalentVGPRClass(VRC);
6709 Register NewSrcReg = MRI.createVirtualRegister(VRC);
6711 get(TargetOpcode::COPY), NewSrcReg)
6718 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6724 for (unsigned i = 0; i < SubRegs; ++i) {
6725 Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6727 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6728 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
6734 get(AMDGPU::REG_SEQUENCE), DstReg);
6735 for (unsigned i = 0; i < SubRegs; ++i) {
6737 MIB.addImm(RI.getSubRegFromChannel(i));
6750 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
6752 SBase->setReg(SGPR);
6755 if (SOff && !RI.isSGPRReg(MRI, SOff->getReg())) {
6763 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
6764 if (OldSAddrIdx < 0)
6780 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6781 if (NewVAddrIdx < 0)
6784 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
6788 if (OldVAddrIdx >= 0) {
6790 VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());
6802 if (OldVAddrIdx == NewVAddrIdx) {
6805 MRI.removeRegOperandFromUseList(&NewVAddr);
6806 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6810 MRI.removeRegOperandFromUseList(&NewVAddr);
6811 MRI.addRegOperandToUseList(&NewVAddr);
6813 assert(OldSAddrIdx == NewVAddrIdx);
6815 if (OldVAddrIdx >= 0) {
6816 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6817 AMDGPU::OpName::vdst_in);
6821 if (NewVDstIn != -1) {
6822 int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
6828 if (NewVDstIn != -1) {
6829 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6850 if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
6870 unsigned OpSubReg = Op.getSubReg();
6873 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
6879 Register DstReg = MRI.createVirtualRegister(DstRC);
6889 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6892 bool ImpDef = Def->isImplicitDef();
6893 while (!ImpDef && Def && Def->isCopy()) {
6894 if (Def->getOperand(1).getReg().isPhysical())
6896 Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6897 ImpDef = Def && Def->isImplicitDef();
6899 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
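// What follows is the "waterfall" loop used when a scalar operand actually
// lives in VGPRs: each 32-bit piece is read with V_READFIRSTLANE, compared
// against the original VGPR value, and the loop re-executes until every
// active lane has been handled with a uniform value.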
6918 const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
6924 unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
6925 unsigned NumSubRegs = RegSize / 32;
6926 Register VScalarOp = ScalarOp->getReg();
6928 if (NumSubRegs == 1) {
6929 Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6931 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6934 Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
6936 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6942 CondReg = NewCondReg;
6944 Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
6952 ScalarOp->setReg(CurReg);
6953 ScalarOp->setIsKill();
6957 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6958 "Unhandled register size");
6960 for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
6962 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6964 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6967 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6968 .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));
6971 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6972 .addReg(VScalarOp, VScalarOpUndef,
6973 TRI->getSubRegFromChannel(Idx + 1));
6979 Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6980 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6986 Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
6987 auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6990 if (NumSubRegs <= 2)
6991 Cmp.addReg(VScalarOp);
6993 Cmp.addReg(VScalarOp, VScalarOpUndef,
6994 TRI->getSubRegFromChannel(Idx, 2));
6998 CondReg = NewCondReg;
7000 Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
7008 const auto *SScalarOpRC =
7009 TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
7010 Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
7014 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7015 unsigned Channel = 0;
7016 for (Register Piece : ReadlanePieces) {
7017 Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));
7021 ScalarOp->setReg(SScalarOp);
7022 ScalarOp->setIsKill();
7026 Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
7027 MRI.setSimpleHint(SaveExec, CondReg);
7058 if (!Begin.isValid())
7060 if (!End.isValid()) {
7066 const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
7074 MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI,
7075 std::numeric_limits<unsigned>::max()) !=
7078 SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7084 Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
7093 for (auto I = Begin; I != AfterMI; I++) {
7094 for (auto &MO : I->all_uses())
7095 MRI.clearKillFlags(MO.getReg());
7120 MBB.addSuccessor(LoopBB);
7130 for (auto &Succ : RemainderBB->successors()) {
7154 static std::tuple<unsigned, unsigned>
7162 TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7163 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7166 Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7167 Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7168 Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7169 Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7170 uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
7187 .addImm(AMDGPU::sub0_sub1)
7193 return std::tuple(RsrcPtr, NewSRsrc);
7230 if (MI.getOpcode() == AMDGPU::PHI) {
7232 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
7233 if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
7236 MRI.getRegClass(MI.getOperand(i).getReg());
7237 if (RI.hasVectorRegisters(OpRC)) {
7251 VRC = &AMDGPU::VReg_1RegClass;
7254 ? RI.getEquivalentAGPRClass(SRC)
7255 : RI.getEquivalentVGPRClass(SRC);
7258 ? RI.getEquivalentAGPRClass(VRC)
7259 : RI.getEquivalentVGPRClass(VRC);
7267 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
7269 if (!Op.isReg() || !Op.getReg().isVirtual())
7285 if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7288 if (RI.hasVGPRs(DstRC)) {
7292 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
7294 if (!Op.isReg() || !Op.getReg().isVirtual())
7312 if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7317 if (DstRC != Src0RC) {
7326 if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7328 if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7334 if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7335 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7336 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7337 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7338 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7339 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7340 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7342 if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7355 ? AMDGPU::OpName::rsrc
7356 : AMDGPU::OpName::srsrc;
7358 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))
7361 AMDGPU::OpName SampOpName =
7362 isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7364 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))
7371 if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7373 if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) {
7377 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7378 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7383 while (Start->getOpcode() != FrameSetupOpcode)
7386 while (End->getOpcode() != FrameDestroyOpcode)
7390 while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7391 MI.definesRegister(End->getOperand(1).getReg(), nullptr))
7399 if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7401 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7403 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
7413 if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7414 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7415 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7416 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7418 if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7425 bool isSoffsetLegal = true;
7427 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset);
7428 if (SoffsetIdx != -1) {
7431 !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) {
7432 isSoffsetLegal = false;
7436 bool isRsrcLegal = true;
7438 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
7439 if (RsrcIdx != -1) {
7442 isRsrcLegal = false;
7446 if (isRsrcLegal && isSoffsetLegal)
7470 Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7471 Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7472 Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7474 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7475 Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
7476 Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
7478 unsigned RsrcPtr, NewSRsrc;
7485 .addReg(RsrcPtr, 0, AMDGPU::sub0)
7492 .addReg(RsrcPtr, 0, AMDGPU::sub1)
7506 } else if (!VAddr && ST.hasAddr64()) {
7510 "FIXME: Need to emit flat atomics here");
7512 unsigned RsrcPtr, NewSRsrc;
7515 Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7538 MIB.addImm(CPol->getImm());
7543 MIB.addImm(TFE->getImm());
7563 MI.removeFromParent();
7568 .addReg(RsrcPtr, 0, AMDGPU::sub0)
7570 .addReg(RsrcPtr, 0, AMDGPU::sub1)
7574 if (!isSoffsetLegal) {
7586 if (!isSoffsetLegal) {
7598 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
7599 if (RsrcIdx != -1) {
7600 DeferredList.insert(MI);
7605 return DeferredList.contains(MI);
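// True16 operand fixup (below): if an operand's current VGPR class and the
// class expected by the instruction differ only by a lo16 subregister, either
// tag the operand with lo16 or widen it via a REG_SEQUENCE with an undef high
// half.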
7615 if (!ST.useRealTrue16Insts())
7618 unsigned Opcode = MI.getOpcode();
7622 OpIdx >= get(Opcode).getNumOperands() ||
7623 get(Opcode).operands()[OpIdx].RegClass == -1)
7627 if (!Op.isReg() || !Op.getReg().isVirtual())
7631 if (!RI.isVGPRClass(CurrRC))
7634 int16_t RCID = getOpRegClassID(get(Opcode).operands()[OpIdx]);
7636 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7637 Op.setSubReg(AMDGPU::lo16);
7638 } else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7640 Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7641 Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7648 Op.setReg(NewDstReg);
7660 while (!Worklist.empty()) {
7674 "Deferred MachineInstr are not supposed to re-populate worklist");
7694 case AMDGPU::S_ADD_I32:
7695 case AMDGPU::S_SUB_I32: {
7699 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7707 case AMDGPU::S_MUL_U64:
7708 if (ST.hasVectorMulU64()) {
7709 NewOpcode = AMDGPU::V_MUL_U64_e64;
7713 splitScalarSMulU64(Worklist, Inst, MDT);
7717 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7718 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7721 splitScalarSMulPseudo(Worklist, Inst, MDT);
7725 case AMDGPU::S_AND_B64:
7726 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7730 case AMDGPU::S_OR_B64:
7731 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7735 case AMDGPU::S_XOR_B64:
7736 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7740 case AMDGPU::S_NAND_B64:
7741 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7745 case AMDGPU::S_NOR_B64:
7746 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7750 case AMDGPU::S_XNOR_B64:
7751 if (ST.hasDLInsts())
7752 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7754 splitScalar64BitXnor(Worklist, Inst, MDT);
7758 case AMDGPU::S_ANDN2_B64:
7759 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7763 case AMDGPU::S_ORN2_B64:
7764 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7768 case AMDGPU::S_BREV_B64:
7769 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true);
7773 case AMDGPU::S_NOT_B64:
7774 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7778 case AMDGPU::S_BCNT1_I32_B64:
7779 splitScalar64BitBCNT(Worklist, Inst);
7783 case AMDGPU::S_BFE_I64:
7784 splitScalar64BitBFE(Worklist, Inst);
7788 case AMDGPU::S_FLBIT_I32_B64:
7789 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7792 case AMDGPU::S_FF1_I32_B64:
7793 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7797 case AMDGPU::S_LSHL_B32:
7798 if (ST.hasOnlyRevVALUShifts()) {
7799 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7803 case AMDGPU::S_ASHR_I32:
7804 if (ST.hasOnlyRevVALUShifts()) {
7805 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7809 case AMDGPU::S_LSHR_B32:
7810 if (ST.hasOnlyRevVALUShifts()) {
7811 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7815 case AMDGPU::S_LSHL_B64:
7816 if (ST.hasOnlyRevVALUShifts()) {
7818 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7819 : AMDGPU::V_LSHLREV_B64_e64;
7823 case AMDGPU::S_ASHR_I64:
7824 if (ST.hasOnlyRevVALUShifts()) {
7825 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7829 case AMDGPU::S_LSHR_B64:
7830 if (ST.hasOnlyRevVALUShifts()) {
7831 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7836 case AMDGPU::S_ABS_I32:
7837 lowerScalarAbs(Worklist, Inst);
7841 case AMDGPU::S_ABSDIFF_I32:
7842 lowerScalarAbsDiff(Worklist, Inst);
7846 case AMDGPU::S_CBRANCH_SCC0:
7847 case AMDGPU::S_CBRANCH_SCC1: {
7850 bool IsSCC = CondReg == AMDGPU::SCC;
7858 case AMDGPU::S_BFE_U64:
7859 case AMDGPU::S_BFM_B64:
7862 case AMDGPU::S_PACK_LL_B32_B16:
7863 case AMDGPU::S_PACK_LH_B32_B16:
7864 case AMDGPU::S_PACK_HL_B32_B16:
7865 case AMDGPU::S_PACK_HH_B32_B16:
7866 movePackToVALU(Worklist, MRI, Inst);
7870 case AMDGPU::S_XNOR_B32:
7871 lowerScalarXnor(Worklist, Inst);
7875 case AMDGPU::S_NAND_B32:
7876 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7880 case AMDGPU::S_NOR_B32:
7881 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7885 case AMDGPU::S_ANDN2_B32:
7886 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7890 case AMDGPU::S_ORN2_B32:
7891 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7899 case AMDGPU::S_ADD_CO_PSEUDO:
7900 case AMDGPU::S_SUB_CO_PSEUDO: {
7901 unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7902 ? AMDGPU::V_ADDC_U32_e64
7903 : AMDGPU::V_SUBB_U32_e64;
7904 const auto *CarryRC = RI.getWaveMaskRegClass();
7907 if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
7908 Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
7915 Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
7926 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
7930 case AMDGPU::S_UADDO_PSEUDO:
7931 case AMDGPU::S_USUBO_PSEUDO: {
7937 unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7938 ? AMDGPU::V_ADD_CO_U32_e64
7939 : AMDGPU::V_SUB_CO_U32_e64;
7941 RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
7942 Register DestReg = MRI.createVirtualRegister(NewRC);
7950 MRI.replaceRegWith(Dest0.getReg(), DestReg);
7951 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
7955 case AMDGPU::S_LSHL1_ADD_U32:
7956 case AMDGPU::S_LSHL2_ADD_U32:
7957 case AMDGPU::S_LSHL3_ADD_U32:
7958 case AMDGPU::S_LSHL4_ADD_U32: {
7962 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
7963 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
7964 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
7968 RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg()));
7969 Register DestReg = MRI.createVirtualRegister(NewRC);
7977 MRI.replaceRegWith(Dest.getReg(), DestReg);
7978 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
7982 case AMDGPU::S_CSELECT_B32:
7983 case AMDGPU::S_CSELECT_B64:
7984 lowerSelect(Worklist, Inst, MDT);
7987 case AMDGPU::S_CMP_EQ_I32:
7988 case AMDGPU::S_CMP_LG_I32:
7989 case AMDGPU::S_CMP_GT_I32:
7990 case AMDGPU::S_CMP_GE_I32:
7991 case AMDGPU::S_CMP_LT_I32:
7992 case AMDGPU::S_CMP_LE_I32:
7993 case AMDGPU::S_CMP_EQ_U32:
7994 case AMDGPU::S_CMP_LG_U32:
7995 case AMDGPU::S_CMP_GT_U32:
7996 case AMDGPU::S_CMP_GE_U32:
7997 case AMDGPU::S_CMP_LT_U32:
7998 case AMDGPU::S_CMP_LE_U32:
7999 case AMDGPU::S_CMP_EQ_U64:
8000 case AMDGPU::S_CMP_LG_U64:
8001 case AMDGPU::S_CMP_LT_F32:
8002 case AMDGPU::S_CMP_EQ_F32:
8003 case AMDGPU::S_CMP_LE_F32:
8004 case AMDGPU::S_CMP_GT_F32:
8005 case AMDGPU::S_CMP_LG_F32:
8006 case AMDGPU::S_CMP_GE_F32:
8007 case AMDGPU::S_CMP_O_F32:
8008 case AMDGPU::S_CMP_U_F32:
8009 case AMDGPU::S_CMP_NGE_F32:
8010 case AMDGPU::S_CMP_NLG_F32:
8011 case AMDGPU::S_CMP_NGT_F32:
8012 case AMDGPU::S_CMP_NLE_F32:
8013 case AMDGPU::S_CMP_NEQ_F32:
8014 case AMDGPU::S_CMP_NLT_F32: {
8015 Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8019 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8033 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8037 case AMDGPU::S_CMP_LT_F16:
8038 case AMDGPU::S_CMP_EQ_F16:
8039 case AMDGPU::S_CMP_LE_F16:
8040 case AMDGPU::S_CMP_GT_F16:
8041 case AMDGPU::S_CMP_LG_F16:
8042 case AMDGPU::S_CMP_GE_F16:
8043 case AMDGPU::S_CMP_O_F16:
8044 case AMDGPU::S_CMP_U_F16:
8045 case AMDGPU::S_CMP_NGE_F16:
8046 case AMDGPU::S_CMP_NLG_F16:
8047 case AMDGPU::S_CMP_NGT_F16:
8048 case AMDGPU::S_CMP_NLE_F16:
8049 case AMDGPU::S_CMP_NEQ_F16:
8050 case AMDGPU::S_CMP_NLT_F16: {
8051 Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8073 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8077 case AMDGPU::S_CVT_HI_F32_F16: {
8078 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8079 Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8080 if (ST.useRealTrue16Insts()) {
8085 .addReg(TmpReg, 0, AMDGPU::hi16)
8101 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8105 case AMDGPU::S_MINIMUM_F32:
8106 case AMDGPU::S_MAXIMUM_F32: {
8107 Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8118 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8122 case AMDGPU::S_MINIMUM_F16:
8123 case AMDGPU::S_MAXIMUM_F16: {
8124 Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
8125 ? &AMDGPU::VGPR_16RegClass
8126 : &AMDGPU::VGPR_32RegClass);
8138 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8142 case AMDGPU::V_S_EXP_F16_e64:
8143 case AMDGPU::V_S_LOG_F16_e64:
8144 case AMDGPU::V_S_RCP_F16_e64:
8145 case AMDGPU::V_S_RSQ_F16_e64:
8146 case AMDGPU::V_S_SQRT_F16_e64: {
8147 Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
8148 ? &AMDGPU::VGPR_16RegClass
8149 : &AMDGPU::VGPR_32RegClass);
8161 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8167 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8175 if (NewOpcode == Opcode) {
8183 Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8185 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8199 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8206 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8207 MRI.replaceRegWith(DstReg, NewDstReg);
8208 MRI.clearKillFlags(NewDstReg);
8211 if (!MRI.constrainRegClass(NewDstReg, CommonRC))
8228 if (ST.useRealTrue16Insts() && Inst.isCopy() &&
8232 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8233 Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8234 Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8236 get(AMDGPU::IMPLICIT_DEF), Undef);
8238 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8244 MRI.replaceRegWith(DstReg, NewDstReg);
8245 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8247 } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8250 Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8251 MRI.replaceRegWith(DstReg, NewDstReg);
8252 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8257 Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8258 MRI.replaceRegWith(DstReg, NewDstReg);
8260 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8270 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8271 AMDGPU::OpName::src0_modifiers) >= 0)
8275 NewInstr->addOperand(Src);
8278 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8281 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8283 NewInstr.addImm(Size);
8284 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8288 } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8293 "Scalar BFE is only implemented for constant width and offset");
8301 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8302 AMDGPU::OpName::src1_modifiers) >= 0)
8304 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8306 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8307 AMDGPU::OpName::src2_modifiers) >= 0)
8309 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8311 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8313 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8315 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8321 NewInstr->addOperand(Op);
8328 if (Op.getReg() == AMDGPU::SCC) {
8330 if (Op.isDef() && !Op.isDead())
8331 addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
8333 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8338 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8339 Register DstReg = NewInstr->getOperand(0).getReg();
8344 NewDstReg = MRI.createVirtualRegister(NewDstRC);
8345 MRI.replaceRegWith(DstReg, NewDstReg);
8354 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8358std::pair<bool, MachineBasicBlock *>
8370 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8373 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
8375 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
8376 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8384 MRI.replaceRegWith(OldDstReg, ResultReg);
8387 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8388 return std::pair(true, NewBB);
8391 return std::pair(false, nullptr);
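// lowerSelect (below): an S_CSELECT over SCC becomes a V_CNDMASK_B32 over a
// wave-mask condition register, re-using an existing copy of SCC when one can
// be found in the preceding instructions.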
8408 bool IsSCC = (CondReg == AMDGPU::SCC);
8416 MRI.replaceRegWith(Dest.getReg(), CondReg);
8422 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8423 NewCondReg = MRI.createVirtualRegister(TC);
8427 bool CopyFound = false;
8428 for (MachineInstr &CandI :
8431 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
8433 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8435 .addReg(CandI.getOperand(1).getReg());
8447 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8455 RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
8456 MachineInstr *NewInst;
8457 if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
8458 NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8471 MRI.replaceRegWith(Dest.getReg(), NewDestReg);
8473 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8485 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8486 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8488 unsigned SubOp = ST.hasAddNoCarry() ?
8489 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8499 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8500 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8513 Register SubResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8514 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8515 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8518 ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8530 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8531 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8545  if (ST.hasDLInsts()) {
8546  Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8554  MRI.replaceRegWith(Dest.getReg(), NewDest);
8555  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8561  bool Src0IsSGPR = Src0.isReg() &&
8562  RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
8563  bool Src1IsSGPR = Src1.isReg() &&
8564  RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
8566  Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8567  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8577  } else if (Src1IsSGPR) {
8591  MRI.replaceRegWith(Dest.getReg(), NewDest);
8595  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
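// The xnor lowering above leans on a simple identity: when no fused XNOR
// instruction is available, xnor(a, b) can be formed as not(xor(a, b)), or
// equivalently as xor(not(a), b), so the NOT can be applied to whichever
// operand is already scalar. A minimal standalone sketch of that identity in
// plain C++ (not the LLVM lowering itself; helper names are illustrative):
#include <cassert>
#include <cstdint>

// Reference semantics of a 32-bit xnor.
static uint32_t xnor_reference(uint32_t a, uint32_t b) { return ~(a ^ b); }

// Equivalent form used when it is cheaper to invert one operand up front.
static uint32_t xnor_via_not_operand(uint32_t a, uint32_t b) { return (~a) ^ b; }

int main() {
  const uint32_t as[] = {0u, 1u, 0xDEADBEEFu, ~0u};
  const uint32_t bs[] = {0u, 0x12345678u, ~0u};
  for (uint32_t a : as)
    for (uint32_t b : bs)
      assert(xnor_reference(a, b) == xnor_via_not_operand(a, b));
  return 0;
}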
8601  unsigned Opcode) const {
8611  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8612  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8624  MRI.replaceRegWith(Dest.getReg(), NewDest);
8625  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8630  unsigned Opcode) const {
8640  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8641  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8653  MRI.replaceRegWith(Dest.getReg(), NewDest);
8654  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8669  const MCInstrDesc &InstDesc = get(Opcode);
8670  const TargetRegisterClass *Src0RC = Src0.isReg() ?
8672  &AMDGPU::SGPR_32RegClass;
8674  const TargetRegisterClass *Src0SubRC =
8675  RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8678  AMDGPU::sub0, Src0SubRC);
8680  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8681  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8682  const TargetRegisterClass *NewDestSubRC =
8683  RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8685  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8686  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
8689  AMDGPU::sub1, Src0SubRC);
8691  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8692  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
8697  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8704  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8706  Worklist.insert(&LoHalf);
8707  Worklist.insert(&HiHalf);
8713  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
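// The split above applies the same 32-bit VALU opcode to the sub0 and sub1
// halves of a 64-bit scalar source and glues the two results back together.
// A rough standalone model of that data flow, assuming a purely per-half
// bitwise operation such as NOT (names illustrative, not the MIR expansion):
#include <cassert>
#include <cstdint>

static uint64_t not64_split(uint64_t x) {
  uint32_t lo = static_cast<uint32_t>(x);        // sub0
  uint32_t hi = static_cast<uint32_t>(x >> 32);  // sub1
  uint32_t lo_res = ~lo;                         // LoHalf instruction
  uint32_t hi_res = ~hi;                         // HiHalf instruction
  // REG_SEQUENCE: recombine the halves into the full 64-bit result.
  return (static_cast<uint64_t>(hi_res) << 32) | lo_res;
}

int main() {
  assert(not64_split(0x0123456789ABCDEFull) == ~0x0123456789ABCDEFull);
  assert(not64_split(0) == ~0ull);
  return 0;
}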
8724  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8725  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8726  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8734  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8735  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8736  const TargetRegisterClass *Src0SubRC =
8737  RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8738  if (RI.isSGPRClass(Src0SubRC))
8739  Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8740  const TargetRegisterClass *Src1SubRC =
8741  RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8742  if (RI.isSGPRClass(Src1SubRC))
8743  Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8747  MachineOperand Op0L =
8749  MachineOperand Op1L =
8751  MachineOperand Op0H =
8753  MachineOperand Op1H =
8771  Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8772  MachineInstr *Op1L_Op0H =
8777  Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8778  MachineInstr *Op1H_Op0L =
8783  Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8784  MachineInstr *Carry =
8789  MachineInstr *LoHalf =
8794  Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8799  MachineInstr *HiHalf =
8810  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8822  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
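// The partial products above (Op1L_Op0H, Op1H_Op0L, Carry, LoHalf, HiHalf)
// follow the schoolbook decomposition of a 64 x 64 -> 64 multiply into 32-bit
// pieces: the low word is lo(a)*lo(b), and the high word accumulates
// mulhi(lo(a), lo(b)) plus the low 32 bits of the two cross terms. A
// self-contained sketch of that arithmetic (not the MIR expansion itself):
#include <cassert>
#include <cstdint>

static uint64_t mul64_from_32(uint64_t a, uint64_t b) {
  uint32_t a_lo = static_cast<uint32_t>(a), a_hi = static_cast<uint32_t>(a >> 32);
  uint32_t b_lo = static_cast<uint32_t>(b), b_hi = static_cast<uint32_t>(b >> 32);

  uint64_t lo_prod = static_cast<uint64_t>(a_lo) * b_lo; // mul_lo + mul_hi
  uint32_t lo = static_cast<uint32_t>(lo_prod);
  uint32_t carry_into_hi = static_cast<uint32_t>(lo_prod >> 32);

  // Only the low 32 bits of the cross terms can reach the high word.
  uint32_t hi = carry_into_hi + a_lo * b_hi + a_hi * b_lo;
  return (static_cast<uint64_t>(hi) << 32) | lo;
}

int main() {
  assert(mul64_from_32(0xFFFFFFFFull, 0xFFFFFFFFull) ==
         0xFFFFFFFFull * 0xFFFFFFFFull);
  assert(mul64_from_32(0x123456789ULL, 0xABCDEF12ULL) ==
         0x123456789ULL * 0xABCDEF12ULL);
  return 0;
}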
8833  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8834  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8835  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8843  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8844  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8845  const TargetRegisterClass *Src0SubRC =
8846  RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8847  if (RI.isSGPRClass(Src0SubRC))
8848  Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8849  const TargetRegisterClass *Src1SubRC =
8850  RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8851  if (RI.isSGPRClass(Src1SubRC))
8852  Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8856  MachineOperand Op0L =
8858  MachineOperand Op1L =
8862  unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8863  ? AMDGPU::V_MUL_HI_U32_e64
8864  : AMDGPU::V_MUL_HI_I32_e64;
8865  MachineInstr *HiHalf =
8868  MachineInstr *LoHalf =
8879  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8887  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8903  const MCInstrDesc &InstDesc = get(Opcode);
8904  const TargetRegisterClass *Src0RC = Src0.isReg() ?
8906  &AMDGPU::SGPR_32RegClass;
8908  const TargetRegisterClass *Src0SubRC =
8909  RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8910  const TargetRegisterClass *Src1RC = Src1.isReg() ?
8912  &AMDGPU::SGPR_32RegClass;
8914  const TargetRegisterClass *Src1SubRC =
8915  RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8918  AMDGPU::sub0, Src0SubRC);
8920  AMDGPU::sub0, Src1SubRC);
8922  AMDGPU::sub1, Src0SubRC);
8924  AMDGPU::sub1, Src1SubRC);
8926  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8927  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8928  const TargetRegisterClass *NewDestSubRC =
8929  RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8931  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8932  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
8936  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8937  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
8941  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8948  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8950  Worklist.insert(&LoHalf);
8951  Worklist.insert(&HiHalf);
8954  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8970  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8972  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8974  MachineOperand* Op0;
8975  MachineOperand* Op1;
8988  Register NewDest = MRI.createVirtualRegister(DestRC);
8994  MRI.replaceRegWith(Dest.getReg(), NewDest);
9010  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
9011  const TargetRegisterClass *SrcRC = Src.isReg() ?
9012  MRI.getRegClass(Src.getReg()) :
9013  &AMDGPU::SGPR_32RegClass;
9015  Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9016  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9018  const TargetRegisterClass *SrcSubRC =
9019  RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9022  AMDGPU::sub0, SrcSubRC);
9024  AMDGPU::sub1, SrcSubRC);
9030  MRI.replaceRegWith(Dest.getReg(), ResultReg);
9034  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
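// The V_BCNT expansion above uses the fact that a 64-bit population count is
// the sum of the counts of the two 32-bit halves, with the second
// V_BCNT_U32_B32 accumulating into the result of the first. A standalone
// sketch of the identity (illustrative only, not the instruction sequence):
#include <cassert>
#include <cstdint>

static uint32_t popcount32(uint32_t x) {
  uint32_t n = 0;
  for (; x; x &= x - 1) // clear lowest set bit
    ++n;
  return n;
}

// Second bcnt adds into the first: bcnt(hi, bcnt(lo, 0)).
static uint32_t popcount64_split(uint64_t x) {
  uint32_t mid = popcount32(static_cast<uint32_t>(x));
  return popcount32(static_cast<uint32_t>(x >> 32)) + mid;
}

int main() {
  assert(popcount64_split(0) == 0);
  assert(popcount64_split(~0ull) == 64);
  assert(popcount64_split(0x8000000100000001ull) == 3);
  return 0;
}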
9053  Offset == 0 && "Not implemented");
9056  Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9057  Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9058  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9075  MRI.replaceRegWith(Dest.getReg(), ResultReg);
9076  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9081  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9082  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9086  .addReg(Src.getReg(), 0, AMDGPU::sub0);
9089  .addReg(Src.getReg(), 0, AMDGPU::sub0)
9094  MRI.replaceRegWith(Dest.getReg(), ResultReg);
9095  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9114  const MCInstrDesc &InstDesc = get(Opcode);
9116  bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9117  unsigned OpcodeAdd =
9118  ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
9120  const TargetRegisterClass *SrcRC =
9121  Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9122  const TargetRegisterClass *SrcSubRC =
9123  RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9125  MachineOperand SrcRegSub0 =
9127  MachineOperand SrcRegSub1 =
9130  Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9131  Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9132  Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9133  Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9140  .addReg(IsCtlz ? MidReg1 : MidReg2)
9146  .addReg(IsCtlz ? MidReg2 : MidReg1);
9148  MRI.replaceRegWith(Dest.getReg(), MidReg4);
9150  addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
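// The count-op split above builds a 64-bit ctlz/cttz from two 32-bit counts:
// count each half, push the count taken from the half searched second past 32,
// and take the unsigned minimum. A rough standalone model of the ctlz path
// (the cttz path mirrors it from the other end), assuming the 32-bit count
// returns all-ones for a zero input and the +32 add saturates rather than
// wraps; helper names are illustrative, not the exact instruction sequence:
#include <cassert>
#include <cstdint>

// 32-bit count-leading-zeros with the "all-ones for zero input" convention.
static uint32_t clz32_or_all_ones(uint32_t x) {
  if (x == 0)
    return ~0u;
  uint32_t n = 0;
  for (uint32_t bit = 0x80000000u; (x & bit) == 0; bit >>= 1)
    ++n;
  return n;
}

// Saturating 32-bit add, standing in for an add with the clamp modifier.
static uint32_t sat_add_u32(uint32_t a, uint32_t b) {
  uint64_t s = static_cast<uint64_t>(a) + b;
  return s > 0xFFFFFFFFull ? 0xFFFFFFFFu : static_cast<uint32_t>(s);
}

static uint32_t ctlz64_split(uint64_t x) {
  uint32_t lo = static_cast<uint32_t>(x), hi = static_cast<uint32_t>(x >> 32);
  uint32_t from_lo = sat_add_u32(clz32_or_all_ones(lo), 32);
  uint32_t from_hi = clz32_or_all_ones(hi);
  return from_lo < from_hi ? from_lo : from_hi; // unsigned min
}

int main() {
  assert(ctlz64_split(1ull) == 63);
  assert(ctlz64_split(1ull << 40) == 23);
  assert(ctlz64_split(0) == ~0u);
  return 0;
}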
9153void SIInstrInfo::addUsersToMoveToVALUWorklist(
9157  MachineInstr &UseMI = *MO.getParent();
9161  switch (UseMI.getOpcode()) {
9164  case AMDGPU::SOFT_WQM:
9165  case AMDGPU::STRICT_WWM:
9166  case AMDGPU::STRICT_WQM:
9167  case AMDGPU::REG_SEQUENCE:
9169  case AMDGPU::INSERT_SUBREG:
9172  OpNo = MO.getOperandNo();
9177  MRI.constrainRegClass(DstReg, OpRC);
9179  if (!RI.hasVectorRegisters(OpRC))
9190  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9196  if (ST.useRealTrue16Insts()) {
9199  SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9206  SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9212  bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
9213  bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
9215  auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
9217  case AMDGPU::S_PACK_LL_B32_B16:
9220  isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9221  .addImm(AMDGPU::lo16)
9223  isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9224  .addImm(AMDGPU::hi16);
9226  case AMDGPU::S_PACK_LH_B32_B16:
9229  isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9230  .addImm(AMDGPU::lo16)
9231  .addReg(SrcReg1, 0, AMDGPU::hi16)
9232  .addImm(AMDGPU::hi16);
9234  case AMDGPU::S_PACK_HL_B32_B16:
9235  NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
9236  .addImm(AMDGPU::lo16)
9238  isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9239  .addImm(AMDGPU::hi16);
9241  case AMDGPU::S_PACK_HH_B32_B16:
9242  NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
9243  .addImm(AMDGPU::lo16)
9244  .addReg(SrcReg1, 0, AMDGPU::hi16)
9245  .addImm(AMDGPU::hi16);
9252  MRI.replaceRegWith(Dest.getReg(), ResultReg);
9253  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9258  case AMDGPU::S_PACK_LL_B32_B16: {
9259  Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9260  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9277  case AMDGPU::S_PACK_LH_B32_B16: {
9278  Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9287  case AMDGPU::S_PACK_HL_B32_B16: {
9288  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9298  case AMDGPU::S_PACK_HH_B32_B16: {
9299  Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9300  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9317  MRI.replaceRegWith(Dest.getReg(), ResultReg);
9318  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
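// All four S_PACK_*_B32_B16 forms handled above build a 32-bit value out of
// one 16-bit half of each source: LL takes both low halves, LH the low half of
// src0 and the high half of src1, HL the reverse, HH both high halves; the
// half taken from src0 lands in bits [15:0] and the one from src1 in bits
// [31:16]. A compact standalone reference of that packing arithmetic (not the
// VALU expansion above):
#include <cassert>
#include <cstdint>

static uint32_t lo16(uint32_t x) { return x & 0xFFFFu; }
static uint32_t hi16(uint32_t x) { return x >> 16; }

static uint32_t pack_ll(uint32_t a, uint32_t b) { return lo16(a) | (lo16(b) << 16); }
static uint32_t pack_lh(uint32_t a, uint32_t b) { return lo16(a) | (hi16(b) << 16); }
static uint32_t pack_hl(uint32_t a, uint32_t b) { return hi16(a) | (lo16(b) << 16); }
static uint32_t pack_hh(uint32_t a, uint32_t b) { return hi16(a) | (hi16(b) << 16); }

int main() {
  uint32_t a = 0xAAAA1111u, b = 0xBBBB2222u;
  assert(pack_ll(a, b) == 0x22221111u);
  assert(pack_lh(a, b) == 0xBBBB1111u);
  assert(pack_hl(a, b) == 0x2222AAAAu);
  assert(pack_hh(a, b) == 0xBBBBAAAAu);
  return 0;
}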
9327  assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9328  !Op.isDead() && Op.getParent() == &SCCDefInst);
9329  SmallVector<MachineInstr *, 4> CopyToDelete;
9332  for (MachineInstr &MI :
9336  int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9339  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9340  Register DestReg = MI.getOperand(0).getReg();
9342  MRI.replaceRegWith(DestReg, NewCond);
9347  MI.getOperand(SCCIdx).setReg(NewCond);
9353  if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9356  for (auto &Copy : CopyToDelete)
9357  Copy->eraseFromParent();
9365void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9371  for (MachineInstr &MI :
9374  if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9376  if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9385  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9393 case AMDGPU::REG_SEQUENCE:
9394 case AMDGPU::INSERT_SUBREG:
9396 case AMDGPU::SOFT_WQM:
9397 case AMDGPU::STRICT_WWM:
9398 case AMDGPU::STRICT_WQM: {
9400 if (RI.isAGPRClass(SrcRC)) {
9401 if (RI.isAGPRClass(NewDstRC))
9406 case AMDGPU::REG_SEQUENCE:
9407 case AMDGPU::INSERT_SUBREG:
9408 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9411 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9417 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9420 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9434  int OpIndices[3]) const {
9435  const MCInstrDesc &Desc = MI.getDesc();
9451  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9453  for (unsigned i = 0; i < 3; ++i) {
9454  int Idx = OpIndices[i];
9458  const MachineOperand &MO = MI.getOperand(Idx);
9464  const TargetRegisterClass *OpRC =
9465  RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
9466  bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9472  const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9473  if (RI.isSGPRClass(RegRC))
9491  if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9492  SGPRReg = UsedSGPRs[0];
9495  if (!SGPRReg && UsedSGPRs[1]) {
9496  if (UsedSGPRs[1] == UsedSGPRs[2])
9497  SGPRReg = UsedSGPRs[1];
9504  AMDGPU::OpName OperandName) const {
9505  if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9508  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9512  return &MI.getOperand(Idx);
9526 if (ST.isAmdHsaOS()) {
9529 RsrcDataFormat |= (1ULL << 56);
9534 RsrcDataFormat |= (2ULL << 59);
9537 return RsrcDataFormat;
9547  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9552  uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9559  Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9565  unsigned Opc = MI.getOpcode();
9571  return get(Opc).mayLoad() &&
9576  int &FrameIndex) const {
9578  if (!Addr || !Addr->isFI())
9589  int &FrameIndex) const {
9597  int &FrameIndex) const {
9611  int &FrameIndex) const {
9628  while (++I != E && I->isInsideBundle()) {
9629  assert(!I->isBundle() && "No nested bundle!");
9637  unsigned Opc = MI.getOpcode();
9639  unsigned DescSize = Desc.getSize();
9644  unsigned Size = DescSize;
9648  if (MI.isBranch() && ST.hasOffset3fBug())
9659  bool HasLiteral = false;
9660  unsigned LiteralSize = 4;
9661  for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9666  if (ST.has64BitLiterals()) {
9667  switch (OpInfo.OperandType) {
9683  return HasLiteral ? DescSize + LiteralSize : DescSize;
9688  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9692  int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9693  return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9697  case TargetOpcode::BUNDLE:
9699  case TargetOpcode::INLINEASM:
9700  case TargetOpcode::INLINEASM_BR: {
9702  const char *AsmStr = MI.getOperand(0).getSymbolName();
9706  if (MI.isMetaInstruction())
9710  const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9713  unsigned LoInstOpcode = D16Info->LoOp;
9715  DescSize = Desc.getSize();
9719  if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9722  DescSize = Desc.getSize();
9733  if (MI.memoperands_empty())
9745 static const std::pair<int, const char *> TargetIndices[] = {
9783std::pair<unsigned, unsigned>
9790 static const std::pair<unsigned, const char *> TargetFlags[] = {
9808 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9823 return AMDGPU::WWM_COPY;
9825 return AMDGPU::COPY;
9837  bool IsNullOrVectorRegister = true;
9840  IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9845  return IsNullOrVectorRegister &&
9847  (Opcode == AMDGPU::IMPLICIT_DEF &&
9849  (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
9850  MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9858  if (ST.hasAddNoCarry())
9862  Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9863  MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9874  if (ST.hasAddNoCarry())
9878  Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
9880  : RS.scavengeRegisterBackwards(
9881  *RI.getBoolRC(), I, false,
9894  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9895  case AMDGPU::SI_KILL_I1_TERMINATOR:
9904  case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9905  return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9906  case AMDGPU::SI_KILL_I1_PSEUDO:
9907  return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9919  const unsigned OffsetBits =
9921  return (1 << OffsetBits) - 1;
9928  if (MI.isInlineAsm())
9931  for (auto &Op : MI.implicit_operands()) {
9932  if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
9933  Op.setReg(AMDGPU::VCC_LO);
9942  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
9946  const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
9947  return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
9964  if (Imm <= MaxImm + 64) {
9966  Overflow = Imm - MaxImm;
9993  if (ST.hasRestrictedSOffset())
10036  if (!ST.hasFlatInstOffsets())
10044  if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10056std::pair<int64_t, int64_t>
10059  int64_t RemainderOffset = COffsetVal;
10060  int64_t ImmField = 0;
10065  if (AllowNegative) {
10067  int64_t D = 1LL << NumBits;
10068  RemainderOffset = (COffsetVal / D) * D;
10069  ImmField = COffsetVal - RemainderOffset;
10071  if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10073  (ImmField % 4) != 0) {
10075  RemainderOffset += ImmField % 4;
10076  ImmField -= ImmField % 4;
10078  } else if (COffsetVal >= 0) {
10080  RemainderOffset = COffsetVal - ImmField;
10084  assert(RemainderOffset + ImmField == COffsetVal);
10085  return {ImmField, RemainderOffset};
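// splitFlatOffset divides a constant offset into the part that fits in the
// instruction's immediate field and a remainder that must be added to the
// address register, keeping the invariant ImmField + RemainderOffset ==
// COffsetVal. A simplified standalone sketch of the signed (AllowNegative)
// path above, ignoring the unaligned-scratch workaround; here NumBits is the
// width of the field minus the sign bit, as in the D = 1LL << NumBits line,
// and the names are illustrative:
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<int64_t, int64_t> splitSignedOffset(int64_t Offset,
                                                     unsigned NumBits) {
  const int64_t D = 1LL << NumBits;
  int64_t Remainder = (Offset / D) * D; // truncating division, rounds toward 0
  int64_t Imm = Offset - Remainder;     // |Imm| < 2^NumBits, fits the signed field
  assert(Imm + Remainder == Offset);
  return {Imm, Remainder};
}

int main() {
  auto [imm0, rem0] = splitSignedOffset(5000, 12); // 2^12 = 4096
  assert(imm0 == 904 && rem0 == 4096);
  auto [imm1, rem1] = splitSignedOffset(-5000, 12);
  assert(imm1 == -904 && rem1 == -4096);
  return 0;
}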
10089 if (ST.hasNegativeScratchOffsetBug() &&
10097 switch (ST.getGeneration()) {
10123 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10124 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10125 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10126 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10127 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10128 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10129 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10130 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10137#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10138 case OPCODE##_dpp: \
10139 case OPCODE##_e32: \
10140 case OPCODE##_e64: \
10141 case OPCODE##_e64_dpp: \
10142 case OPCODE##_sdwa:
10156 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10157 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10158 case AMDGPU::V_FMA_F16_gfx9_e64:
10159 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10160 case AMDGPU::V_INTERP_P2_F16:
10161 case AMDGPU::V_MAD_F16_e64:
10162 case AMDGPU::V_MAD_U16_e64:
10163 case AMDGPU::V_MAD_I16_e64:
10185 switch (ST.getGeneration()) {
10198  if (isMAI(Opcode)) {
10206  if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
10213  if (ST.hasGFX90AInsts()) {
10215  if (ST.hasGFX940Insts())
10246  for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I)
10247  if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10248  auto &RegOp = MI.getOperand(1 + 2 * I);
10260  switch (MI.getOpcode()) {
10262  case AMDGPU::REG_SEQUENCE:
10266  case AMDGPU::INSERT_SUBREG:
10267  if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10284  if (!P.Reg.isVirtual())
10288  auto *DefInst = MRI.getVRegDef(RSR.Reg);
10289  while (auto *MI = DefInst) {
10291  switch (MI->getOpcode()) {
10293  case AMDGPU::V_MOV_B32_e32: {
10294  auto &Op1 = MI->getOperand(1);
10299  DefInst = MRI.getVRegDef(RSR.Reg);
10307  DefInst = MRI.getVRegDef(RSR.Reg);
10320  assert(MRI.isSSA() && "Must be run on SSA");
10322  auto *TRI = MRI.getTargetRegisterInfo();
10323  auto *DefBB = DefMI.getParent();
10327  if (UseMI.getParent() != DefBB)
10330  const int MaxInstScan = 20;
10334  auto E = UseMI.getIterator();
10335  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10336  if (I->isDebugInstr())
10339  if (++NumInst > MaxInstScan)
10342  if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10352  assert(MRI.isSSA() && "Must be run on SSA");
10354  auto *TRI = MRI.getTargetRegisterInfo();
10355  auto *DefBB = DefMI.getParent();
10357  const int MaxUseScan = 10;
10360  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10361  auto &UseInst = *Use.getParent();
10364  if (UseInst.getParent() != DefBB || UseInst.isPHI())
10367  if (++NumUse > MaxUseScan)
10374  const int MaxInstScan = 20;
10378  for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10381  if (I->isDebugInstr())
10384  if (++NumInst > MaxInstScan)
10397  if (Reg == VReg && --NumUse == 0)
10399  } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10408  auto Cur = MBB.begin();
10409  if (Cur != MBB.end())
10411  if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10414  } while (Cur != MBB.end() && Cur != LastPHIIt);
10423  if (InsPt != MBB.end() &&
10424  (InsPt->getOpcode() == AMDGPU::SI_IF ||
10425  InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10426  InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10427  InsPt->definesRegister(Src, nullptr)) {
10431  .addReg(Src, 0, SrcSubReg)
10456  if (isFullCopyInstr(MI)) {
10457  Register DstReg = MI.getOperand(0).getReg();
10458  Register SrcReg = MI.getOperand(1).getReg();
10465  MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10469  MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10480  unsigned *PredCost) const {
10481  if (MI.isBundle()) {
10484  unsigned Lat = 0, Count = 0;
10485  for (++I; I != E && I->isBundledWithPred(); ++I) {
10487  Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10489  return Lat + Count - 1;
10492  return SchedModel.computeInstrLatency(&MI);
10498  unsigned Opcode = MI.getOpcode();
10503  : MI.getOperand(1).getReg();
10504  LLT DstTy = MRI.getType(Dst);
10505  LLT SrcTy = MRI.getType(Src);
10507  unsigned SrcAS = SrcTy.getAddressSpace();
10510  ST.hasGloballyAddressableScratch()
10518  if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10519  return HandleAddrSpaceCast(MI);
10522  auto IID = GI->getIntrinsicID();
10529  case Intrinsic::amdgcn_addrspacecast_nonnull:
10530  return HandleAddrSpaceCast(MI);
10531  case Intrinsic::amdgcn_if:
10532  case Intrinsic::amdgcn_else:
10546  if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10547  Opcode == AMDGPU::G_SEXTLOAD) {
10548  if (MI.memoperands_empty())
10552  return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10553  mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10561  if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10562  Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10563  Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10576  unsigned opcode = MI.getOpcode();
10577  if (opcode == AMDGPU::V_READLANE_B32 ||
10578  opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10579  opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10582  if (isCopyInstr(MI)) {
10586  RI.getPhysRegBaseClass(srcOp.getReg());
10594  if (MI.isPreISelOpcode())
10609  if (MI.memoperands_empty())
10613  return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10614  mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10629  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10631  if (!SrcOp.isReg())
10635  if (!Reg || !SrcOp.readsReg())
10641  if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10668  F, "ds_ordered_count unsupported for this calling conv"));
10682  Register &SrcReg2, int64_t &CmpMask,
10683  int64_t &CmpValue) const {
10684  if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10687  switch (MI.getOpcode()) {
10690 case AMDGPU::S_CMP_EQ_U32:
10691 case AMDGPU::S_CMP_EQ_I32:
10692 case AMDGPU::S_CMP_LG_U32:
10693 case AMDGPU::S_CMP_LG_I32:
10694 case AMDGPU::S_CMP_LT_U32:
10695 case AMDGPU::S_CMP_LT_I32:
10696 case AMDGPU::S_CMP_GT_U32:
10697 case AMDGPU::S_CMP_GT_I32:
10698 case AMDGPU::S_CMP_LE_U32:
10699 case AMDGPU::S_CMP_LE_I32:
10700 case AMDGPU::S_CMP_GE_U32:
10701 case AMDGPU::S_CMP_GE_I32:
10702 case AMDGPU::S_CMP_EQ_U64:
10703 case AMDGPU::S_CMP_LG_U64:
10704  SrcReg = MI.getOperand(0).getReg();
10705  if (MI.getOperand(1).isReg()) {
10706  if (MI.getOperand(1).getSubReg())
10708  SrcReg2 = MI.getOperand(1).getReg();
10710  } else if (MI.getOperand(1).isImm()) {
10712  CmpValue = MI.getOperand(1).getImm();
10718 case AMDGPU::S_CMPK_EQ_U32:
10719 case AMDGPU::S_CMPK_EQ_I32:
10720 case AMDGPU::S_CMPK_LG_U32:
10721 case AMDGPU::S_CMPK_LG_I32:
10722 case AMDGPU::S_CMPK_LT_U32:
10723 case AMDGPU::S_CMPK_LT_I32:
10724 case AMDGPU::S_CMPK_GT_U32:
10725 case AMDGPU::S_CMPK_GT_I32:
10726 case AMDGPU::S_CMPK_LE_U32:
10727 case AMDGPU::S_CMPK_LE_I32:
10728 case AMDGPU::S_CMPK_GE_U32:
10729 case AMDGPU::S_CMPK_GE_I32:
10730  SrcReg = MI.getOperand(0).getReg();
10732  CmpValue = MI.getOperand(1).getImm();
10751  if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10753  if (MI.killsRegister(AMDGPU::SCC, &RI))
10758  SccDef->setIsDead(false);
10766 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
10767 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
10769 bool Op1IsNonZeroImm =
10770 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
10771 bool Op2IsZeroImm =
10772 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
10773 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
10779 Register SrcReg2, int64_t CmpMask,
10788  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10820  if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
10821  MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
10827  if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
10842  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10843  this](int64_t ExpectedValue, unsigned SrcSize,
10844  bool IsReversible, bool IsSigned) -> bool {
10872  if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
10873  Def->getOpcode() != AMDGPU::S_AND_B64)
10877  const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
10888  SrcOp = &Def->getOperand(2);
10889  else if (isMask(&Def->getOperand(2)))
10890  SrcOp = &Def->getOperand(1);
10898  if (IsSigned && BitNo == SrcSize - 1)
10901  ExpectedValue <<= BitNo;
10903  bool IsReversedCC = false;
10904  if (CmpValue != ExpectedValue) {
10907  IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10912  Register DefReg = Def->getOperand(0).getReg();
10913  if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10919  if (!MRI->use_nodbg_empty(DefReg)) {
10927  unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10928  : AMDGPU::S_BITCMP1_B32
10929  : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10930  : AMDGPU::S_BITCMP1_B64;
10935  Def->eraseFromParent();
10943  case AMDGPU::S_CMP_EQ_U32:
10944  case AMDGPU::S_CMP_EQ_I32:
10945  case AMDGPU::S_CMPK_EQ_U32:
10946  case AMDGPU::S_CMPK_EQ_I32:
10947  return optimizeCmpAnd(1, 32, true, false);
10948  case AMDGPU::S_CMP_GE_U32:
10949  case AMDGPU::S_CMPK_GE_U32:
10950  return optimizeCmpAnd(1, 32, false, false);
10951  case AMDGPU::S_CMP_GE_I32:
10952  case AMDGPU::S_CMPK_GE_I32:
10953  return optimizeCmpAnd(1, 32, false, true);
10954  case AMDGPU::S_CMP_EQ_U64:
10955  return optimizeCmpAnd(1, 64, true, false);
10956  case AMDGPU::S_CMP_LG_U32:
10957  case AMDGPU::S_CMP_LG_I32:
10958  case AMDGPU::S_CMPK_LG_U32:
10959  case AMDGPU::S_CMPK_LG_I32:
10960  return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect();
10961  case AMDGPU::S_CMP_GT_U32:
10962  case AMDGPU::S_CMPK_GT_U32:
10963  return optimizeCmpAnd(0, 32, false, false);
10964  case AMDGPU::S_CMP_GT_I32:
10965  case AMDGPU::S_CMPK_GT_I32:
10966  return optimizeCmpAnd(0, 32, false, true);
10967  case AMDGPU::S_CMP_LG_U64:
10968  return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect();
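// The optimizeCmpAnd rewrite above rests on simple bit arithmetic: when the
// compared value is produced by an s_and with a single-bit mask 1<<n,
// comparing that result against zero (or against 1<<n) is exactly a test of
// bit n, so the and+compare pair can be folded into one s_bitcmp0/s_bitcmp1
// that writes SCC directly; the reversed-condition case just flips which
// bitcmp is used. A small standalone check of the core equivalence
// (illustrative only, 32-bit unsigned case):
#include <cassert>
#include <cstdint>

// Pattern being replaced: SCC = ((x & mask) != 0) for a one-bit mask.
static bool cmp_lg_of_and(uint32_t x, uint32_t mask) { return (x & mask) != 0; }

// Replacement: a bit test of bit n, i.e. what s_bitcmp1 computes into SCC.
static bool bitcmp1(uint32_t x, unsigned n) { return (x >> n) & 1u; }

int main() {
  for (unsigned n = 0; n < 32; ++n) {
    uint32_t mask = 1u << n;
    const uint32_t xs[] = {0u, mask, ~mask, 0xDEADBEEFu, ~0u};
    for (uint32_t x : xs)
      assert(cmp_lg_of_and(x, mask) == bitcmp1(x, n));
  }
  return 0;
}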
10975  AMDGPU::OpName OpName) const {
10976  if (!ST.needsAlignedVGPRs())
10979  int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
10991  bool IsAGPR = RI.isAGPR(MRI, DataReg);
10993  IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10996  MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10997  : &AMDGPU::VReg_64_Align2RegClass);
10999  .addReg(DataReg, 0, Op.getSubReg())
11004  Op.setSubReg(AMDGPU::sub0);
11026  unsigned Opcode = MI.getOpcode();
11032  Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11033  Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11036  if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, const SIRegisterInfo &RI)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
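A hedged sketch of how the split result is typically consumed; TII, COffsetVal, and the use of SIInstrFlags::FlatGlobal as the flat variant value are assumptions here:
  auto [ImmField, Remainder] =
      TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                           SIInstrFlags::FlatGlobal);
  // ImmField fits the instruction's offset field; Remainder must be added to
  // the base address register separately before the memory access.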
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool setsSCCifResultIsNonZero(const MachineInstr &MI)
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
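For illustration only (Offset, TII, and SIInstrFlags::FlatGlobal as the variant are assumptions), a caller would guard offset folding like this:
  if (TII->isLegalFLATOffset(Offset, AMDGPUAS::GLOBAL_ADDRESS,
                             SIInstrFlags::FlatGlobal)) {
    // The subtarget can encode Offset directly in the instruction.
  }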
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
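A sketch of the usual check at MC lowering time, assuming (as an assumption, not a guarantee from this listing) that the function returns -1 when the pseudo has no real encoding on the current subtarget:
  int MCOp = TII->pseudoToMCOpcode(MI.getOpcode());
  if (MCOp == -1)
    report_fatal_error("pseudo instruction has no MC equivalent");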
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
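A small usage sketch (MI and TII are assumed); the accessor returns nullptr when the opcode has no such named operand:
  if (const MachineOperand *Off =
          TII->getNamedOperand(MI, AMDGPU::OpName::offset)) {
    int64_t Imm = Off->getImm(); // valid only when the operand is an immediate
    (void)Imm;
  }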
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand of MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU-specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
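As a hedged example of how these predicates are used (Imm and a GCNSubtarget ST are assumptions), a 32-bit constant that passes the check can be encoded inline instead of consuming a literal slot on the constant bus:
  bool Inline = AMDGPU::isInlinableLiteral32(Imm, ST.hasInv2PiInlineImm());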
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
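Generic BuildMI usage, shown as a sketch rather than code from this file (MBB, I, DL, TII and DstReg are assumed):
  // Materialize a 32-bit zero into DstReg before insertion point I.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), DstReg)
      .addImm(0);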
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
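A short sketch of how these MathExtras helpers are typically combined when validating an immediate offset; the 13-bit and 12-bit widths are illustrative assumptions, not a claim about any particular encoding:
  bool FitsSigned = isInt<13>(Offset);    // signed 13-bit field
  bool FitsUnsigned = isUInt<12>(Offset); // unsigned 12-bit field
  int64_t Wrapped = SignExtend64<13>(Offset & maskTrailingOnes<uint64_t>(13));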
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
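Typical use, sketched with a placeholder predicate: erase instructions while walking a block without invalidating the iterator.
  for (MachineInstr &MI : make_early_inc_range(MBB)) {
    if (MI.isDebugInstr()) // illustrative predicate only
      MI.eraseFromParent();
  }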
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is Skew mod Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
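A trivial sketch (Imm is assumed to be a uint64_t from the surrounding code): split a 64-bit immediate into halves, e.g. to emit it as a pair of 32-bit moves.
  uint32_t Lo = Lo_32(Imm);
  uint32_t Hi = Hi_32(Imm);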
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.