#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
           return MMO->isLoad() && MMO->isInvariant();

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
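// resultDependsOnExec (below): a compare whose result only feeds
// S_AND_SAVEEXEC or an EXEC-reading S_AND is treated as independent of the
// EXEC mask; lane-crossing reads such as V_READFIRSTLANE_B32 always depend
// on it.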
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
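// The pairwise-load helpers below require both opcodes to be loads with at
// least one def; the named "offset" operand index is adjusted by NumDefs,
// presumably because the operand list it is then used against does not
// include the defs.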
                                            int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;
    int64_t Offset1, bool OffsetIsScalable1,
    int64_t Offset2, bool OffsetIsScalable2,
    unsigned ClusterSize, unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

    int64_t Offset0, int64_t Offset1, unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
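// reportIllegalCopy diagnoses copies that cannot be lowered (VGPR to SGPR).
// The gfx908 path below has no direct AGPR-to-AGPR move: it either reuses a
// preceding V_ACCVGPR_WRITE immediate when it is safe to propagate, or bounces
// the value through a reserved VGPR temporary found via the register
// scavenger.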
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");
         "Destination register of the copy should be an AGPR.");

  for (auto Def = MI, E = MBB.begin(); Def != E; ) {
    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

    bool SafeToPropagate = true;

    for (auto I = Def; I != MI && SafeToPropagate; ++I)
      if (I->modifiesRegister(DefOp.getReg(), &RI))
        SafeToPropagate = false;

    if (!SafeToPropagate)

    for (auto I = Def; I != MI; ++I)
      I->clearRegisterKills(DefOp.getReg(), &RI);

    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

    if (ImpUseSuperReg) {
      UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);
  LastMI->addRegisterKilled(SrcReg, &RI);
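// copyPhysReg: 16-bit copies are widened to their 32-bit containers (real
// true16 only), SCC and AGPR sources/destinations get dedicated sequences, and
// wide register classes are split into per-subregister moves, iterating
// forward or backward depending on whether source and destination overlap.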
                              Register SrcReg, bool KillSrc,
                              bool RenamableDest, bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {
    assert(ST.useRealTrue16Insts());

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg with an AGPR!");

  if (ST.useRealTrue16Insts()) {
    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {
    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                               *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
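// getConstValDefinedInReg (below): recognizes the mov-immediate family,
// including the bit-reversed (S_BREV/V_BFREV) and bitwise-not forms, and
// reports the constant that MI writes into Reg.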
                                            int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;
  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
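// The indirect-access pseudos below are keyed on the vector size in dwords;
// dedicated opcodes exist for 1-5, 8-12, 16 and 32 dword vectors, while the
// 64-bit scalar movrel table covers 1, 2, 4, 8 and 16 elements.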
                                                      bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                              bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");

    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
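// Spill save opcodes are selected purely by spill size; when the subtarget has
// MAI instructions the AV (AGPR-or-VGPR superclass) variants are chosen, and
// WWM spills use the dedicated 32-bit WWM pseudos below.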
                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

                                    FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = RI.getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = RI.getSpillSize(*RC);
                                    FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
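// insertNoops emits S_NOP instructions; each S_NOP can cover at most
// MaxSNopCount (1 << getSNopBits()) of the requested quantity, so larger
// counts are emitted in chunks.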
                              unsigned Quantity) const {
  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();
  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {
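// Trap lowering: the block is split at the trap and a TrapBB successor is
// added; the trap block masks the doorbell ID (DoorbellIDMask), ORs in the
// queue-wave-abort bit (ECQueueWaveAbort) and publishes the result through m0
// before continuing into the halt loop.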
  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);

  ContBB = HaltLoopBB;

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
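// expandPostRAPseudo: the *_term pseudos simply decay to their real opcodes;
// the move-immediate, indirect-access and PC-relative pseudos that follow need
// explicit expansion and erase the original instruction when done.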
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
      BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

      BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
    Op.setOffset(Op.getOffset() + 4);
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {

  case AMDGPU::ENTER_STRICT_WQM: {
    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
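// expandMovDPP64: when a 64-bit DPP mov is available (hasMovB64 plus the
// DPALU_DPP feature) the pseudo is rewritten in place; otherwise it is split
// into two 32-bit DPP movs over sub0/sub1, with immediate sources split into
// their low and high halves.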
std::pair<MachineInstr*, MachineInstr*>
  assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                   AMDGPU::OpName Src0OpName,
                                   AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();
  NonRegOp2.setImm(NonRegVal);
                                                  unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&

  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                     unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                         Src1, AMDGPU::OpName::src1_modifiers);
                      AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                     int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.hasAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);

    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;
    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(

    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
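// The two helpers below keep the BranchPredicate to S_CBRANCH_* mapping (and
// its inverse) in one place for the branch analysis and insertion hooks.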
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {
  if (Cond[0].isImm()) {
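// Select lowering: 32-bit vector selects use V_CNDMASK_B32 and scalar selects
// use S_CSELECT_B32/B64; wider results are assembled with REG_SEQUENCE from
// per-element selects over the Sub0_15 / Sub0_15_64 index tables below.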
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
  if (MRI.getRegClass(FalseReg) != RC)

  CondCycles = TrueCycles = FalseCycles = NumInsts;
  return RI.hasVGPRs(RC) && NumInsts <= 6;

  if (MRI.getRegClass(FalseReg) != RC)

  if (NumInsts % 2 == 0)

  CondCycles = TrueCycles = FalseCycles = NumInsts;
  return RI.isSGPRClass(RC);

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();
  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

                                              unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
  return std::nullopt;
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
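// foldImmediate: a COPY user of a constant def can be rewritten directly into
// a mov-immediate, while MAD/FMA users can absorb the constant by switching to
// the FMAMK/FMAAK (madmk/madak) forms chosen by the tables above, untying src2
// when the original opcode was a two-address MAC/FMAC.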
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (RI.hasVGPRs(DstRC))

    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);

  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
      if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {

        if (Def && Def->isMoveImmediate() &&

        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {

        if (Def && Def->isMoveImmediate() &&
            MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);

  Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  if (MI.isBundle()) {
    if (MI.getBundleSize() != 1)
    CandidateMI = MI.getNextNode();

  MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);

  if (MI.isBundle()) {
        MI.untieRegOperand(MO.getOperandNo());

  if (Def.isEarlyClobber() && Def.isReg() &&
    auto UpdateDefIndex = [&](LiveRange &LR) {
      auto *S = LR.find(OldIndex);
      if (S != LR.end() && S->start == OldIndex) {
        assert(S->valno && S->valno->def == OldIndex);
        S->start = NewIndex;
        S->valno->def = NewIndex;

    for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {
    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
    if (MRI.hasOneNonDBGUse(DefReg)) {
      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      if (MI.isBundle()) {
          if (MO.isReg() && MO.getReg() == DefReg) {
            assert(MO.getSubReg() == 0 &&
                   "tied sub-registers in bundles currently not supported");
            MI.removeOperand(MO.getOperandNo());

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

      if (MI.isBundle()) {
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

  return MI.isBundle() ? &MI : NewMI;
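// convertToThreeAddressImpl: two-address MAC/FMAC forms become their MAD/FMA
// three-address equivalents, or the FMAAK/FMAMK forms when an operand is a
// foldable literal; single-instruction bundles are handled by rewriting the
// bundled instruction in place.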
                                                       ThreeAddressUpdates &U) const {
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);

  MachineInstrBuilder MIB;

  const MachineOperand *Src0Mods =
  const MachineOperand *Src1Mods =
  const MachineOperand *Src2Mods =

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
    MachineInstr *DefMI;

        MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())

  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:
  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);

  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  if (ST.isTgSplitEnabled())

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))

    return MI.readsRegister(AMDGPU::EXEC, &RI);
  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);

  switch (Imm.getBitWidth()) {
                                        ST.hasInv2PiInlineImm());
                                        ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                                          ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();
  bool HasInv2Pi = ST.hasInv2PiInlineImm();
    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&

  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))

  return ST.hasVOP3Literal();

                                      int64_t ImmVal) const {
  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))
  return RI.opCanUseInlineConstant(OpInfo.OperandType);
4748 "unexpected imm-like operand kind");
  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                                  AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();

  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_CNDMASK_B32_e64:
    if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||

      (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                           unsigned Op32) const {
      Inst32.add(MI.getOperand(I));
  int Idx = MI.getNumExplicitDefs();
    int OpTy = MI.getDesc().operands()[Idx++].OperandType;
  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {

  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
    return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())
  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";

  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);

  if (SIInstrInfo::isGenericOpcode(Opcode))
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {
      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";

  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int16_t RegClass = getOpRegClassID(OpInfo);

    switch (OpInfo.OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Expected inline constant for operand.";
      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
      if (OpInfo.isGenericType())

    if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
      if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
                RI.getSubRegisterClass(RC, MO.getSubReg())) {
          RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
      if (!RC || !RI.isProperlyAlignedRC(*RC)) {
        ErrInfo = "Subtarget requires even aligned vector registers";

    if (RegClass != -1) {
      if (Reg.isVirtual())
        ErrInfo = "Operand has incorrect register class.";

    if (!ST.hasSDWA()) {
      ErrInfo = "SDWA is not supported on this target";

    for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
                    AMDGPU::OpName::dst_sel}) {
        int64_t Imm = MO->getImm();
          ErrInfo = "Invalid SDWA selection";

    int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
        if (!ST.hasSDWAScalar()) {
          if (!MO.isReg() ||
              !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
            ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
              "Only reg allowed as operands in SDWA instructions on GFX9+";

    if (!ST.hasSDWAOmod()) {
      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";

    if (isVOPC(BasicOpcode)) {
      if (!ST.hasSDWASdst() && DstIdx != -1) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
      } else if (!ST.hasSDWAOutModsVOPC()) {
        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";
          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
            "Dst register should be tied to implicit use of preserved register";
        ErrInfo = "Dst register should use same physical register as preserved";

  if (isImage(Opcode) && !MI.mayStore()) {
    if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
      uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
      if (RegCount > DstSize) {
        ErrInfo = "Image instruction returns too many registers for dst "
  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

    int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
        } else if (!MO.isFI()) {
              ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

          if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
                return !RI.regsOverlap(SGPRUsed, SGPR);

    if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

    if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
      ErrInfo = "VOP3 instruction uses literal";

  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;

    for (int OpIdx : {Src0Idx, Src1Idx}) {
        if (MO.getReg() != SGPRUsed)

    if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
      ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

      ErrInfo = "ABS not allowed in VOP3B instructions";

      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

  if (Desc.isBranch()) {
      ErrInfo = "invalid branch target for SOPK instruction";
        ErrInfo = "invalid immediate for SOPK instruction";
        ErrInfo = "invalid immediate for SOPK instruction";

  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";

  if (MI.mayStore() &&
    if (Soff && Soff->getReg() != AMDGPU::M0) {
      ErrInfo = "scalar stores must use m0 as offset register";

  if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";

  if (isDS(MI) && !ST.hasGDS()) {
    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RSrcOpName =
      isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
      ErrInfo = "dim is out of range";

    if (ST.hasR128A16()) {
      IsA16 = R128A16->getImm() != 0;
    } else if (ST.hasA16()) {
      IsA16 = A16->getImm() != 0;

    bool IsNSA = RsrcIdx - VAddr0Idx > 1;
    unsigned AddrWords =
    unsigned VAddrWords;
      VAddrWords = RsrcIdx - VAddr0Idx;
      if (ST.hasPartialNSAEncoding() &&
        unsigned LastVAddrIdx = RsrcIdx - 1;
        VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

    if (VAddrWords != AddrWords) {
                        << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";

    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";
    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";
    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";
    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST &&
          !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
      if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";
    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";

    AMDGPU::OpName DataName =
        isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
    if (ST.hasGFX90AInsts()) {
      if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
          (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";
      if (Data && Data2 &&
        ErrInfo = "Invalid register class: "
                  "both data operands should be VGPR or AGPR";
      if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
          (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "agpr loads and stores not supported on this GPU";

  if (ST.needsAlignedVGPRs()) {
    const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
      if (Reg.isPhysical())
        return !(RI.getHWRegIndex(Reg) & 1);
      return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
             !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);

    if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
        Opcode == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";
      if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for vaddr operand of image instructions";

  if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";

    if (!ST.hasScaleOffset()) {
      ErrInfo = "Subtarget does not support offset scaling";
      ErrInfo = "Instruction does not support offset scaling";

  for (unsigned I = 0; I < 3; ++I) {

  if (ST.hasFlatScratchHiInB64InstHazard() && isSALU(MI) &&
      MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, nullptr)) {
    if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
                    &AMDGPU::SReg_64RegClass) ||
        Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
      ErrInfo = "Instruction cannot read flat_scratch_base_hi";
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM: return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
                   RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
                   AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U64_PSEUDO:
    return AMDGPU::V_ADD_U64_PSEUDO;
  case AMDGPU::S_SUB_U64_PSEUDO:
    return AMDGPU::V_SUB_U64_PSEUDO;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
  case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
  case AMDGPU::S_CVT_F32_F16:
  case AMDGPU::S_CVT_HI_F32_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
                                   : AMDGPU::V_CVT_F32_F16_fake16_e64;
  case AMDGPU::S_CVT_F16_F32:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
                                   : AMDGPU::V_CVT_F16_F32_fake16_e64;
  case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
  case AMDGPU::S_CEIL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
                                   : AMDGPU::V_CEIL_F16_fake16_e64;
  case AMDGPU::S_FLOOR_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
                                   : AMDGPU::V_FLOOR_F16_fake16_e64;
  case AMDGPU::S_TRUNC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
                                   : AMDGPU::V_TRUNC_F16_fake16_e64;
  case AMDGPU::S_RNDNE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
                                   : AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
  case AMDGPU::S_ADD_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
                                   : AMDGPU::V_ADD_F16_fake16_e64;
  case AMDGPU::S_SUB_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
                                   : AMDGPU::V_SUB_F16_fake16_e64;
  case AMDGPU::S_MIN_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
                                   : AMDGPU::V_MIN_F16_fake16_e64;
  case AMDGPU::S_MAX_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
                                   : AMDGPU::V_MAX_F16_fake16_e64;
  case AMDGPU::S_MINIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
                                   : AMDGPU::V_MINIMUM_F16_fake16_e64;
  case AMDGPU::S_MAXIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
                                   : AMDGPU::V_MAXIMUM_F16_fake16_e64;
  case AMDGPU::S_MUL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
                                   : AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
  case AMDGPU::S_FMAC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
                                   : AMDGPU::V_FMAC_F16_fake16_e64;
  case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
  case AMDGPU::S_CMP_LT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
                                   : AMDGPU::V_CMP_LT_F16_fake16_e64;
  case AMDGPU::S_CMP_EQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
                                   : AMDGPU::V_CMP_EQ_F16_fake16_e64;
  case AMDGPU::S_CMP_LE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
                                   : AMDGPU::V_CMP_LE_F16_fake16_e64;
  case AMDGPU::S_CMP_GT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
                                   : AMDGPU::V_CMP_GT_F16_fake16_e64;
  case AMDGPU::S_CMP_LG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
                                   : AMDGPU::V_CMP_LG_F16_fake16_e64;
  case AMDGPU::S_CMP_GE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
                                   : AMDGPU::V_CMP_GE_F16_fake16_e64;
  case AMDGPU::S_CMP_O_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
                                   : AMDGPU::V_CMP_O_F16_fake16_e64;
  case AMDGPU::S_CMP_U_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
                                   : AMDGPU::V_CMP_U_F16_fake16_e64;
  case AMDGPU::S_CMP_NGE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
                                   : AMDGPU::V_CMP_NGE_F16_fake16_e64;
  case AMDGPU::S_CMP_NLG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
                                   : AMDGPU::V_CMP_NLG_F16_fake16_e64;
  case AMDGPU::S_CMP_NGT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
                                   : AMDGPU::V_CMP_NGT_F16_fake16_e64;
  case AMDGPU::S_CMP_NLE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
                                   : AMDGPU::V_CMP_NLE_F16_fake16_e64;
  case AMDGPU::S_CMP_NEQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
                                   : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
  case AMDGPU::S_CMP_NLT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
                                   : AMDGPU::V_CMP_NLT_F16_fake16_e64;
  case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
                                   : AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
                                   : AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
                                   : AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
                                   : AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
                                   : AMDGPU::V_SQRT_F16_fake16_e64;

                   "Unexpected scalar opcode without corresponding vector one!");
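// The switch above is the SALU-to-VALU opcode mapping consulted when scalar
// instructions are moved to the VALU (see the worklist loop further below).
// A minimal usage sketch, assuming Inst is a MachineInstr selected for
// lowering, mirroring the INSTRUCTION_LIST_END check used later:
//
//   unsigned NewOpcode = getVALUOp(Inst);
//   if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
//     // No direct VALU equivalent; a dedicated split/lowering helper is used.
//   }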
         "Not a whole wave func");
  if (MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
      MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)

                                                  unsigned OpNo) const {
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.operands()[OpNo].RegClass == -1) {
    if (Reg.isVirtual()) {
      return MRI.getRegClass(Reg);
    return RI.getPhysRegBaseClass(Reg);

  int16_t RegClass = getOpRegClassID(Desc.operands()[OpNo]);
  return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);

  unsigned RCID = getOpRegClassID(get(MI.getOpcode()).operands()[OpIdx]);
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
                    : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
                                 : AMDGPU::V_MOV_B32_e32;
    Opcode = AMDGPU::COPY;
  else if (RI.isSGPRClass(RC))
    Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

  return RI.getSubReg(SuperReg.getReg(), SubIdx);
  unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx);
  if (SubIdx == AMDGPU::sub0)
  if (SubIdx == AMDGPU::sub1)

void SIInstrInfo::swapOperands(MachineInstr &Inst) const {

  if (Reg.isPhysical())
    return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()) != nullptr;
  return RI.getCommonSubClass(DRC, RC) != nullptr;
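// The move-opcode selection above picks V_MOV_B64_PSEUDO, V_MOV_B16_t16_e64,
// or V_MOV_B32_e32 for vector destinations based on the operand's register
// size, and falls back to COPY or S_MOV_B32 / S_MOV_B64 when the expected
// class is scalar.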
  unsigned Opc = MI.getOpcode();
  constexpr AMDGPU::OpName OpNames[] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
    int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
    if (static_cast<unsigned>(SrcIdx) == OpIdx &&

    bool IsAGPR = RI.isAGPR(MRI, MO.getReg());
    if (IsAGPR && !ST.hasMAIInsts())
    if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&

    const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    const int DataIdx = AMDGPU::getNamedOperandIdx(
        Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
    if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
        MI.getOperand(DataIdx).isReg() &&
        RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
    if ((int)OpIdx == DataIdx) {
      if (VDstIdx != -1 &&
          RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
      const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
          RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)

    if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
        (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&

  if (ST.hasFlatScratchHiInB64InstHazard() &&
    if (Opc == AMDGPU::S_BITCMP0_B64 || Opc == AMDGPU::S_BITCMP1_B64)

  constexpr unsigned NumOps = 3;
  constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
      AMDGPU::OpName::src0,           AMDGPU::OpName::src1,
      AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,
      AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};

    int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
    MO = &MI.getOperand(SrcIdx);
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
    unsigned Mods = MI.getOperand(ModsIdx).getImm();

  return !OpSel && !OpSelHi;
  int64_t RegClass = getOpRegClassID(OpInfo);
      RegClass != -1 ? RI.getRegClass(RegClass) : nullptr;

  int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
  int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
    if (!LiteralLimit--)

    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
          if (--ConstantBusLimit <= 0)
          if (!LiteralLimit--)
          if (--ConstantBusLimit <= 0)

    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      if (!Op.isReg() && !Op.isFI() && !Op.isRegMask() &&
          !Op.isIdenticalTo(*MO))
  } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&

  bool Is64BitOp = Is64BitFPOp ||
        (!ST.has64BitLiterals() || InstDesc.getSize() != 4))
    if (!Is64BitFPOp && (int32_t)Imm < 0 &&

  bool IsGFX950Only = ST.hasGFX950Insts();
  bool IsGFX940Only = ST.hasGFX940Insts();

  if (!IsGFX950Only && !IsGFX940Only)

  unsigned Opcode = MI.getOpcode();
  case AMDGPU::V_CVT_PK_BF8_F32_e64:
  case AMDGPU::V_CVT_PK_FP8_F32_e64:
  case AMDGPU::V_MQSAD_PK_U16_U8_e64:
  case AMDGPU::V_MQSAD_U32_U8_e64:
  case AMDGPU::V_PK_ADD_F16:
  case AMDGPU::V_PK_ADD_F32:
  case AMDGPU::V_PK_ADD_I16:
  case AMDGPU::V_PK_ADD_U16:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_FMA_F16:
  case AMDGPU::V_PK_FMA_F32:
  case AMDGPU::V_PK_FMAC_F16_e32:
  case AMDGPU::V_PK_FMAC_F16_e64:
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_MAD_I16:
  case AMDGPU::V_PK_MAD_U16:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_PK_MAX_I16:
  case AMDGPU::V_PK_MAX_U16:
  case AMDGPU::V_PK_MIN_F16:
  case AMDGPU::V_PK_MIN_I16:
  case AMDGPU::V_PK_MIN_U16:
  case AMDGPU::V_PK_MOV_B32:
  case AMDGPU::V_PK_MUL_F16:
  case AMDGPU::V_PK_MUL_F32:
  case AMDGPU::V_PK_MUL_LO_U16:
  case AMDGPU::V_PK_SUB_I16:
  case AMDGPU::V_PK_SUB_U16:
  case AMDGPU::V_QSAD_PK_U16_U8_e64:
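// The operand-legality logic above tracks two budgets per instruction: the
// constant-bus limit reported by ST.getConstantBusLimit() and a literal budget
// that is only nonzero for non-VOP3 encodings or when hasVOP3Literal() holds.
// The packed / cvt / *SAD opcodes listed directly above feed the extra
// GFX940/GFX950-only handling guarded by IsGFX940Only / IsGFX950Only; the
// exact restriction applied to them lives in lines elided from this listing.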
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() &&

  if (Opc == AMDGPU::V_WRITELANE_B32) {
      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))

  if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  if (HasImplicitSGPR || !MI.isCommutable()) {

  if (CommutedOpc == -1) {
  MI.setDesc(get(CommutedOpc));

  bool Src0Kill = Src0.isKill();
  } else if (Src1.isReg()) {
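  // VOP2 legalization strategy visible above, in rough order of preference:
  // commute src0/src1 when the instruction is commutable so the constant-bus
  // or literal operand lands in a slot that can accept it; for lane-select
  // operands of V_WRITELANE_B32 / V_READLANE_B32, copy the value into a fresh
  // SReg_32_XM0 register; and force src2 of the two-address V_FMAC_* forms to
  // be a VGPR.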
  unsigned Opc = MI.getOpcode();
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)

  if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
      Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
    if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    if (VOP3Idx[2] != -1) {
      if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
        Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  int ConstantBusLimit = ST.getConstantBusLimit(Opc);
  int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
  Register SGPRReg = findUsedSGPR(MI, VOP3Idx);
    SGPRsUsed.insert(SGPRReg);

  for (int Idx : VOP3Idx) {
      if (LiteralLimit > 0 && ConstantBusLimit > 0) {
    if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg())))
      if (ConstantBusLimit > 0) {

    if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
        !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))

  for (unsigned I = 0; I < 3; ++I) {

  SRC = RI.getCommonSubClass(SRC, DstRC);
  unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
  if (RI.hasAGPRs(VRC)) {
    VRC = RI.getEquivalentVGPRClass(VRC);
    Register NewSrcReg = MRI.createVirtualRegister(VRC);
            get(TargetOpcode::COPY), NewSrcReg)
            get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
    for (unsigned i = 0; i < SubRegs; ++i) {
      Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
              get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
          .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
            get(AMDGPU::REG_SEQUENCE), DstReg);
    for (unsigned i = 0; i < SubRegs; ++i) {
      MIB.addImm(RI.getSubRegFromChannel(i));

  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
    SBase->setReg(SGPR);
  if (SOff && !RI.isSGPRReg(MRI, SOff->getReg())) {
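  // When an SGPR value must be produced from a vector source, the code above
  // reads the value back one 32-bit channel at a time with
  // V_READFIRSTLANE_B32 and reassembles it with REG_SEQUENCE; AGPR sources are
  // first copied into an equivalent VGPR class before the readlane copies.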
  int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
  if (OldSAddrIdx < 0)

  int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
  if (NewVAddrIdx < 0)

  int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
  if (OldVAddrIdx >= 0) {
    VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());

  if (OldVAddrIdx == NewVAddrIdx) {
    MRI.removeRegOperandFromUseList(&NewVAddr);
    MRI.moveOperands(&NewVAddr, &SAddr, 1);
    MRI.removeRegOperandFromUseList(&NewVAddr);
    MRI.addRegOperandToUseList(&NewVAddr);
    assert(OldSAddrIdx == NewVAddrIdx);

    if (OldVAddrIdx >= 0) {
      int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
                                                 AMDGPU::OpName::vdst_in);
      if (NewVDstIn != -1) {
        int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
      if (NewVDstIn != -1) {
        int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);

  if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))

  unsigned OpSubReg = Op.getSubReg();
      RI.getRegClassForReg(MRI, OpReg), OpSubReg);
  Register DstReg = MRI.createVirtualRegister(DstRC);

  if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
  bool ImpDef = Def->isImplicitDef();
  while (!ImpDef && Def && Def->isCopy()) {
    if (Def->getOperand(1).getReg().isPhysical())
    Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
    ImpDef = Def && Def->isImplicitDef();
  if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();

  unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
  unsigned NumSubRegs = RegSize / 32;
  Register VScalarOp = ScalarOp->getReg();

  if (NumSubRegs == 1) {
    Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)

    Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)

      CondReg = NewCondReg;
      Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

    ScalarOp->setReg(CurReg);
    ScalarOp->setIsKill();

    assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
           "Unhandled register size");

    for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
          .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
          .addReg(VScalarOp, VScalarOpUndef,
                  TRI->getSubRegFromChannel(Idx + 1));

      Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)

      Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
      auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
      if (NumSubRegs <= 2)
        Cmp.addReg(VScalarOp);
        Cmp.addReg(VScalarOp, VScalarOpUndef,
                   TRI->getSubRegFromChannel(Idx, 2));

        CondReg = NewCondReg;
        Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

    const auto *SScalarOpRC =
        TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
    Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
        BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
    unsigned Channel = 0;
    for (Register Piece : ReadlanePieces) {
      Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));

    ScalarOp->setReg(SScalarOp);
    ScalarOp->setIsKill();

  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
  MRI.setSimpleHint(SaveExec, CondReg);

  if (!Begin.isValid())
  if (!End.isValid()) {

  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
      MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI,
                                  std::numeric_limits<unsigned>::max()) !=
    SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);

  for (auto I = Begin; I != AfterMI; I++) {
    for (auto &MO : I->all_uses())
      MRI.clearKillFlags(MO.getReg());

  MBB.addSuccessor(LoopBB);

  for (auto &Succ : RemainderBB->successors()) {
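  // Waterfall-loop pattern emitted above (sketch): for a divergent "scalar"
  // operand held in VGPRs, each iteration reads the value of the first active
  // lane with V_READFIRSTLANE_B32, compares it against all lanes with
  // V_CMP_EQ_U32/U64, restricts execution to the matching lanes (SaveExec
  // keeps the previous exec mask), runs the rewritten instruction with the
  // now-uniform operand, and loops back for the lanes that still differ.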
static std::tuple<unsigned, unsigned>
      TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
                             AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);

  Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
  uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();

      .addImm(AMDGPU::sub0_sub1)

  return std::tuple(RsrcPtr, NewSRsrc);
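// The static helper above splits an illegal (VGPR-resident) 128-bit resource:
// the first two dwords are extracted as a 64-bit pointer, and a replacement
// SGPR_128 descriptor is assembled from a zeroed base (Zero64) plus the
// subtarget's default resource data format, so the buffer instruction itself
// remains well-formed while the pointer is handled separately.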
  if (MI.getOpcode() == AMDGPU::PHI) {
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
      if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
          MRI.getRegClass(MI.getOperand(i).getReg());
      if (RI.hasVectorRegisters(OpRC)) {

      VRC = &AMDGPU::VReg_1RegClass;
                ? RI.getEquivalentAGPRClass(SRC)
                : RI.getEquivalentVGPRClass(SRC);
                ? RI.getEquivalentAGPRClass(VRC)
                : RI.getEquivalentVGPRClass(VRC);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      if (!Op.isReg() || !Op.getReg().isVirtual())

  if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
    if (RI.hasVGPRs(DstRC)) {
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
        if (!Op.isReg() || !Op.getReg().isVirtual())

  if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
    if (DstRC != Src0RC) {

  if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))

  if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
      MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
      MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
      MI.getOpcode() == AMDGPU::S_WQM_B32 ||
      MI.getOpcode() == AMDGPU::S_WQM_B64 ||
      MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
      MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))

            ? AMDGPU::OpName::rsrc
            : AMDGPU::OpName::srsrc;
    if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))

    AMDGPU::OpName SampOpName =
        isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
    if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))

  if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
    if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) {
      unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
      unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
      while (Start->getOpcode() != FrameSetupOpcode)
      while (End->getOpcode() != FrameDestroyOpcode)
      while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
             MI.definesRegister(End->getOperand(1).getReg(), nullptr))

  if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
      MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
      MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
      MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))

  bool isSoffsetLegal = true;
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset);
  if (SoffsetIdx != -1) {
        !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) {
      isSoffsetLegal = false;

  bool isRsrcLegal = true;
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
  if (RsrcIdx != -1) {
      isRsrcLegal = false;

  if (isRsrcLegal && isSoffsetLegal)

    Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

    const auto *BoolXExecRC = RI.getWaveMaskRegClass();
    Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
    Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);

    unsigned RsrcPtr, NewSRsrc;
        .addReg(RsrcPtr, 0, AMDGPU::sub0)
        .addReg(RsrcPtr, 0, AMDGPU::sub1)
  } else if (!VAddr && ST.hasAddr64()) {
           "FIXME: Need to emit flat atomics here");

    unsigned RsrcPtr, NewSRsrc;
    Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

      MIB.addImm(CPol->getImm());
      MIB.addImm(TFE->getImm());
    MI.removeFromParent();

        .addReg(RsrcPtr, 0, AMDGPU::sub0)
        .addReg(RsrcPtr, 0, AMDGPU::sub1)

  if (!isSoffsetLegal) {
  if (!isSoffsetLegal) {

      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
  if (RsrcIdx != -1) {
    DeferredList.insert(MI);

  return DeferredList.contains(MI);
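// Buffer instructions whose srsrc operand is still illegal are recorded in
// DeferredList above rather than rewritten on the spot; the accessor returning
// DeferredList.contains(MI) is the corresponding membership query used by the
// rest of the legalizer to recognize those deferred instructions.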
  if (!ST.useRealTrue16Insts())

  unsigned Opcode = MI.getOpcode();
      OpIdx >= get(Opcode).getNumOperands() ||
      get(Opcode).operands()[OpIdx].RegClass == -1)

  if (!Op.isReg() || !Op.getReg().isVirtual())
  if (!RI.isVGPRClass(CurrRC))

  int16_t RCID = getOpRegClassID(get(Opcode).operands()[OpIdx]);
  if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
    Op.setSubReg(AMDGPU::lo16);
  } else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
    Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
    Op.setReg(NewDstReg);
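  // True16 fixup above: when the operand's current VGPR class and the class
  // the opcode expects differ only by the lo16 half, either the existing
  // register is narrowed by selecting the lo16 subregister, or a fresh
  // VGPR_32 is assembled (with an undefined 16-bit companion half) so the
  // 16-bit value can be used where a 32-bit register is required.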
  while (!Worklist.empty()) {
           "Deferred MachineInstr are not supposed to re-populate worklist");

    case AMDGPU::S_ADD_I32:
    case AMDGPU::S_SUB_I32: {
      std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);

    case AMDGPU::S_MUL_U64:
      if (ST.hasVectorMulU64()) {
        NewOpcode = AMDGPU::V_MUL_U64_e64;
        splitScalarSMulU64(Worklist, Inst, MDT);

    case AMDGPU::S_MUL_U64_U32_PSEUDO:
    case AMDGPU::S_MUL_I64_I32_PSEUDO:
      splitScalarSMulPseudo(Worklist, Inst, MDT);

    case AMDGPU::S_AND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);

    case AMDGPU::S_OR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);

    case AMDGPU::S_XOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);

    case AMDGPU::S_NAND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);

    case AMDGPU::S_NOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);

    case AMDGPU::S_XNOR_B64:
      if (ST.hasDLInsts())
        splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
        splitScalar64BitXnor(Worklist, Inst, MDT);

    case AMDGPU::S_ANDN2_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);

    case AMDGPU::S_ORN2_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);

    case AMDGPU::S_BREV_B64:
      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true);

    case AMDGPU::S_NOT_B64:
      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);

    case AMDGPU::S_BCNT1_I32_B64:
      splitScalar64BitBCNT(Worklist, Inst);

    case AMDGPU::S_BFE_I64:
      splitScalar64BitBFE(Worklist, Inst);

    case AMDGPU::S_FLBIT_I32_B64:
      splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
    case AMDGPU::S_FF1_I32_B64:
      splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);

    case AMDGPU::S_LSHL_B32:
      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
    case AMDGPU::S_ASHR_I32:
      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
    case AMDGPU::S_LSHR_B32:
      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
    case AMDGPU::S_LSHL_B64:
      if (ST.hasOnlyRevVALUShifts()) {
                        ? AMDGPU::V_LSHLREV_B64_pseudo_e64
                        : AMDGPU::V_LSHLREV_B64_e64;
    case AMDGPU::S_ASHR_I64:
      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
    case AMDGPU::S_LSHR_B64:
      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_LSHRREV_B64_e64;

    case AMDGPU::S_ABS_I32:
      lowerScalarAbs(Worklist, Inst);

    case AMDGPU::S_ABSDIFF_I32:
      lowerScalarAbsDiff(Worklist, Inst);

    case AMDGPU::S_CBRANCH_SCC0:
    case AMDGPU::S_CBRANCH_SCC1: {
      bool IsSCC = CondReg == AMDGPU::SCC;

    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_BFM_B64:

    case AMDGPU::S_PACK_LL_B32_B16:
    case AMDGPU::S_PACK_LH_B32_B16:
    case AMDGPU::S_PACK_HL_B32_B16:
    case AMDGPU::S_PACK_HH_B32_B16:
      movePackToVALU(Worklist, MRI, Inst);

    case AMDGPU::S_XNOR_B32:
      lowerScalarXnor(Worklist, Inst);

    case AMDGPU::S_NAND_B32:
      splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);

    case AMDGPU::S_NOR_B32:
      splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);

    case AMDGPU::S_ANDN2_B32:
      splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);

    case AMDGPU::S_ORN2_B32:
      splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
    case AMDGPU::S_ADD_CO_PSEUDO:
    case AMDGPU::S_SUB_CO_PSEUDO: {
      unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
                         ? AMDGPU::V_ADDC_U32_e64
                         : AMDGPU::V_SUBB_U32_e64;
      const auto *CarryRC = RI.getWaveMaskRegClass();

      if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
        Register NewCarryReg = MRI.createVirtualRegister(CarryRC);

      Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(

      addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);

    case AMDGPU::S_UADDO_PSEUDO:
    case AMDGPU::S_USUBO_PSEUDO: {
      unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
                         ? AMDGPU::V_ADD_CO_U32_e64
                         : AMDGPU::V_SUB_CO_U32_e64;
          RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
      Register DestReg = MRI.createVirtualRegister(NewRC);

      MRI.replaceRegWith(Dest0.getReg(), DestReg);
      addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);

    case AMDGPU::S_LSHL1_ADD_U32:
    case AMDGPU::S_LSHL2_ADD_U32:
    case AMDGPU::S_LSHL3_ADD_U32:
    case AMDGPU::S_LSHL4_ADD_U32: {
      unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32   ? 1
                           : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
                           : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
          RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg()));
      Register DestReg = MRI.createVirtualRegister(NewRC);

      MRI.replaceRegWith(Dest.getReg(), DestReg);
      addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);

    case AMDGPU::S_CSELECT_B32:
    case AMDGPU::S_CSELECT_B64:
      lowerSelect(Worklist, Inst, MDT);

    case AMDGPU::S_CMP_EQ_I32:
    case AMDGPU::S_CMP_LG_I32:
    case AMDGPU::S_CMP_GT_I32:
    case AMDGPU::S_CMP_GE_I32:
    case AMDGPU::S_CMP_LT_I32:
    case AMDGPU::S_CMP_LE_I32:
    case AMDGPU::S_CMP_EQ_U32:
    case AMDGPU::S_CMP_LG_U32:
    case AMDGPU::S_CMP_GT_U32:
    case AMDGPU::S_CMP_GE_U32:
    case AMDGPU::S_CMP_LT_U32:
    case AMDGPU::S_CMP_LE_U32:
    case AMDGPU::S_CMP_EQ_U64:
    case AMDGPU::S_CMP_LG_U64:
    case AMDGPU::S_CMP_LT_F32:
    case AMDGPU::S_CMP_EQ_F32:
    case AMDGPU::S_CMP_LE_F32:
    case AMDGPU::S_CMP_GT_F32:
    case AMDGPU::S_CMP_LG_F32:
    case AMDGPU::S_CMP_GE_F32:
    case AMDGPU::S_CMP_O_F32:
    case AMDGPU::S_CMP_U_F32:
    case AMDGPU::S_CMP_NGE_F32:
    case AMDGPU::S_CMP_NLG_F32:
    case AMDGPU::S_CMP_NGT_F32:
    case AMDGPU::S_CMP_NLE_F32:
    case AMDGPU::S_CMP_NEQ_F32:
    case AMDGPU::S_CMP_NLT_F32: {
      Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=

      addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);

    case AMDGPU::S_CMP_LT_F16:
    case AMDGPU::S_CMP_EQ_F16:
    case AMDGPU::S_CMP_LE_F16:
    case AMDGPU::S_CMP_GT_F16:
    case AMDGPU::S_CMP_LG_F16:
    case AMDGPU::S_CMP_GE_F16:
    case AMDGPU::S_CMP_O_F16:
    case AMDGPU::S_CMP_U_F16:
    case AMDGPU::S_CMP_NGE_F16:
    case AMDGPU::S_CMP_NLG_F16:
    case AMDGPU::S_CMP_NGT_F16:
    case AMDGPU::S_CMP_NLE_F16:
    case AMDGPU::S_CMP_NEQ_F16:
    case AMDGPU::S_CMP_NLT_F16: {
      Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());

      addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);

    case AMDGPU::S_CVT_HI_F32_F16: {
      Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      if (ST.useRealTrue16Insts()) {
            .addReg(TmpReg, 0, AMDGPU::hi16)

      addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);

    case AMDGPU::S_MINIMUM_F32:
    case AMDGPU::S_MAXIMUM_F32: {
      Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);

    case AMDGPU::S_MINIMUM_F16:
    case AMDGPU::S_MAXIMUM_F16: {
      Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
                                                      ? &AMDGPU::VGPR_16RegClass
                                                      : &AMDGPU::VGPR_32RegClass);

      addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);

    case AMDGPU::V_S_EXP_F16_e64:
    case AMDGPU::V_S_LOG_F16_e64:
    case AMDGPU::V_S_RCP_F16_e64:
    case AMDGPU::V_S_RSQ_F16_e64:
    case AMDGPU::V_S_SQRT_F16_e64: {
      Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
                                                      ? &AMDGPU::VGPR_16RegClass
                                                      : &AMDGPU::VGPR_32RegClass);

      addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {

    if (NewOpcode == Opcode) {
        Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
                get(AMDGPU::V_READFIRSTLANE_B32), NewDst)

            RI.getCommonSubClass(NewDstRC, SrcRC)) {
          addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
          MRI.replaceRegWith(DstReg, NewDstReg);
          MRI.clearKillFlags(NewDstReg);
          if (!MRI.constrainRegClass(NewDstReg, CommonRC))

      if (ST.useRealTrue16Insts() && Inst.isCopy() &&
        if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
          Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
          Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
                  get(AMDGPU::IMPLICIT_DEF), Undef);
                  get(AMDGPU::REG_SEQUENCE), NewDstReg)
          MRI.replaceRegWith(DstReg, NewDstReg);
          addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
        } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
          Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
          MRI.replaceRegWith(DstReg, NewDstReg);
          addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);

      Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
      MRI.replaceRegWith(DstReg, NewDstReg);
      addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);

    if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                   AMDGPU::OpName::src0_modifiers) >= 0)
    NewInstr->addOperand(Src);

    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
      NewInstr.addImm(Size);
    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
    } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
             "Scalar BFE is only implemented for constant width and offset");

      if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                     AMDGPU::OpName::src1_modifiers) >= 0)
      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
      if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                     AMDGPU::OpName::src2_modifiers) >= 0)
      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)

      NewInstr->addOperand(Op);

    if (Op.getReg() == AMDGPU::SCC) {
      if (Op.isDef() && !Op.isDead())
        addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
      addSCCDefsToVALUWorklist(NewInstr, Worklist);

  if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
    Register DstReg = NewInstr->getOperand(0).getReg();
      NewDstReg = MRI.createVirtualRegister(NewDstRC);
      MRI.replaceRegWith(DstReg, NewDstReg);

    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
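// General shape of the lowering loop as reconstructible from the fragments
// above: pick a VALU opcode (or dispatch to a dedicated split/lowering
// helper), rebuild the operand list honoring src*_modifiers / clamp / omod /
// op_sel when the new encoding has them, rewrite any SCC def/uses onto a
// wave-mask condition register, create a VGPR-class destination, and push
// every user of the old destination back onto the worklist so the replacement
// propagates transitively.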
std::pair<bool, MachineBasicBlock *>
    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
    unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
        AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;

    MRI.replaceRegWith(OldDstReg, ResultReg);

    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
    return std::pair(true, NewBB);

  return std::pair(false, nullptr);
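// The helper above returns {true, NewBB} only when it actually rewrote
// S_ADD_I32 / S_SUB_I32 into V_ADD_U32_e64 / V_SUB_U32_e64; the
// {false, nullptr} result tells the caller (see the worklist loop) to fall
// through to the generic lowering path instead.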
8437 bool IsSCC = (CondReg == AMDGPU::SCC);
8445    MRI.replaceRegWith(Dest.getReg(), CondReg);
8451    const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8452    NewCondReg = MRI.createVirtualRegister(TC);
8456    bool CopyFound = false;
8457    for (MachineInstr &CandI :
8460      if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
8462        if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8464              .addReg(CandI.getOperand(1).getReg());
8476      ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8484      RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
8485  MachineInstr *NewInst;
8486  if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
8487    NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8500  MRI.replaceRegWith(Dest.getReg(), NewDestReg);
8502  addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
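// (Annotation) An S_CSELECT driven by SCC becomes V_CNDMASK_B32_e64 on a lane-mask condition;
// if no existing copy provides that mask, a wave-wide condition register is materialized first
// with the S_CSELECT_B32/B64 opcode chosen above.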
8514    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8515    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8517    unsigned SubOp = ST.hasAddNoCarry() ?
8518      AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8528    MRI.replaceRegWith(Dest.getReg(), ResultReg);
8529    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8542    Register SubResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8543    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8544    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8547      ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8559    MRI.replaceRegWith(Dest.getReg(), ResultReg);
8560    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8574 if (ST.hasDLInsts()) {
8575      Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8583      MRI.replaceRegWith(Dest.getReg(), NewDest);
8584      addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8590    bool Src0IsSGPR = Src0.isReg() &&
8591                      RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
8592    bool Src1IsSGPR = Src1.isReg() &&
8593                      RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
8595    Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8596    Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8606    } else if (Src1IsSGPR) {
8620    MRI.replaceRegWith(Dest.getReg(), NewDest);
8624    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
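// (Annotation) Without v_xnor (no DL instructions), XNOR is expanded as NOT(XOR(a, b)); when one
// input is an SGPR the S_NOT is applied on the scalar side first so only the XOR itself has to
// move to the VALU.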
8630                                      unsigned Opcode) const {
8640  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8641  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8653  MRI.replaceRegWith(Dest.getReg(), NewDest);
8654  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8659                                     unsigned Opcode) const {
8669  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8670  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8682  MRI.replaceRegWith(Dest.getReg(), NewDest);
8683  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8698  const MCInstrDesc &InstDesc = get(Opcode);
8699  const TargetRegisterClass *Src0RC = Src0.isReg() ?
8701                                      &AMDGPU::SGPR_32RegClass;
8703  const TargetRegisterClass *Src0SubRC =
8704      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8707                                                    AMDGPU::sub0, Src0SubRC);
8709  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8710  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8711  const TargetRegisterClass *NewDestSubRC =
8712      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8714  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8715  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
8718                                                    AMDGPU::sub1, Src0SubRC);
8720  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8721  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
8726  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8733  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8735  Worklist.insert(&LoHalf);
8736  Worklist.insert(&HiHalf);
8742  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
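// (Annotation) A 64-bit scalar unary op is split into two 32-bit VALU ops on the sub0/sub1
// halves, and the results are recombined into the 64-bit destination with a REG_SEQUENCE.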
8753  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8754  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8755  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8763  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8764  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8765 const TargetRegisterClass *Src0SubRC =
8766 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8767 if (RI.isSGPRClass(Src0SubRC))
8768 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8769 const TargetRegisterClass *Src1SubRC =
8770 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8771 if (RI.isSGPRClass(Src1SubRC))
8772 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8776 MachineOperand Op0L =
8778 MachineOperand Op1L =
8780 MachineOperand Op0H =
8782 MachineOperand Op1H =
8800  Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8801  MachineInstr *Op1L_Op0H =
8806  Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8807  MachineInstr *Op1H_Op0L =
8812  Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8813  MachineInstr *Carry =
8818  MachineInstr *LoHalf =
8823  Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8828  MachineInstr *HiHalf =
8839  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8851  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
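// (Annotation) The 64x64-bit multiply is assembled from 32-bit pieces, roughly:
//   lo = lo0 * lo1
//   hi = mul_hi(lo0, lo1) + lo0 * hi1 + hi0 * lo1
// with the partial products and the carry added on the VALU before the halves are repacked.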
8862  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8863  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8864  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8872  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8873  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8874 const TargetRegisterClass *Src0SubRC =
8875 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8876 if (RI.isSGPRClass(Src0SubRC))
8877 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8878 const TargetRegisterClass *Src1SubRC =
8879 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8880 if (RI.isSGPRClass(Src1SubRC))
8881 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8885 MachineOperand Op0L =
8887 MachineOperand Op1L =
8891  unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8892 ? AMDGPU::V_MUL_HI_U32_e64
8893 : AMDGPU::V_MUL_HI_I32_e64;
8894 MachineInstr *HiHalf =
8897 MachineInstr *LoHalf =
8908  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8916  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
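// (Annotation) For S_MUL_U64_U32_PSEUDO / S_MUL_I64_I32_PSEUDO the operands are already zero- or
// sign-extended 32-bit values, so the product reduces to a 32-bit low multiply for the low half
// and the V_MUL_HI_U32/V_MUL_HI_I32 selected above for the high half.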
8932  const MCInstrDesc &InstDesc = get(Opcode);
8933  const TargetRegisterClass *Src0RC = Src0.isReg() ?
8935                                      &AMDGPU::SGPR_32RegClass;
8937  const TargetRegisterClass *Src0SubRC =
8938      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8939  const TargetRegisterClass *Src1RC = Src1.isReg() ?
8941 &AMDGPU::SGPR_32RegClass;
8943 const TargetRegisterClass *Src1SubRC =
8944 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8947 AMDGPU::sub0, Src0SubRC);
8949 AMDGPU::sub0, Src1SubRC);
8951 AMDGPU::sub1, Src0SubRC);
8953 AMDGPU::sub1, Src1SubRC);
8955  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8956 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8957 const TargetRegisterClass *NewDestSubRC =
8958 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8960  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8961  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
8965  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8966  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
8970  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8977  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8979  Worklist.insert(&LoHalf);
8980  Worklist.insert(&HiHalf);
8983  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8999  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
9001  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
9003  MachineOperand* Op0;
9004  MachineOperand* Op1;
9017  Register NewDest = MRI.createVirtualRegister(DestRC);
9023  MRI.replaceRegWith(Dest.getReg(), NewDest);
9039  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
9040  const TargetRegisterClass *SrcRC = Src.isReg() ?
9041    MRI.getRegClass(Src.getReg()) :
9042    &AMDGPU::SGPR_32RegClass;
9044  Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9045  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9047 const TargetRegisterClass *SrcSubRC =
9048 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9051 AMDGPU::sub0, SrcSubRC);
9053 AMDGPU::sub1, SrcSubRC);
9059  MRI.replaceRegWith(Dest.getReg(), ResultReg);
9063  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
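// (Annotation) The 64-bit population count is formed by two chained V_BCNT_U32_B32 ops: the low
// half is counted into MidReg and the high half's count is accumulated on top to give ResultReg.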
9082         Offset == 0 && "Not implemented");
9085    Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9086    Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9087    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9104    MRI.replaceRegWith(Dest.getReg(), ResultReg);
9105    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9110    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9111    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9115        .addReg(Src.getReg(), 0, AMDGPU::sub0);
9118        .addReg(Src.getReg(), 0, AMDGPU::sub0)
9123    MRI.replaceRegWith(Dest.getReg(), ResultReg);
9124    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9143  const MCInstrDesc &InstDesc = get(Opcode);
9145  bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9146  unsigned OpcodeAdd =
9147      ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
9149  const TargetRegisterClass *SrcRC =
9150      Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9151  const TargetRegisterClass *SrcSubRC =
9152      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9154  MachineOperand SrcRegSub0 =
9156  MachineOperand SrcRegSub1 =
9159  Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9160  Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9161  Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9162  Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9169      .addReg(IsCtlz ? MidReg1 : MidReg2)
9175      .addReg(IsCtlz ? MidReg2 : MidReg1);
9177  MRI.replaceRegWith(Dest.getReg(), MidReg4);
9179  addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
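// (Annotation) 64-bit ctlz/cttz is built from the 32-bit FFBH/FFBL of each half: one half's
// count is biased by 32 with the add selected above, and a 32-bit unsigned min then picks the
// correct total.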
9182void SIInstrInfo::addUsersToMoveToVALUWorklist(
9186    MachineInstr &UseMI = *MO.getParent();
9190    switch (UseMI.getOpcode()) {
9193 case AMDGPU::SOFT_WQM:
9194 case AMDGPU::STRICT_WWM:
9195 case AMDGPU::STRICT_WQM:
9196 case AMDGPU::REG_SEQUENCE:
9198 case AMDGPU::INSERT_SUBREG:
9201 OpNo = MO.getOperandNo();
9206 MRI.constrainRegClass(DstReg, OpRC);
9208 if (!RI.hasVectorRegisters(OpRC))
9219  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9225  if (ST.useRealTrue16Insts()) {
9228      SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9235      SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9241    bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
9242    bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
9244    auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
9246 case AMDGPU::S_PACK_LL_B32_B16:
9249 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9250 .addImm(AMDGPU::lo16)
9252 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9253 .addImm(AMDGPU::hi16);
9255 case AMDGPU::S_PACK_LH_B32_B16:
9258 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9259 .addImm(AMDGPU::lo16)
9260 .addReg(SrcReg1, 0, AMDGPU::hi16)
9261 .addImm(AMDGPU::hi16);
9263 case AMDGPU::S_PACK_HL_B32_B16:
9264 NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
9265 .addImm(AMDGPU::lo16)
9267 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9268 .addImm(AMDGPU::hi16);
9270 case AMDGPU::S_PACK_HH_B32_B16:
9271 NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
9272 .addImm(AMDGPU::lo16)
9273 .addReg(SrcReg1, 0, AMDGPU::hi16)
9274 .addImm(AMDGPU::hi16);
9281    MRI.replaceRegWith(Dest.getReg(), ResultReg);
9282    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
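// (Annotation) With real true16 instructions the S_PACK_* forms are handled as a plain
// REG_SEQUENCE over 16-bit subregisters (lo16/hi16) instead of shift/mask arithmetic.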
9287 case AMDGPU::S_PACK_LL_B32_B16: {
9288    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9289    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9306  case AMDGPU::S_PACK_LH_B32_B16: {
9307    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9316  case AMDGPU::S_PACK_HL_B32_B16: {
9317    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9327  case AMDGPU::S_PACK_HH_B32_B16: {
9328    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9329    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9346  MRI.replaceRegWith(Dest.getReg(), ResultReg);
9347  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
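// (Annotation) Without true16, the S_PACK_* forms are expanded with VALU bit arithmetic, e.g.
// the LL case as roughly (src0 & 0xffff) | (src1 << 16), with the LH/HL/HH cases masking or
// shifting the relevant halves first.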
9356  assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9357         !Op.isDead() && Op.getParent() == &SCCDefInst);
9358  SmallVector<MachineInstr *, 4> CopyToDelete;
9361  for (MachineInstr &MI :
9365    int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9368      MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9369      Register DestReg = MI.getOperand(0).getReg();
9371      MRI.replaceRegWith(DestReg, NewCond);
9376      MI.getOperand(SCCIdx).setReg(NewCond);
9382    if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9385  for (auto &Copy : CopyToDelete)
9386 Copy->eraseFromParent();
9394void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9400  for (MachineInstr &MI :
9403    if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9405    if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9414  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9422 case AMDGPU::REG_SEQUENCE:
9423 case AMDGPU::INSERT_SUBREG:
9425 case AMDGPU::SOFT_WQM:
9426 case AMDGPU::STRICT_WWM:
9427 case AMDGPU::STRICT_WQM: {
9429 if (RI.isAGPRClass(SrcRC)) {
9430 if (RI.isAGPRClass(NewDstRC))
9435 case AMDGPU::REG_SEQUENCE:
9436 case AMDGPU::INSERT_SUBREG:
9437 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9440 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9446 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9449 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9463                                        int OpIndices[3]) const {
9464  const MCInstrDesc &Desc = MI.getDesc();
9480  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9482  for (unsigned i = 0; i < 3; ++i) {
9483    int Idx = OpIndices[i];
9487    const MachineOperand &MO = MI.getOperand(Idx);
9493    const TargetRegisterClass *OpRC =
9494        RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
9495    bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9501    const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9502 if (RI.isSGPRClass(RegRC))
9520 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9521 SGPRReg = UsedSGPRs[0];
9524 if (!SGPRReg && UsedSGPRs[1]) {
9525 if (UsedSGPRs[1] == UsedSGPRs[2])
9526 SGPRReg = UsedSGPRs[1];
9533                                                 AMDGPU::OpName OperandName) const {
9534  if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9537  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9541  return &MI.getOperand(Idx);
9555 if (ST.isAmdHsaOS()) {
9558 RsrcDataFormat |= (1ULL << 56);
9563 RsrcDataFormat |= (2ULL << 59);
9566 return RsrcDataFormat;
9576  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9581 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9588    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9594  unsigned Opc = MI.getOpcode();
9600  return get(Opc).mayLoad() &&
9605                                          int &FrameIndex) const {
9607  if (!Addr || !Addr->isFI())
9618                                         int &FrameIndex) const {
9626                                           int &FrameIndex) const {
9640                                          int &FrameIndex) const {
9657  while (++I != E && I->isInsideBundle()) {
9658    assert(!I->isBundle() && "No nested bundle!");
9666  unsigned Opc = MI.getOpcode();
9668  unsigned DescSize = Desc.getSize();
9673 unsigned Size = DescSize;
9677  if (MI.isBranch() && ST.hasOffset3fBug())
9688    bool HasLiteral = false;
9689    unsigned LiteralSize = 4;
9690    for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9695 if (ST.has64BitLiterals()) {
9696 switch (OpInfo.OperandType) {
9712 return HasLiteral ? DescSize + LiteralSize : DescSize;
9717    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9721    int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9722 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9726 case TargetOpcode::BUNDLE:
9728 case TargetOpcode::INLINEASM:
9729 case TargetOpcode::INLINEASM_BR: {
9731    const char *AsmStr = MI.getOperand(0).getSymbolName();
9735    if (MI.isMetaInstruction())
9739      const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9742      unsigned LoInstOpcode = D16Info->LoOp;
9744      DescSize = Desc.getSize();
9748    if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9751      DescSize = Desc.getSize();
9762  if (MI.memoperands_empty())
9774 static const std::pair<int, const char *> TargetIndices[] = {
9812std::pair<unsigned, unsigned>
9819 static const std::pair<unsigned, const char *> TargetFlags[] = {
9837 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9852 return AMDGPU::WWM_COPY;
9854 return AMDGPU::COPY;
9866  bool IsNullOrVectorRegister = true;
9869    IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9874  return IsNullOrVectorRegister &&
9876          (Opcode == AMDGPU::IMPLICIT_DEF &&
9878          (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
9879 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9887 if (ST.hasAddNoCarry())
9891  Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9892 MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9903 if (ST.hasAddNoCarry())
9907  Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
9909                             : RS.scavengeRegisterBackwards(
9910                                   *RI.getBoolRC(), I, false,
9923 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9924 case AMDGPU::SI_KILL_I1_TERMINATOR:
9933 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9934 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9935 case AMDGPU::SI_KILL_I1_PSEUDO:
9936 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9948 const unsigned OffsetBits =
9950 return (1 << OffsetBits) - 1;
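// (Annotation) The MUBUF immediate offset is an unsigned field, so the maximum encodable value
// is simply (1 << OffsetBits) - 1; for a 12-bit field that would be 4095.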
9957  if (MI.isInlineAsm())
9960  for (auto &Op : MI.implicit_operands()) {
9961    if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
9962      Op.setReg(AMDGPU::VCC_LO);
9971  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
9975  const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
9976 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
9993 if (Imm <= MaxImm + 64) {
9995 Overflow = Imm - MaxImm;
10014 if (Overflow > 0) {
10022 if (ST.hasRestrictedSOffset())
10027 SOffset = Overflow;
10065 if (!ST.hasFlatInstOffsets())
10073 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10085std::pair<int64_t, int64_t>
10088 int64_t RemainderOffset = COffsetVal;
10089 int64_t ImmField = 0;
10094 if (AllowNegative) {
10096    int64_t D = 1LL << NumBits;
10097    RemainderOffset = (COffsetVal / D) * D;
10098    ImmField = COffsetVal - RemainderOffset;
10100    if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10102        (ImmField % 4) != 0) {
10104      RemainderOffset += ImmField % 4;
10105      ImmField -= ImmField % 4;
10107  } else if (COffsetVal >= 0) {
10109 RemainderOffset = COffsetVal - ImmField;
10113 assert(RemainderOffset + ImmField == COffsetVal);
10114 return {ImmField, RemainderOffset};
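// (Annotation) In the signed-offset path above, ImmField stays inside the encodable range and the
// rest goes to RemainderOffset. Illustrative numbers only: with NumBits = 13 (D = 8192) and
// COffsetVal = 9000, the split yields RemainderOffset = 8192 and ImmField = 808.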
10118 if (ST.hasNegativeScratchOffsetBug() &&
10126 switch (ST.getGeneration()) {
10152 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10153 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10154 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10155 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10156 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10157 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10158 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10159 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10166#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10167 case OPCODE##_dpp: \
10168 case OPCODE##_e32: \
10169 case OPCODE##_e64: \
10170 case OPCODE##_e64_dpp: \
10171 case OPCODE##_sdwa:
10185 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10186 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10187 case AMDGPU::V_FMA_F16_gfx9_e64:
10188 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10189 case AMDGPU::V_INTERP_P2_F16:
10190 case AMDGPU::V_MAD_F16_e64:
10191 case AMDGPU::V_MAD_U16_e64:
10192 case AMDGPU::V_MAD_I16_e64:
10214 switch (ST.getGeneration()) {
10227  if (isMAI(Opcode)) {
10235    if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
10242  if (ST.hasGFX90AInsts()) {
10244    if (ST.hasGFX940Insts())
10275  for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
10276    if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10277      auto &RegOp = MI.getOperand(1 + 2 * I);
10289  switch (MI.getOpcode()) {
10291 case AMDGPU::REG_SEQUENCE:
10295 case AMDGPU::INSERT_SUBREG:
10296    if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10313  if (!P.Reg.isVirtual())
10317  auto *DefInst = MRI.getVRegDef(RSR.Reg);
10318  while (auto *MI = DefInst) {
10320    switch (MI->getOpcode()) {
10322    case AMDGPU::V_MOV_B32_e32: {
10323      auto &Op1 = MI->getOperand(1);
10328      DefInst = MRI.getVRegDef(RSR.Reg);
10336      DefInst = MRI.getVRegDef(RSR.Reg);
10349  assert(MRI.isSSA() && "Must be run on SSA");
10351  auto *TRI = MRI.getTargetRegisterInfo();
10352  auto *DefBB = DefMI.getParent();
10356  if (UseMI.getParent() != DefBB)
10359  const int MaxInstScan = 20;
10363  auto E = UseMI.getIterator();
10364  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10365    if (I->isDebugInstr())
10368    if (++NumInst > MaxInstScan)
10371    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10381  assert(MRI.isSSA() && "Must be run on SSA");
10383  auto *TRI = MRI.getTargetRegisterInfo();
10384  auto *DefBB = DefMI.getParent();
10386  const int MaxUseScan = 10;
10389  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10390    auto &UseInst = *Use.getParent();
10393    if (UseInst.getParent() != DefBB || UseInst.isPHI())
10396    if (++NumUse > MaxUseScan)
10403  const int MaxInstScan = 20;
10407  for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10410    if (I->isDebugInstr())
10413    if (++NumInst > MaxInstScan)
10426      if (Reg == VReg && --NumUse == 0)
10428    } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10437  auto Cur = MBB.begin();
10438  if (Cur != MBB.end())
10440      if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10443    } while (Cur != MBB.end() && Cur != LastPHIIt);
10452  if (InsPt != MBB.end() &&
10453      (InsPt->getOpcode() == AMDGPU::SI_IF ||
10454       InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10455       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10456      InsPt->definesRegister(Src, nullptr)) {
10460        .addReg(Src, 0, SrcSubReg)
10485  if (isFullCopyInstr(MI)) {
10486    Register DstReg = MI.getOperand(0).getReg();
10487    Register SrcReg = MI.getOperand(1).getReg();
10494 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10498 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10509                                         unsigned *PredCost) const {
10510  if (MI.isBundle()) {
10513    unsigned Lat = 0, Count = 0;
10514    for (++I; I != E && I->isBundledWithPred(); ++I) {
10516      Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10518    return Lat + Count - 1;
10521  return SchedModel.computeInstrLatency(&MI);
10527  unsigned Opcode = MI.getOpcode();
10532                     : MI.getOperand(1).getReg();
10533    LLT DstTy = MRI.getType(Dst);
10534    LLT SrcTy = MRI.getType(Src);
10536 unsigned SrcAS = SrcTy.getAddressSpace();
10539 ST.hasGloballyAddressableScratch()
10547 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10548    return HandleAddrSpaceCast(MI);
10551    auto IID = GI->getIntrinsicID();
10558    case Intrinsic::amdgcn_addrspacecast_nonnull:
10559      return HandleAddrSpaceCast(MI);
10560 case Intrinsic::amdgcn_if:
10561 case Intrinsic::amdgcn_else:
10575 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10576 Opcode == AMDGPU::G_SEXTLOAD) {
10577    if (MI.memoperands_empty())
10581 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10582 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10590 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10591 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10592 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10605  unsigned opcode = MI.getOpcode();
10606  if (opcode == AMDGPU::V_READLANE_B32 ||
10607      opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10608      opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10611  if (isCopyInstr(MI)) {
10615        RI.getPhysRegBaseClass(srcOp.getReg());
10623  if (MI.isPreISelOpcode())
10638  if (MI.memoperands_empty())
10642 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10643 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10658  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10660    if (!SrcOp.isReg())
10664    if (!Reg || !SrcOp.readsReg())
10670      if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10697        F, "ds_ordered_count unsupported for this calling conv"));
10711                                 Register &SrcReg2, int64_t &CmpMask,
10712                                 int64_t &CmpValue) const {
10713  if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10716  switch (MI.getOpcode()) {
10719 case AMDGPU::S_CMP_EQ_U32:
10720 case AMDGPU::S_CMP_EQ_I32:
10721 case AMDGPU::S_CMP_LG_U32:
10722 case AMDGPU::S_CMP_LG_I32:
10723 case AMDGPU::S_CMP_LT_U32:
10724 case AMDGPU::S_CMP_LT_I32:
10725 case AMDGPU::S_CMP_GT_U32:
10726 case AMDGPU::S_CMP_GT_I32:
10727 case AMDGPU::S_CMP_LE_U32:
10728 case AMDGPU::S_CMP_LE_I32:
10729 case AMDGPU::S_CMP_GE_U32:
10730 case AMDGPU::S_CMP_GE_I32:
10731 case AMDGPU::S_CMP_EQ_U64:
10732 case AMDGPU::S_CMP_LG_U64:
10733    SrcReg = MI.getOperand(0).getReg();
10734    if (MI.getOperand(1).isReg()) {
10735      if (MI.getOperand(1).getSubReg())
10737      SrcReg2 = MI.getOperand(1).getReg();
10739    } else if (MI.getOperand(1).isImm()) {
10741      CmpValue = MI.getOperand(1).getImm();
10747 case AMDGPU::S_CMPK_EQ_U32:
10748 case AMDGPU::S_CMPK_EQ_I32:
10749 case AMDGPU::S_CMPK_LG_U32:
10750 case AMDGPU::S_CMPK_LG_I32:
10751 case AMDGPU::S_CMPK_LT_U32:
10752 case AMDGPU::S_CMPK_LT_I32:
10753 case AMDGPU::S_CMPK_GT_U32:
10754 case AMDGPU::S_CMPK_GT_I32:
10755 case AMDGPU::S_CMPK_LE_U32:
10756 case AMDGPU::S_CMPK_LE_I32:
10757 case AMDGPU::S_CMPK_GE_U32:
10758 case AMDGPU::S_CMPK_GE_I32:
10759    SrcReg = MI.getOperand(0).getReg();
10761    CmpValue = MI.getOperand(1).getImm();
10780    if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10782    if (MI.killsRegister(AMDGPU::SCC, &RI))
10787    SccDef->setIsDead(false);
10795 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
10796 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
10798 bool Op1IsNonZeroImm =
10799 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
10800 bool Op2IsZeroImm =
10801 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
10802 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
10808 Register SrcReg2, int64_t CmpMask,
10817  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10849    if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
10850        MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
10856      if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
10871  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10872                               this](int64_t ExpectedValue, unsigned SrcSize,
10873                                     bool IsReversible, bool IsSigned) -> bool {
10901    if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
10902        Def->getOpcode() != AMDGPU::S_AND_B64)
10906    const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
10917 SrcOp = &Def->getOperand(2);
10918 else if (isMask(&Def->getOperand(2)))
10919 SrcOp = &Def->getOperand(1);
10927 if (IsSigned && BitNo == SrcSize - 1)
10930 ExpectedValue <<= BitNo;
10932    bool IsReversedCC = false;
10933    if (CmpValue != ExpectedValue) {
10936      IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10941    Register DefReg = Def->getOperand(0).getReg();
10942    if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10948    if (!MRI->use_nodbg_empty(DefReg)) {
10956 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10957 : AMDGPU::S_BITCMP1_B32
10958 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10959 : AMDGPU::S_BITCMP1_B64;
10964 Def->eraseFromParent();
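// (Annotation) When the compared value comes from a single-bit S_AND mask, the S_CMP + S_AND
// pair is folded into one S_BITCMP0/S_BITCMP1 on that bit, the reversed-condition case picking
// the complementary BITCMP opcode.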
10972 case AMDGPU::S_CMP_EQ_U32:
10973 case AMDGPU::S_CMP_EQ_I32:
10974 case AMDGPU::S_CMPK_EQ_U32:
10975 case AMDGPU::S_CMPK_EQ_I32:
10976    return optimizeCmpAnd(1, 32, true, false);
10977  case AMDGPU::S_CMP_GE_U32:
10978  case AMDGPU::S_CMPK_GE_U32:
10979    return optimizeCmpAnd(1, 32, false, false);
10980  case AMDGPU::S_CMP_GE_I32:
10981  case AMDGPU::S_CMPK_GE_I32:
10982    return optimizeCmpAnd(1, 32, false, true);
10983  case AMDGPU::S_CMP_EQ_U64:
10984    return optimizeCmpAnd(1, 64, true, false);
10985  case AMDGPU::S_CMP_LG_U32:
10986  case AMDGPU::S_CMP_LG_I32:
10987  case AMDGPU::S_CMPK_LG_U32:
10988  case AMDGPU::S_CMPK_LG_I32:
10989    return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect();
10990  case AMDGPU::S_CMP_GT_U32:
10991  case AMDGPU::S_CMPK_GT_U32:
10992    return optimizeCmpAnd(0, 32, false, false);
10993  case AMDGPU::S_CMP_GT_I32:
10994  case AMDGPU::S_CMPK_GT_I32:
10995    return optimizeCmpAnd(0, 32, false, true);
10996  case AMDGPU::S_CMP_LG_U64:
10997    return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect();
11004                                            AMDGPU::OpName OpName) const {
11005  if (!ST.needsAlignedVGPRs())
11008  int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
11020  bool IsAGPR = RI.isAGPR(MRI, DataReg);
11022 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11025 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
11026 : &AMDGPU::VReg_64_Align2RegClass);
11028        .addReg(DataReg, 0, Op.getSubReg())
11033    Op.setSubReg(AMDGPU::sub0);
11055  unsigned Opcode = MI.getOpcode();
11061 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11062 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11065 if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, const SIRegisterInfo &RI)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
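A hedged sketch of how the split is typically consumed (TII and COffsetVal assumed in scope; SIInstrFlags::FlatGlobal is one of several flat variants):
  auto [ImmOffset, RemainderOffset] =
      TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                           SIInstrFlags::FlatGlobal);
  // ImmOffset fits the encoding's offset field; RemainderOffset must be
  // added to the address register separately.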
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
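Sketch of the shrink pattern this enables (a simplified take on what SIShrinkInstructions does; TII and Opc assumed in scope):
  if (TII->hasVALU32BitEncoding(Opc)) {
    int Op32 = AMDGPU::getVOPe32(Opc); // e32-encoded VOP1/VOP2/VOPC opcode
    (void)Op32;
  }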
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool setsSCCifResultIsNonZero(const MachineInstr &MI)
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (PostRASchedulerList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
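Hedged example for the scratch variant (TII and Offset assumed in scope):
  if (!TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS,
                              SIInstrFlags::FlatScratch)) {
    // Offset does not fit the immediate field and must be split,
    // e.g. with splitFlatOffset above.
  }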
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
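Typical lowering check, as a sketch (TII and MI assumed in scope); -1 signals that no real encoding exists for the current subtarget:
  int MCOp = TII->pseudoToMCOpcode(MI.getOpcode());
  if (MCOp == -1) {
    // No hardware encoding for this pseudo; callers usually diagnose or skip.
  }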
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
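A minimal sketch of operand lookup by name (TII and MI assumed in scope; the offset operand is just an example):
  const MachineOperand *Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t ImmOff = (Off && Off->isImm()) ? Off->getImm() : 0;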
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand of MI.
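Hedged sketch (MI, OpIdx and NewOp are placeholder names): a legality check like this typically gates operand folding.
  // NewOp is a candidate replacement for operand OpIdx of MI.
  if (TII->isOperandLegal(MI, OpIdx, &NewOp)) {
    // Safe to fold: constant-bus, literal and register-class limits hold.
  }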
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU-specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the inline values intended for floating-point operands.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
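Worked example (ST is assumed to be the GCNSubtarget): the double constant 1.0 is one of the hardware inline constants, so this returns true.
  bool Inline = AMDGPU::isInlinableLiteral64(0x3FF0000000000000LL, // 1.0
                                             ST.hasInv2PiInlineImm());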
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
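Worked examples (alignDown is constexpr, so these hold at compile time):
  static_assert(llvm::alignDown(22u, 8u) == 16u);
  static_assert(llvm::alignDown(22u, 8u, 3u) == 19u); // largest value <= 22 that is 3 mod 8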
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair, skipping copy-like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.