#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"

    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc(
        "Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&

           return MMO->isLoad() && MMO->isInvariant();

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  if (!MI.isConvergent())

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);

    if (BaseOp && !BaseOp->isFI())
      if (SOffset->isReg())
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

    if (MO1->getAddrSpace() != MO2->getAddrSpace())

    const auto *Base1 = MO1->getValue();
    const auto *Base2 = MO2->getValue();
    if (!Base1 || !Base2)

    return Base1 == Base2;
                                   int64_t Offset1, bool OffsetIsScalable1,
                                   int64_t Offset2, bool OffsetIsScalable2,
                                   unsigned ClusterSize,
                                   unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                              const char *Msg = "illegal VGPR to SGPR copy") {
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

    for (auto Def = MI, E = MBB.begin(); Def != E; ) {
      if (!Def->modifiesRegister(SrcReg, &RI))

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

    Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

    if (ImpUseSuperReg) {
      UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

    LastMI->addRegisterKilled(SrcReg, &RI);
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {
    assert(ST.useRealTrue16Insts());
    if (DestReg == SrcReg) {
    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {
    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg with an AGPR!");
    if (ST.useRealTrue16Insts()) {
    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {
    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
                 "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {
    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                             *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {

    return &AMDGPU::VGPR_32RegClass;
  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
                                       int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B16_t16_e32: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::V_MOV_B16_t16_e64: {
    if (Src0.isImm() && !MI.getOperand(1).getImm()) {
      return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
      ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;

std::optional<int64_t>
  if (!Op.isReg() || !Op.getReg().isVirtual())
    return std::nullopt;

  if (Def && Def->isMoveImmediate()) {

  return std::nullopt;
  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
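// Indirect-indexing pseudo selection: pick the GPR_IDX read/write variant
// sized for the vector being accessed (VecSize is in bits).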
                                                      bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
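// MOVREL-based VGPR indirect writes, selected the same way by vector size.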
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
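// SGPR MOVREL indirect writes with 32-bit elements.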
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
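// SGPR MOVREL indirect writes with 64-bit elements.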
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                       bool IsSGPR) const {
  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;
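// VGPR spill saves.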
    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;
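// AGPR/VGPR superclass spill saves.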
    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
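// WWM registers are spilled as a single 32-bit lane, so only one opcode per
// register bank is needed here.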
                                            bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = RI.getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
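// Reload path: the *_RESTORE pseudos mirror the *_SAVE selection above,
// starting with SGPR restores.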
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;
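// VGPR reloads.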
    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;
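// AGPR/VGPR superclass reloads.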
    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                               bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = RI.getSpillSize(*RC);

      FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())

                              unsigned Quantity) const {
  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();
  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  return MBB.getNextNode();

  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;
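// Pseudo expansion: the *_term terminator pseudos below are rewritten in place
// to the corresponding real scalar ALU opcodes.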
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    if (ST.hasMovB64() && Mov64RC->contains(Dst)) {
      MI.setDesc(Mov64Desc);
    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());
      if (ST.hasPkMovB32() &&
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));
      MI.setDesc(get(AMDGPU::S_MOV_B64));
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

        .add(MI.getOperand(2))

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

          BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
      Op.setOffset(Op.getOffset() + 4);

          BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
  case AMDGPU::ENTER_STRICT_WQM: {
    MI.eraseFromParent();
  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));

  case AMDGPU::GET_STACK_BASE:
    if (ST.getFrameLowering()->mayReserveScratchForCWSR(*MBB.getParent())) {
      Register DestReg = MI.getOperand(0).getReg();
      MI.getOperand(MI.getNumExplicitOperands()).setIsDead(false);
      MI.getOperand(MI.getNumExplicitOperands()).setIsUse();
      MI.setDesc(get(AMDGPU::S_CMOVK_I32));
      MI.setDesc(get(AMDGPU::S_MOV_B32));
          MI.getNumExplicitOperands());

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);
    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);

std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;
                                       AMDGPU::OpName Src0OpName,
                                       AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();
  NonRegOp2.setImm(NonRegVal);

                                  unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&
  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);
                        AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                     int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.useAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, {}, AMDGPU::sub0)
      .addReg(PCReg, {}, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;
    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(
    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();
    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
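// Branch predicate <-> S_CBRANCH opcode mapping used by the branch analysis
// and insertion code that follows.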
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {
  if (Cond[0].isImm()) {
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.isSGPRClass(RC);

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, {}, SubIdx)
          .addReg(TrueReg, {}, SubIdx);
          .addReg(TrueReg, {}, SubIdx)
          .addReg(FalseReg, {}, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);
                                          unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
  return std::nullopt;
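// Map MAC/FMA opcodes to their MADAK/FMAAK forms (the addend becomes the
// trailing literal constant K).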
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F16_t16_e32:
  case AMDGPU::V_FMAC_F16_fake16_e32:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;
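// Likewise for the MADMK/FMAMK forms (the literal constant K is one of the
// multiplicands).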
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F16_t16_e32:
  case AMDGPU::V_FMAC_F16_fake16_e32:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

      if (RI.hasVGPRs(DstRC))

      if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);
  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

    auto CopyRegOperandToNarrowerRC =
          if (!MI.getOperand(OpNo).isReg())
          if (RI.getCommonSubClass(RC, NewRC) != NewRC)
          BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                  get(AMDGPU::COPY), Tmp)
          MI.getOperand(OpNo).setReg(Tmp);
          MI.getOperand(OpNo).setIsKill();

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
      if (!RegSrc->isReg())

      if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
          ST.getConstantBusLimit(Opc) < 2)

      if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
                UseMI.getDebugLoc(), get(AMDGPU::COPY),
                UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        CopyRegOperandToNarrowerRC(UseMI, 1, NewRC);
        CopyRegOperandToNarrowerRC(UseMI, 3, NewRC);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
      if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&
        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
            MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
                UseMI.getDebugLoc(), get(AMDGPU::COPY),
                UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        CopyRegOperandToNarrowerRC(UseMI, 1, NewRC);
        CopyRegOperandToNarrowerRC(UseMI, 2, NewRC);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())

  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())
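// Two-address MAC/FMAC opcodes map to these three-address MAD/FMA forms when
// converting to three-address code.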
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  if (MI.isBundle()) {
    if (MI.getBundleSize() != 1)

    CandidateMI = MI.getNextNode();

  MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);

  if (MI.isBundle()) {
        MI.untieRegOperand(MO.getOperandNo());

    if (Def.isEarlyClobber() && Def.isReg() &&
      auto UpdateDefIndex = [&](LiveRange &LR) {
        auto *S = LR.find(OldIndex);
        if (S != LR.end() && S->start == OldIndex) {
          assert(S->valno && S->valno->def == OldIndex);
          S->start = NewIndex;
          S->valno->def = NewIndex;

      for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {
    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();

    if (MRI.hasOneNonDBGUse(DefReg)) {
      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      if (MI.isBundle()) {
          if (MO.isReg() && MO.getReg() == DefReg) {
            assert(MO.getSubReg() == 0 &&
                   "tied sub-registers in bundles currently not supported");
            MI.removeOperand(MO.getOperandNo());

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

      if (MI.isBundle()) {
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

  return MI.isBundle() ? &MI : NewMI;
                                                ThreeAddressUpdates &U) const {
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);

  MachineInstrBuilder MIB;

  const MachineOperand *Src0Mods =
  const MachineOperand *Src1Mods =
  const MachineOperand *Src2Mods =

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||

    MachineInstr *DefMI;

        MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:
  if (MI.isTerminator() || MI.isPosition())
  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)
  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))
  if (MI.memoperands_empty())
    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);
  if (MI.memoperands_empty())
    unsigned AS = Memop->getAddrSpace();
  if (ST.isTgSplitEnabled())
  if (MI.memoperands_empty())
    unsigned AS = Memop->getAddrSpace();
  unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
  if (MI.isCall() || MI.isInlineAsm())
  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
  if (MI.isMetaInstruction())
  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))
  return MI.readsRegister(AMDGPU::EXEC, &RI);
  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
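  // Inline-constant queries: dispatch on the immediate's bit width; the
  // 16-bit forms additionally require 16-bit instruction support, and the
  // 1/(2*pi) value is only inline on subtargets that report it.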
  switch (Imm.getBitWidth()) {
                              ST.hasInv2PiInlineImm());
                              ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                              ST.hasInv2PiInlineImm());
  APInt IntImm = Imm.bitcastToAPInt();
  bool HasInv2Pi = ST.hasInv2PiInlineImm();
    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&
  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
  return ST.hasVOP3Literal();
                                     int64_t ImmVal) const {
  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))
  return RI.opCanUseInlineConstant(OpInfo.OperandType);
         "unexpected imm-like operand kind");
  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
                                    AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();
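  // The opcodes below carry extra operands (carry-in/out, src2) that only fit
  // the 32-bit encoding when they use the fixed layout (e.g. VCC as the carry
  // register, src1 in a VGPR), which is what the checks that follow enforce.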
  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_CNDMASK_B32_e64:
    if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
      (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
                                     unsigned Op32) const {
    Inst32.add(MI.getOperand(I));
  int Idx = MI.getNumExplicitDefs();
  int OpTy = MI.getDesc().operands()[Idx++].OperandType;
  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
  return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);
  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:
  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
  if (SubReg.getReg().isPhysical())
  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";
  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);
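  // verifyInstruction: the checks below validate operand counts, register
  // classes, immediate and inline-constant legality, and encoding-specific
  // constraints (SDWA, DPP, MIMG, constant bus usage, register alignment).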
  if (SIInstrInfo::isGenericOpcode(Opcode))
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
  if (MI.isInlineAsm()) {
    if (!Reg.isVirtual() && !RC->contains(Reg)) {
      ErrInfo = "inlineasm operand has incorrect register class.";
  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";
  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";
    int16_t RegClass = getOpRegClassID(OpInfo);
    switch (OpInfo.OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Expected inline constant for operand.";
      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
    if (OpInfo.isGenericType())
    if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
        Opcode != AMDGPU::V_MOV_B64_PSEUDO) {
      if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
              RI.getSubRegisterClass(RC, MO.getSubReg())) {
          RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
      if (!RC || !RI.isProperlyAlignedRC(*RC)) {
        ErrInfo = "Subtarget requires even aligned vector registers";
    if (RegClass != -1) {
      if (Reg.isVirtual())
        ErrInfo = "Operand has incorrect register class.";
    if (!ST.hasSDWA()) {
      ErrInfo = "SDWA is not supported on this target";
    for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
                    AMDGPU::OpName::dst_sel}) {
        int64_t Imm = MO->getImm();
          ErrInfo = "Invalid SDWA selection";
    int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
      if (!ST.hasSDWAScalar()) {
        if (!MO.isReg() ||
            !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
              "Only reg allowed as operands in SDWA instructions on GFX9+";
    if (!ST.hasSDWAOmod()) {
      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";
    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";
    if (isVOPC(BasicOpcode)) {
      if (!ST.hasSDWASdst() && DstIdx != -1) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
      } else if (!ST.hasSDWAOutModsVOPC()) {
        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";
          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
            "Dst register should be tied to implicit use of preserved register";
        ErrInfo = "Dst register should use same physical register as preserved";
  if (isImage(Opcode) && !MI.mayStore()) {
    if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
    if (RegCount > DstSize) {
      ErrInfo = "Image instruction returns too many registers for dst "
  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;
    int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
      LiteralVal = &MI.getOperand(ImmIdx);
    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
        } else if (!MO.isFI()) {
            ErrInfo = "VOP2/VOP3 instruction uses more than one literal";
          if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
                return !RI.regsOverlap(SGPRUsed, SGPR);
    if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";
    if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
      ErrInfo = "VOP3 instruction uses literal";
  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;
    for (int OpIdx : {Src0Idx, Src1Idx}) {
        if (MO.getReg() != SGPRUsed)
    if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
      ErrInfo = "WRITELANE instruction violates constant bus restriction";
  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
      ErrInfo = "ABS not allowed in VOP3B instructions";
      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
    if (Desc.isBranch()) {
        ErrInfo = "invalid branch target for SOPK instruction";
        ErrInfo = "invalid immediate for SOPK instruction";
        ErrInfo = "invalid immediate for SOPK instruction";
  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;
    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";
      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";
      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";
        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";
    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";
  if (MI.mayStore() &&
    if (Soff && Soff->getReg() != AMDGPU::M0) {
      ErrInfo = "scalar stores must use m0 as offset register";
  if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";
  if (isDS(MI) && !ST.hasGDS()) {
    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vaddr0);
    AMDGPU::OpName RSrcOpName =
        isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
      ErrInfo = "dim is out of range";
    if (ST.hasR128A16()) {
      IsA16 = R128A16->getImm() != 0;
    } else if (ST.hasA16()) {
      IsA16 = A16->getImm() != 0;
    bool IsNSA = RsrcIdx - VAddr0Idx > 1;
    unsigned AddrWords =
    unsigned VAddrWords;
      VAddrWords = RsrcIdx - VAddr0Idx;
      if (ST.hasPartialNSAEncoding() &&
        unsigned LastVAddrIdx = RsrcIdx - 1;
        VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
    if (VAddrWords != AddrWords) {
                        << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";
    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";
    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
        !ST.hasDPPWavefrontShifts()) {
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";
    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
        !ST.hasDPPBroadcasts()) {
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";
    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST && !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
      if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";
    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
  AMDGPU::OpName DataName =
      isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
  if (ST.hasGFX90AInsts()) {
    if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
        (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
      ErrInfo = "Invalid register class: "
                "vdata and vdst should be both VGPR or AGPR";
    if (Data && Data2 &&
      ErrInfo = "Invalid register class: "
                "both data operands should be VGPR or AGPR";
    if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
        (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
      ErrInfo = "Invalid register class: "
                "agpr loads and stores not supported on this GPU";
  if (ST.needsAlignedVGPRs()) {
    const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
      if (Reg.isPhysical())
        return !(RI.getHWRegIndex(Reg) & 1);
      return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
             !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
    if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
        Opcode == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";
      if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for vaddr operand of image instructions";
  if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";
  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";
    if (!ST.hasScaleOffset()) {
      ErrInfo = "Subtarget does not support offset scaling";
      ErrInfo = "Instruction does not support offset scaling";
  for (unsigned I = 0; I < 3; ++I) {
  if (ST.hasFlatScratchHiInB64InstHazard() && isSALU(MI) &&
      MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, nullptr)) {
    if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
                    &AMDGPU::SReg_64RegClass) ||
        Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
      ErrInfo = "Instruction cannot read flat_scratch_base_hi";
  switch (MI.getOpcode()) {
  default:
    return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE:
    return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY:
    return AMDGPU::COPY;
  case AMDGPU::PHI:
    return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG:
    return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM:
    return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM:
    return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM:
    return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM:
    return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
                   RI.isAGPR(MRI, MI.getOperand(0).getReg())
               ? AMDGPU::COPY
               : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
                                   : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64
                                   : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U64_PSEUDO:
    return AMDGPU::V_ADD_U64_PSEUDO;
  case AMDGPU::S_SUB_U64_PSEUDO:
    return AMDGPU::V_SUB_U64_PSEUDO;
  case AMDGPU::S_SUBB_U32:
    return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32:
    return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32:
    return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32:
    return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32:
    return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32:
    return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32:
    return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64
                           : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32:
    return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32:
    return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32:
    return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32:
    return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32:
    return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64:
    return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32:
    return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64:
    return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32:
    return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64:
    return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8:
    return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16:
    return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32:
    return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32:
    return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32:
    return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32:
    return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32:
    return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64:
    return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32:
    return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32:
    return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32:
    return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32:
    return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32:
    return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32:
    return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32:
    return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32:
    return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32:
    return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32:
    return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32:
    return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32:
    return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64:
    return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64:
    return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32:
    return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32:
    return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32:
    return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32:
    return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0:
    return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case AMDGPU::S_CVT_F32_I32:
    return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32:
    return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32:
    return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32:
    return AMDGPU::V_CVT_U32_F32_e64;
  case AMDGPU::S_CVT_F32_F16:
  case AMDGPU::S_CVT_HI_F32_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
                                   : AMDGPU::V_CVT_F32_F16_fake16_e64;
  case AMDGPU::S_CVT_F16_F32:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
                                   : AMDGPU::V_CVT_F16_F32_fake16_e64;
  case AMDGPU::S_CEIL_F32:
    return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32:
    return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32:
    return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32:
    return AMDGPU::V_RNDNE_F32_e64;
  case AMDGPU::S_CEIL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
                                   : AMDGPU::V_CEIL_F16_fake16_e64;
  case AMDGPU::S_FLOOR_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
                                   : AMDGPU::V_FLOOR_F16_fake16_e64;
  case AMDGPU::S_TRUNC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
                                   : AMDGPU::V_TRUNC_F16_fake16_e64;
  case AMDGPU::S_RNDNE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
                                   : AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32:
    return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32:
    return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32:
    return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32:
    return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32:
    return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32:
    return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32:
    return AMDGPU::V_MUL_F32_e64;
  case AMDGPU::S_ADD_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
                                   : AMDGPU::V_ADD_F16_fake16_e64;
  case AMDGPU::S_SUB_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
                                   : AMDGPU::V_SUB_F16_fake16_e64;
  case AMDGPU::S_MIN_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
                                   : AMDGPU::V_MIN_F16_fake16_e64;
  case AMDGPU::S_MAX_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
                                   : AMDGPU::V_MAX_F16_fake16_e64;
  case AMDGPU::S_MINIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
                                   : AMDGPU::V_MINIMUM_F16_fake16_e64;
  case AMDGPU::S_MAXIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
                                   : AMDGPU::V_MAXIMUM_F16_fake16_e64;
  case AMDGPU::S_MUL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
                                   : AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32:
    return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32:
    return AMDGPU::V_FMAC_F32_e64;
  case AMDGPU::S_FMAC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
                                   : AMDGPU::V_FMAC_F16_fake16_e64;
  case AMDGPU::S_FMAMK_F32:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32:
    return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32:
    return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32:
    return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32:
    return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32:
    return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32:
    return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32:
    return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32:
    return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32:
    return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32:
    return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32:
    return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32:
    return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32:
    return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32:
    return AMDGPU::V_CMP_NLT_F32_e64;
  case AMDGPU::S_CMP_LT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
                                   : AMDGPU::V_CMP_LT_F16_fake16_e64;
  case AMDGPU::S_CMP_EQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
                                   : AMDGPU::V_CMP_EQ_F16_fake16_e64;
  case AMDGPU::S_CMP_LE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
                                   : AMDGPU::V_CMP_LE_F16_fake16_e64;
  case AMDGPU::S_CMP_GT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
                                   : AMDGPU::V_CMP_GT_F16_fake16_e64;
  case AMDGPU::S_CMP_LG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
                                   : AMDGPU::V_CMP_LG_F16_fake16_e64;
  case AMDGPU::S_CMP_GE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
                                   : AMDGPU::V_CMP_GE_F16_fake16_e64;
  case AMDGPU::S_CMP_O_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
                                   : AMDGPU::V_CMP_O_F16_fake16_e64;
  case AMDGPU::S_CMP_U_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
                                   : AMDGPU::V_CMP_U_F16_fake16_e64;
  case AMDGPU::S_CMP_NGE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
                                   : AMDGPU::V_CMP_NGE_F16_fake16_e64;
  case AMDGPU::S_CMP_NLG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
                                   : AMDGPU::V_CMP_NLG_F16_fake16_e64;
  case AMDGPU::S_CMP_NGT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
                                   : AMDGPU::V_CMP_NGT_F16_fake16_e64;
  case AMDGPU::S_CMP_NLE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
                                   : AMDGPU::V_CMP_NLE_F16_fake16_e64;
  case AMDGPU::S_CMP_NEQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
                                   : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
  case AMDGPU::S_CMP_NLT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
                                   : AMDGPU::V_CMP_NLT_F16_fake16_e64;
  case AMDGPU::V_S_EXP_F32_e64:
    return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
                                   : AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64:
    return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
                                   : AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64:
    return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
                                   : AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64:
    return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
                                   : AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64:
    return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
                                   : AMDGPU::V_SQRT_F16_fake16_e64;
      "Unexpected scalar opcode without corresponding vector one!");
6222 "Not a whole wave func");
6225 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6226 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6233 unsigned OpNo)
const {
6235 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6236 Desc.operands()[OpNo].RegClass == -1) {
6239 if (Reg.isVirtual()) {
6241 return MRI.getRegClass(Reg);
6243 return RI.getPhysRegBaseClass(Reg);
6246 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6247 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6255 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6257 unsigned Size = RI.getRegSizeInBits(*RC);
6258 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6259 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6260 : AMDGPU::V_MOV_B32_e32;
6262 Opcode = AMDGPU::COPY;
6263 else if (RI.isSGPRClass(RC))
6264 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6278 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6284 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6295 if (SubIdx == AMDGPU::sub0)
6297 if (SubIdx == AMDGPU::sub1)
6309void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6325 if (Reg.isPhysical())
6335 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6338 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
  unsigned Opc = MI.getOpcode();
  constexpr AMDGPU::OpName OpNames[] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
    int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
    if (static_cast<unsigned>(SrcIdx) == OpIdx &&
    bool IsAGPR = RI.isAGPR(MRI, MO.getReg());
    if (IsAGPR && !ST.hasMAIInsts())
    if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
    const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    const int DataIdx = AMDGPU::getNamedOperandIdx(
        Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
    if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
        MI.getOperand(DataIdx).isReg() &&
        RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
    if ((int)OpIdx == DataIdx) {
      if (VDstIdx != -1 &&
          RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
      const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
          RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
    if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
        (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
  if (ST.hasFlatScratchHiInB64InstHazard() &&
    if (Opc == AMDGPU::S_BITCMP0_B64 || Opc == AMDGPU::S_BITCMP1_B64)
  constexpr unsigned NumOps = 3;
  constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::src1,
      AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
      AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
    int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
    MO = &MI.getOperand(SrcIdx);
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
    unsigned Mods = MI.getOperand(ModsIdx).getImm();
  return !OpSel && !OpSelHi;
  int64_t RegClass = getOpRegClassID(OpInfo);
      RegClass != -1 ? RI.getRegClass(RegClass) : nullptr;
  int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
  int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
    if (!LiteralLimit--)
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (--ConstantBusLimit <= 0)
        if (!LiteralLimit--)
        if (--ConstantBusLimit <= 0)
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!Op.isReg() && !Op.isFI() && !Op.isRegMask() &&
        !Op.isIdenticalTo(*MO))
  } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
  bool Is64BitOp = Is64BitFPOp ||
        (!ST.has64BitLiterals() || InstDesc.getSize() != 4))
    if (!Is64BitFPOp && (int32_t)Imm < 0 &&
  bool IsGFX950Only = ST.hasGFX950Insts();
  bool IsGFX940Only = ST.hasGFX940Insts();
  if (!IsGFX950Only && !IsGFX940Only)
  unsigned Opcode = MI.getOpcode();
  case AMDGPU::V_CVT_PK_BF8_F32_e64:
  case AMDGPU::V_CVT_PK_FP8_F32_e64:
  case AMDGPU::V_MQSAD_PK_U16_U8_e64:
  case AMDGPU::V_MQSAD_U32_U8_e64:
  case AMDGPU::V_PK_ADD_F16:
  case AMDGPU::V_PK_ADD_F32:
  case AMDGPU::V_PK_ADD_I16:
  case AMDGPU::V_PK_ADD_U16:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_FMA_F16:
  case AMDGPU::V_PK_FMA_F32:
  case AMDGPU::V_PK_FMAC_F16_e32:
  case AMDGPU::V_PK_FMAC_F16_e64:
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_MAD_I16:
  case AMDGPU::V_PK_MAD_U16:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_PK_MAX_I16:
  case AMDGPU::V_PK_MAX_U16:
  case AMDGPU::V_PK_MIN_F16:
  case AMDGPU::V_PK_MIN_I16:
  case AMDGPU::V_PK_MIN_U16:
  case AMDGPU::V_PK_MOV_B32:
  case AMDGPU::V_PK_MUL_F16:
  case AMDGPU::V_PK_MUL_F32:
  case AMDGPU::V_PK_MUL_LO_U16:
  case AMDGPU::V_PK_SUB_I16:
  case AMDGPU::V_PK_SUB_U16:
  case AMDGPU::V_QSAD_PK_U16_U8_e64:
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() &&
  if (Opc == AMDGPU::V_WRITELANE_B32) {
    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))
  if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  if (HasImplicitSGPR || !MI.isCommutable()) {
  if (CommutedOpc == -1) {
  MI.setDesc(get(CommutedOpc));
  bool Src0Kill = Src0.isKill();
  else if (Src1.isReg()) {
  unsigned Opc = MI.getOpcode();
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
  if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
      Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
    if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    if (VOP3Idx[2] != -1) {
      if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
        Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  int ConstantBusLimit = ST.getConstantBusLimit(Opc);
  int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
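  // Legalize VOP3 sources: scan src0..src2, keep operands that still fit the
  // constant bus / literal budget, and move the rest into VGPRs (or into
  // SGPRs for the lane instructions above that require scalar sources).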
  Register SGPRReg = findUsedSGPR(MI, VOP3Idx);
    SGPRsUsed.insert(SGPRReg);
  for (int Idx : VOP3Idx) {
      if (LiteralLimit > 0 && ConstantBusLimit > 0) {
      if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg())))
      if (ConstantBusLimit > 0) {
  if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
      !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
  for (unsigned I = 0; I < 3; ++I) {
    SRC = RI.getCommonSubClass(SRC, DstRC);
  unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
  if (RI.hasAGPRs(VRC)) {
    VRC = RI.getEquivalentVGPRClass(VRC);
    Register NewSrcReg = MRI.createVirtualRegister(VRC);
            get(TargetOpcode::COPY), NewSrcReg)
            get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
    for (unsigned i = 0; i < SubRegs; ++i) {
      Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
              get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
          .addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
        get(AMDGPU::REG_SEQUENCE), DstReg);
    for (unsigned i = 0; i < SubRegs; ++i) {
      MIB.addImm(RI.getSubRegFromChannel(i));
  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
    SBase->setReg(SGPR);
  if (SOff && !RI.isSGPRReg(MRI, SOff->getReg())) {
  int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
  if (OldSAddrIdx < 0)
  int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
  if (NewVAddrIdx < 0)
  int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
  if (OldVAddrIdx >= 0) {
    VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());
  if (OldVAddrIdx == NewVAddrIdx) {
    MRI.removeRegOperandFromUseList(&NewVAddr);
    MRI.moveOperands(&NewVAddr, &SAddr, 1);
    MRI.removeRegOperandFromUseList(&NewVAddr);
    MRI.addRegOperandToUseList(&NewVAddr);
    assert(OldSAddrIdx == NewVAddrIdx);
    if (OldVAddrIdx >= 0) {
      int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
                                                 AMDGPU::OpName::vdst_in);
      if (NewVDstIn != -1) {
        int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
      if (NewVDstIn != -1) {
        int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
  if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
  unsigned OpSubReg = Op.getSubReg();
      RI.getRegClassForReg(MRI, OpReg), OpSubReg);
  Register DstReg = MRI.createVirtualRegister(DstRC);
  if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
  bool ImpDef = Def->isImplicitDef();
  while (!ImpDef && Def && Def->isCopy()) {
    if (Def->getOperand(1).getReg().isPhysical())
    Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
    ImpDef = Def && Def->isImplicitDef();
  if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
  unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
  unsigned NumSubRegs = RegSize / 32;
  Register VScalarOp = ScalarOp->getReg();
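  // Waterfall loop body: read the (possibly divergent) scalar operand back
  // lane by lane with V_READFIRSTLANE_B32, compare it against the original
  // VGPR value to build a mask of matching lanes, and AND that mask into the
  // loop's exec condition before rewriting the operand to the SGPR copy.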
  if (NumSubRegs == 1) {
    Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
    Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
      CondReg = NewCondReg;
      Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
    ScalarOp->setReg(CurReg);
    ScalarOp->setIsKill();
    assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
           "Unhandled register size");
    for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
          .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
          .addReg(VScalarOp, VScalarOpUndef,
                  TRI->getSubRegFromChannel(Idx + 1));
      Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
      Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
      auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
      if (NumSubRegs <= 2)
        Cmp.addReg(VScalarOp);
        Cmp.addReg(VScalarOp, VScalarOpUndef,
                   TRI->getSubRegFromChannel(Idx, 2));
        CondReg = NewCondReg;
        Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
    const auto *SScalarOpRC =
        TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
    Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
        BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
    unsigned Channel = 0;
    for (Register Piece : ReadlanePieces) {
      Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));
    ScalarOp->setReg(SScalarOp);
    ScalarOp->setIsKill();
  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
  MRI.setSimpleHint(SaveExec, CondReg);
  if (!Begin.isValid())
  if (!End.isValid()) {
  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
      MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI,
                                  std::numeric_limits<unsigned>::max()) !=
    SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
  for (auto I = Begin; I != AfterMI; I++) {
    for (auto &MO : I->all_uses())
      MRI.clearKillFlags(MO.getReg());
  MBB.addSuccessor(LoopBB);
  for (auto &Succ : RemainderBB->successors()) {
static std::tuple<unsigned, unsigned>
      TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
                             AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
  Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
  uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
      .addImm(AMDGPU::sub0_sub1)
  return std::tuple(RsrcPtr, NewSRsrc);
  if (MI.getOpcode() == AMDGPU::PHI) {
    assert(!RI.isSGPRClass(VRC));
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      if (!Op.isReg() || !Op.getReg().isVirtual())
  if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
    if (RI.hasVGPRs(DstRC)) {
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
        if (!Op.isReg() || !Op.getReg().isVirtual())
  if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
    if (DstRC != Src0RC) {
  if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
  if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
      MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
      MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
      MI.getOpcode() == AMDGPU::S_WQM_B32 ||
      MI.getOpcode() == AMDGPU::S_WQM_B64 ||
      MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
      MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
        ? AMDGPU::OpName::rsrc
        : AMDGPU::OpName::srsrc;
    if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))
    AMDGPU::OpName SampOpName =
        isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
    if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))
  if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
    if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) {
      unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
      unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
      while (Start->getOpcode() != FrameSetupOpcode)
      while (End->getOpcode() != FrameDestroyOpcode)
      while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
             MI.definesRegister(End->getOperand(1).getReg(), nullptr))
  if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
      MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
      MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
      MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
  bool isSoffsetLegal = true;
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset);
  if (SoffsetIdx != -1) {
        !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) {
      isSoffsetLegal = false;
  bool isRsrcLegal = true;
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
  if (RsrcIdx != -1) {
      isRsrcLegal = false;
  if (isRsrcLegal && isSoffsetLegal)
  Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  const auto *BoolXExecRC = RI.getWaveMaskRegClass();
  Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
  Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
  unsigned RsrcPtr, NewSRsrc;
      .addReg(RsrcPtr, {}, AMDGPU::sub0)
      .addReg(VAddr->getReg(), {}, AMDGPU::sub0)
      .addReg(RsrcPtr, {}, AMDGPU::sub1)
      .addReg(VAddr->getReg(), {}, AMDGPU::sub1)
  } else if (!VAddr && ST.hasAddr64()) {
           "FIXME: Need to emit flat atomics here");
    unsigned RsrcPtr, NewSRsrc;
    Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      MIB.addImm(CPol->getImm());
      MIB.addImm(TFE->getImm());
    MI.removeFromParent();
        .addReg(RsrcPtr, {}, AMDGPU::sub0)
        .addImm(AMDGPU::sub0)
        .addReg(RsrcPtr, {}, AMDGPU::sub1)
        .addImm(AMDGPU::sub1);
  if (!isSoffsetLegal) {
  if (!isSoffsetLegal) {
      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
  if (RsrcIdx != -1) {
    DeferredList.insert(MI);
  return DeferredList.contains(MI);
  if (!ST.useRealTrue16Insts())
  unsigned Opcode = MI.getOpcode();
      OpIdx >= get(Opcode).getNumOperands() ||
      get(Opcode).operands()[OpIdx].RegClass == -1)
  if (!Op.isReg() || !Op.getReg().isVirtual())
  if (!RI.isVGPRClass(CurrRC))
  int16_t RCID = getOpRegClassID(get(Opcode).operands()[OpIdx]);
  if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
    Op.setSubReg(AMDGPU::lo16);
  } else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
    Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Op.setReg(NewDstReg);
  while (!Worklist.empty()) {
           "Deferred MachineInstr are not supposed to re-populate worklist");
    case AMDGPU::S_ADD_I32:
    case AMDGPU::S_SUB_I32: {
      std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
    case AMDGPU::S_MUL_U64:
      if (ST.hasVectorMulU64()) {
        NewOpcode = AMDGPU::V_MUL_U64_e64;
        splitScalarSMulU64(Worklist, Inst, MDT);
    case AMDGPU::S_MUL_U64_U32_PSEUDO:
    case AMDGPU::S_MUL_I64_I32_PSEUDO:
      splitScalarSMulPseudo(Worklist, Inst, MDT);
    case AMDGPU::S_AND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
    case AMDGPU::S_OR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
    case AMDGPU::S_XOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
    case AMDGPU::S_NAND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
    case AMDGPU::S_NOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
    case AMDGPU::S_XNOR_B64:
      if (ST.hasDLInsts())
        splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
        splitScalar64BitXnor(Worklist, Inst, MDT);
    case AMDGPU::S_ANDN2_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
    case AMDGPU::S_ORN2_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
    case AMDGPU::S_BREV_B64:
      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true);
    case AMDGPU::S_NOT_B64:
      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
    case AMDGPU::S_BCNT1_I32_B64:
      splitScalar64BitBCNT(Worklist, Inst);
    case AMDGPU::S_BFE_I64:
      splitScalar64BitBFE(Worklist, Inst);
    case AMDGPU::S_FLBIT_I32_B64:
      splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
    case AMDGPU::S_FF1_I32_B64:
      splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
    case AMDGPU::S_LSHL_B32:
      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
    case AMDGPU::S_ASHR_I32:
      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
    case AMDGPU::S_LSHR_B32:
      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
    case AMDGPU::S_LSHL_B64:
      if (ST.hasOnlyRevVALUShifts()) {
                        ? AMDGPU::V_LSHLREV_B64_pseudo_e64
                        : AMDGPU::V_LSHLREV_B64_e64;
    case AMDGPU::S_ASHR_I64:
      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
    case AMDGPU::S_LSHR_B64:
      if (ST.hasOnlyRevVALUShifts()) {
        NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
    case AMDGPU::S_ABS_I32:
      lowerScalarAbs(Worklist, Inst);
    case AMDGPU::S_ABSDIFF_I32:
      lowerScalarAbsDiff(Worklist, Inst);
    case AMDGPU::S_CBRANCH_SCC0:
    case AMDGPU::S_CBRANCH_SCC1: {
      bool IsSCC = CondReg == AMDGPU::SCC;
    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_BFM_B64:
    case AMDGPU::S_PACK_LL_B32_B16:
    case AMDGPU::S_PACK_LH_B32_B16:
    case AMDGPU::S_PACK_HL_B32_B16:
    case AMDGPU::S_PACK_HH_B32_B16:
      movePackToVALU(Worklist, MRI, Inst);
    case AMDGPU::S_XNOR_B32:
      lowerScalarXnor(Worklist, Inst);
    case AMDGPU::S_NAND_B32:
      splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
    case AMDGPU::S_NOR_B32:
      splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
    case AMDGPU::S_ANDN2_B32:
      splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
    case AMDGPU::S_ORN2_B32:
      splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
    case AMDGPU::S_ADD_CO_PSEUDO:
    case AMDGPU::S_SUB_CO_PSEUDO: {
      unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
                         ? AMDGPU::V_ADDC_U32_e64
                         : AMDGPU::V_SUBB_U32_e64;
      const auto *CarryRC = RI.getWaveMaskRegClass();
      if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
        Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
      Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
      addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
    case AMDGPU::S_UADDO_PSEUDO:
    case AMDGPU::S_USUBO_PSEUDO: {
      unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
                         ? AMDGPU::V_ADD_CO_U32_e64
                         : AMDGPU::V_SUB_CO_U32_e64;
          RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
      Register DestReg = MRI.createVirtualRegister(NewRC);
      MRI.replaceRegWith(Dest0.getReg(), DestReg);
      addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
    case AMDGPU::S_LSHL1_ADD_U32:
    case AMDGPU::S_LSHL2_ADD_U32:
    case AMDGPU::S_LSHL3_ADD_U32:
    case AMDGPU::S_LSHL4_ADD_U32: {
      unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32   ? 1
                           : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
                           : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
          RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg()));
      Register DestReg = MRI.createVirtualRegister(NewRC);
      MRI.replaceRegWith(Dest.getReg(), DestReg);
      addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
    case AMDGPU::S_CSELECT_B32:
    case AMDGPU::S_CSELECT_B64:
      lowerSelect(Worklist, Inst, MDT);
    case AMDGPU::S_CMP_EQ_I32:
    case AMDGPU::S_CMP_LG_I32:
    case AMDGPU::S_CMP_GT_I32:
    case AMDGPU::S_CMP_GE_I32:
    case AMDGPU::S_CMP_LT_I32:
    case AMDGPU::S_CMP_LE_I32:
    case AMDGPU::S_CMP_EQ_U32:
    case AMDGPU::S_CMP_LG_U32:
    case AMDGPU::S_CMP_GT_U32:
    case AMDGPU::S_CMP_GE_U32:
    case AMDGPU::S_CMP_LT_U32:
    case AMDGPU::S_CMP_LE_U32:
    case AMDGPU::S_CMP_EQ_U64:
    case AMDGPU::S_CMP_LG_U64:
    case AMDGPU::S_CMP_LT_F32:
    case AMDGPU::S_CMP_EQ_F32:
    case AMDGPU::S_CMP_LE_F32:
    case AMDGPU::S_CMP_GT_F32:
    case AMDGPU::S_CMP_LG_F32:
    case AMDGPU::S_CMP_GE_F32:
    case AMDGPU::S_CMP_O_F32:
    case AMDGPU::S_CMP_U_F32:
    case AMDGPU::S_CMP_NGE_F32:
    case AMDGPU::S_CMP_NLG_F32:
    case AMDGPU::S_CMP_NGT_F32:
    case AMDGPU::S_CMP_NLE_F32:
    case AMDGPU::S_CMP_NEQ_F32:
    case AMDGPU::S_CMP_NLT_F32: {
      Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
      addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
    case AMDGPU::S_CMP_LT_F16:
    case AMDGPU::S_CMP_EQ_F16:
    case AMDGPU::S_CMP_LE_F16:
    case AMDGPU::S_CMP_GT_F16:
    case AMDGPU::S_CMP_LG_F16:
    case AMDGPU::S_CMP_GE_F16:
    case AMDGPU::S_CMP_O_F16:
    case AMDGPU::S_CMP_U_F16:
    case AMDGPU::S_CMP_NGE_F16:
    case AMDGPU::S_CMP_NLG_F16:
    case AMDGPU::S_CMP_NGT_F16:
    case AMDGPU::S_CMP_NLE_F16:
    case AMDGPU::S_CMP_NEQ_F16:
    case AMDGPU::S_CMP_NLT_F16: {
      Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
      addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
    case AMDGPU::S_CVT_HI_F32_F16: {
      Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      if (ST.useRealTrue16Insts()) {
            .addReg(TmpReg, {}, AMDGPU::hi16)
      addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
    case AMDGPU::S_MINIMUM_F32:
    case AMDGPU::S_MAXIMUM_F32: {
      Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
    case AMDGPU::S_MINIMUM_F16:
    case AMDGPU::S_MAXIMUM_F16: {
      Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
                                                      ? &AMDGPU::VGPR_16RegClass
                                                      : &AMDGPU::VGPR_32RegClass);
      addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
    case AMDGPU::V_S_EXP_F16_e64:
    case AMDGPU::V_S_LOG_F16_e64:
    case AMDGPU::V_S_RCP_F16_e64:
    case AMDGPU::V_S_RSQ_F16_e64:
    case AMDGPU::V_S_SQRT_F16_e64: {
      Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
                                                      ? &AMDGPU::VGPR_16RegClass
                                                      : &AMDGPU::VGPR_32RegClass);
      addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8289 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8297 if (NewOpcode == Opcode) {
8305 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8307 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8321 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8333 unsigned OpIdx =
UseMI->getOperandNo(&UseMO);
8337 RI.getCommonSubClass(CommonRC, OpRC))
8338 CommonRC = Narrowed;
8348 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
8349 MRI.replaceRegWith(DstReg, NewDstReg);
8350 MRI.clearKillFlags(NewDstReg);
8353 if (!
MRI.constrainRegClass(NewDstReg, CommonRC))
8370 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8374 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8375 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8380 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8386 MRI.replaceRegWith(DstReg, NewDstReg);
8387 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8389 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8392 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8393 MRI.replaceRegWith(DstReg, NewDstReg);
8394 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8399 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8400 MRI.replaceRegWith(DstReg, NewDstReg);
8402 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8412 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8413 AMDGPU::OpName::src0_modifiers) >= 0)
8417 NewInstr->addOperand(Src);
8420 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8423 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8425 NewInstr.addImm(Size);
8426 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8430 } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8435 "Scalar BFE is only implemented for constant width and offset");
8443 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8444 AMDGPU::OpName::src1_modifiers) >= 0)
8446 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8448 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8449 AMDGPU::OpName::src2_modifiers) >= 0)
8451 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8453 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8455 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8457 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8463 NewInstr->addOperand(Op);
8470 if (Op.getReg() == AMDGPU::SCC) {
8472 if (Op.isDef() && !Op.isDead())
8473 addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
8475 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8480 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8481 Register DstReg = NewInstr->getOperand(0).getReg();
8486 NewDstReg = MRI.createVirtualRegister(NewDstRC);
8487 MRI.replaceRegWith(DstReg, NewDstReg);
8496 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8500 std::pair<bool, MachineBasicBlock *>
8503 if (ST.hasAddNoCarryInsts()) {
8512 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8515 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
8517 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
8518 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8526 MRI.replaceRegWith(OldDstReg, ResultReg);
8529 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8530 return std::pair(true, NewBB);
8533 return std::pair(false, nullptr);
8550 bool IsSCC = (CondReg == AMDGPU::SCC);
8558 MRI.replaceRegWith(Dest.getReg(), CondReg);
8564 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8565 NewCondReg = MRI.createVirtualRegister(TC);
8569 bool CopyFound = false;
8570 for (MachineInstr &CandI :
8573 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
8575 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8577 .addReg(CandI.getOperand(1).getReg());
8589 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8597 RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
8598 MachineInstr *NewInst;
8599 if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
8600 NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8613 MRI.replaceRegWith(Dest.getReg(), NewDestReg);
8615 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8627 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8628 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8630 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8631 : AMDGPU::V_SUB_CO_U32_e32;
8641 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8642 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
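The TmpReg/ResultReg pair built around the V_SUB opcode above belongs to the scalar-abs lowering; the instruction that consumes TmpReg is elided from these fragments. A scalar model of the usual identity |x| = max(x, 0 - x), assuming the elided consumer is an integer max (the helper name is mine, and two's-complement wrap at INT_MIN is ignored):

#include <algorithm>
#include <cstdint>

// Scalar sketch of lowering an integer abs with a subtract and a max:
// TmpReg = 0 - Src (the V_SUB above), ResultReg = max(Src, TmpReg).
static int32_t absViaSubMax(int32_t Src) {
  int32_t TmpReg = 0 - Src;      // negate
  return std::max(Src, TmpReg);  // pick the non-negative value
}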
8655 Register SubResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8656 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8657 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8659 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8660 : AMDGPU::V_SUB_CO_U32_e32;
8672 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8673 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8687 if (ST.hasDLInsts()) {
8688 Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8696 MRI.replaceRegWith(Dest.getReg(), NewDest);
8697 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8703 bool Src0IsSGPR = Src0.isReg() &&
8704 RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
8705 bool Src1IsSGPR = Src1.isReg() &&
8706 RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
8708 Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8709 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8719 } else if (Src1IsSGPR) {
8733 MRI.replaceRegWith(Dest.getReg(), NewDest);
8737 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8743 unsigned Opcode) const {
8753 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8754 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8766 MRI.replaceRegWith(Dest.getReg(), NewDest);
8767 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8772 unsigned Opcode) const {
8782 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8783 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8795 MRI.replaceRegWith(Dest.getReg(), NewDest);
8796 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8811 const MCInstrDesc &InstDesc = get(Opcode);
8812 const TargetRegisterClass *Src0RC = Src0.isReg() ?
8814 &AMDGPU::SGPR_32RegClass;
8816 const TargetRegisterClass *Src0SubRC =
8817 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8820 AMDGPU::sub0, Src0SubRC);
8822 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8823 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8824 const TargetRegisterClass *NewDestSubRC =
8825 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8827 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8828 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
8831 AMDGPU::sub1, Src0SubRC);
8833 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8834 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
8839 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8846 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8848 Worklist.insert(&LoHalf);
8849 Worklist.insert(&HiHalf);
8855 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
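This split works because the bitwise unary ops it is used for act independently on each 32-bit half of the 64-bit value. A minimal scalar model of the sub0/sub1 + REG_SEQUENCE structure above, assuming a NOT as the per-half operation (function name is mine):

#include <cstdint>

// Scalar sketch of splitting a 64-bit bitwise unary op into two 32-bit halves.
static uint64_t not64ViaHalves(uint64_t Src) {
  uint32_t Lo = static_cast<uint32_t>(Src);        // sub0
  uint32_t Hi = static_cast<uint32_t>(Src >> 32);  // sub1
  uint32_t LoHalf = ~Lo;                           // op on the low half
  uint32_t HiHalf = ~Hi;                           // op on the high half
  // REG_SEQUENCE: recombine the halves into the full 64-bit result.
  return (static_cast<uint64_t>(HiHalf) << 32) | LoHalf;
}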
8866 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8867 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8868 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8876 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8877 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8878 const TargetRegisterClass *Src0SubRC =
8879 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8880 if (RI.isSGPRClass(Src0SubRC))
8881 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8882 const TargetRegisterClass *Src1SubRC =
8883 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8884 if (RI.isSGPRClass(Src1SubRC))
8885 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8889 MachineOperand Op0L =
8891 MachineOperand Op1L =
8893 MachineOperand Op0H =
8895 MachineOperand Op1H =
8913 Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8914 MachineInstr *Op1L_Op0H =
8919 Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8920 MachineInstr *Op1H_Op0L =
8925 Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8926 MachineInstr *Carry =
8931 MachineInstr *LoHalf =
8936 Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8941 MachineInstr *HiHalf =
8952 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8964 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
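The register names above follow the textbook decomposition of a 64x64 multiply into 32-bit pieces, keeping only the low 64 bits of the product. A scalar sketch of that arithmetic, not the MIR builder calls themselves (helper name is mine):

#include <cstdint>

// Scalar model of the 64x64 -> 64 multiply expansion: the low half is
// Op0L*Op1L, and the high half accumulates the two cross products plus the
// carry-out (high 32 bits) of the low multiply.
static uint64_t mul64Via32BitPieces(uint64_t Src0, uint64_t Src1) {
  uint32_t Op0L = static_cast<uint32_t>(Src0);        // sub0 of Src0
  uint32_t Op0H = static_cast<uint32_t>(Src0 >> 32);  // sub1 of Src0
  uint32_t Op1L = static_cast<uint32_t>(Src1);
  uint32_t Op1H = static_cast<uint32_t>(Src1 >> 32);

  uint64_t LoFull = static_cast<uint64_t>(Op0L) * Op1L;
  uint32_t LoHalf = static_cast<uint32_t>(LoFull);        // V_MUL_LO
  uint32_t Carry = static_cast<uint32_t>(LoFull >> 32);   // V_MUL_HI

  uint32_t Op1L_Op0H = Op1L * Op0H;                        // cross product 1
  uint32_t Op1H_Op0L = Op1H * Op0L;                        // cross product 2
  uint32_t HiHalf = Op1L_Op0H + Op1H_Op0L + Carry;

  return (static_cast<uint64_t>(HiHalf) << 32) | LoHalf;
}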
8975 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8976 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8977 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8985 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8986 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8987 const TargetRegisterClass *Src0SubRC =
8988 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8989 if (RI.isSGPRClass(Src0SubRC))
8990 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8991 const TargetRegisterClass *Src1SubRC =
8992 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8993 if (RI.isSGPRClass(Src1SubRC))
8994 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8998 MachineOperand Op0L =
9000 MachineOperand Op1L =
9004 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
9005 ? AMDGPU::V_MUL_HI_U32_e64
9006 : AMDGPU::V_MUL_HI_I32_e64;
9007 MachineInstr *HiHalf =
9010 MachineInstr *LoHalf =
9021 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
9029 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
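For the S_MUL_U64_U32 / S_MUL_I64_I32 pseudos split here, the low halves of the operands carry the whole value, so the 64-bit product reduces to one low multiply and one high multiply on those halves. A scalar sketch of the unsigned case, an illustration rather than the builder code (helper name is mine):

#include <cstdint>

// Scalar model of the S_MUL_U64_U32 pseudo split: hi:lo of the product come
// straight from V_MUL_HI_U32 and V_MUL_LO_U32 on the operands' low halves.
static uint64_t mulU64U32(uint32_t Op0L, uint32_t Op1L) {
  uint64_t Full = static_cast<uint64_t>(Op0L) * Op1L;
  uint32_t LoHalf = static_cast<uint32_t>(Full);       // V_MUL_LO_U32
  uint32_t HiHalf = static_cast<uint32_t>(Full >> 32); // V_MUL_HI_U32_e64
  return (static_cast<uint64_t>(HiHalf) << 32) | LoHalf;
}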
9045 const MCInstrDesc &InstDesc = get(Opcode);
9046 const TargetRegisterClass *Src0RC = Src0.isReg() ?
9048 &AMDGPU::SGPR_32RegClass;
9050 const TargetRegisterClass *Src0SubRC =
9051 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9052 const TargetRegisterClass *Src1RC = Src1.isReg() ?
9054 &AMDGPU::SGPR_32RegClass;
9056 const TargetRegisterClass *Src1SubRC =
9057 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9060 AMDGPU::sub0, Src0SubRC);
9062 AMDGPU::sub0, Src1SubRC);
9064 AMDGPU::sub1, Src0SubRC);
9066 AMDGPU::sub1, Src1SubRC);
9068 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
9069 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9070 const TargetRegisterClass *NewDestSubRC =
9071 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9073 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
9074 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
9078 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
9079 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
9083 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
9090 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
9092 Worklist.insert(&LoHalf);
9093 Worklist.insert(&HiHalf);
9096 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9112 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
9114 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
9116 MachineOperand* Op0;
9117 MachineOperand* Op1;
9130 Register NewDest = MRI.createVirtualRegister(DestRC);
9136 MRI.replaceRegWith(Dest.getReg(), NewDest);
9152 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
9153 const TargetRegisterClass *SrcRC = Src.isReg() ?
9154 MRI.getRegClass(Src.getReg()) :
9155 &AMDGPU::SGPR_32RegClass;
9157 Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9158 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9160 const TargetRegisterClass *SrcSubRC =
9161 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9164 AMDGPU::sub0, SrcSubRC);
9166 AMDGPU::sub1, SrcSubRC);
9172 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9176 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
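V_BCNT_U32_B32 counts the set bits of one 32-bit value and adds its second operand, so chaining two of them through MidReg accumulates a full 64-bit population count. A scalar sketch of that dataflow (C++20 std::popcount stands in for the instruction; helper name is mine):

#include <bit>
#include <cstdint>

// Scalar model of the S_BCNT1_I32_B64 split: count sub0, then count sub1
// while adding the first partial count.
static uint32_t bcnt64ViaHalves(uint64_t Src) {
  uint32_t Lo = static_cast<uint32_t>(Src);        // sub0
  uint32_t Hi = static_cast<uint32_t>(Src >> 32);  // sub1
  uint32_t MidReg = static_cast<uint32_t>(std::popcount(Lo));             // V_BCNT(Lo, 0)
  uint32_t ResultReg = static_cast<uint32_t>(std::popcount(Hi)) + MidReg; // V_BCNT(Hi, MidReg)
  return ResultReg;
}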
9195 Offset == 0 && "Not implemented");
9198 Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9199 Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9200 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9217 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9218 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9223 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9224 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9228 .addReg(Src.getReg(), {}, AMDGPU::sub0);
9231 .addReg(Src.getReg(), {}, AMDGPU::sub0)
9236 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9237 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9256 const MCInstrDesc &InstDesc = get(Opcode);
9258 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9259 unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9260 : AMDGPU::V_ADD_CO_U32_e32;
9262 const TargetRegisterClass *SrcRC =
9263 Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9264 const TargetRegisterClass *SrcSubRC =
9265 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9267 MachineOperand SrcRegSub0 =
9269 MachineOperand SrcRegSub1 =
9272 Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9273 Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9274 Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9275 Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9282 .addReg(IsCtlz ? MidReg1 : MidReg2)
9288 .addReg(IsCtlz ? MidReg2 : MidReg1);
9290 MRI.replaceRegWith(Dest.getReg(), MidReg4);
9292 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
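The MidReg1..MidReg4 chain computes a 64-bit leading/trailing bit count from two 32-bit counts: count each half, bias the half that sits 32 bits further from the scanned end, then take the unsigned minimum. A scalar model of the ctlz case for a nonzero input (the all-zero encoding of the scalar instruction is not modeled; helper name is mine):

#include <algorithm>
#include <bit>
#include <cstdint>

// Scalar sketch of the 64-bit count split: V_FFBH on each 32-bit half,
// +32 on the low half, then V_MIN_U32. Valid for Src != 0.
static uint32_t ctlz64ViaHalves(uint64_t Src) {
  uint32_t Lo = static_cast<uint32_t>(Src);        // sub0
  uint32_t Hi = static_cast<uint32_t>(Src >> 32);  // sub1
  auto Ffbh = [](uint32_t V) -> uint32_t {         // V_FFBH_U32 returns -1 for 0
    return V == 0 ? 0xFFFFFFFFu : static_cast<uint32_t>(std::countl_zero(V));
  };
  uint32_t MidHi = Ffbh(Hi);
  uint32_t MidLo = Ffbh(Lo) + 32;                  // low half is 32 bits further down
  return std::min(MidHi, MidLo);                   // V_MIN_U32
}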
9295 void SIInstrInfo::addUsersToMoveToVALUWorklist(
9299 MachineInstr &UseMI = *MO.getParent();
9303 switch (UseMI.getOpcode()) {
9306 case AMDGPU::SOFT_WQM:
9307 case AMDGPU::STRICT_WWM:
9308 case AMDGPU::STRICT_WQM:
9309 case AMDGPU::REG_SEQUENCE:
9311 case AMDGPU::INSERT_SUBREG:
9314 OpNo = MO.getOperandNo();
9319 MRI.constrainRegClass(DstReg, OpRC);
9321 if (!RI.hasVectorRegisters(OpRC))
9332 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9338 if (ST.useRealTrue16Insts()) {
9341 SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9348 SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9354 bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
9355 bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
9357 auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
9359 case AMDGPU::S_PACK_LL_B32_B16:
9361 .addReg(SrcReg0, {},
9362 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9363 .addImm(AMDGPU::lo16)
9364 .addReg(SrcReg1, {},
9365 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9366 .addImm(AMDGPU::hi16);
9368 case AMDGPU::S_PACK_LH_B32_B16:
9370 .addReg(SrcReg0, {},
9371 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9372 .addImm(AMDGPU::lo16)
9373 .addReg(SrcReg1, {}, AMDGPU::hi16)
9374 .addImm(AMDGPU::hi16);
9376 case AMDGPU::S_PACK_HL_B32_B16:
9377 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9378 .addImm(AMDGPU::lo16)
9379 .addReg(SrcReg1, {},
9380 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9381 .addImm(AMDGPU::hi16);
9383 case AMDGPU::S_PACK_HH_B32_B16:
9384 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9385 .addImm(AMDGPU::lo16)
9386 .addReg(SrcReg1, {}, AMDGPU::hi16)
9387 .addImm(AMDGPU::hi16);
9394 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9395 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9400 case AMDGPU::S_PACK_LL_B32_B16: {
9401 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9402 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9419 case AMDGPU::S_PACK_LH_B32_B16: {
9420 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9429 case AMDGPU::S_PACK_HL_B32_B16: {
9430 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9440 case AMDGPU::S_PACK_HH_B32_B16: {
9441 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9442 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9459 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9460 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9469 assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9470 !Op.isDead() && Op.getParent() == &SCCDefInst);
9471 SmallVector<MachineInstr *, 4> CopyToDelete;
9474 for (MachineInstr &MI :
9478 int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9481 MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9482 Register DestReg = MI.getOperand(0).getReg();
9484 MRI.replaceRegWith(DestReg, NewCond);
9489 MI.getOperand(SCCIdx).setReg(NewCond);
9495 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9498 for (auto &Copy : CopyToDelete)
9499 Copy->eraseFromParent();
9507 void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9513 for (MachineInstr &MI :
9516 if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9518 if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9527 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9535 case AMDGPU::REG_SEQUENCE:
9536 case AMDGPU::INSERT_SUBREG:
9538 case AMDGPU::SOFT_WQM:
9539 case AMDGPU::STRICT_WWM:
9540 case AMDGPU::STRICT_WQM: {
9542 if (RI.isAGPRClass(SrcRC)) {
9543 if (RI.isAGPRClass(NewDstRC))
9548 case AMDGPU::REG_SEQUENCE:
9549 case AMDGPU::INSERT_SUBREG:
9550 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9553 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9559 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9562 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9576 int OpIndices[3]) const {
9577 const MCInstrDesc &Desc = MI.getDesc();
9593 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9595 for (unsigned i = 0; i < 3; ++i) {
9596 int Idx = OpIndices[i];
9600 const MachineOperand &MO = MI.getOperand(Idx);
9606 const TargetRegisterClass *OpRC =
9607 RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
9608 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9614 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9615 if (RI.isSGPRClass(RegRC))
9633 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9634 SGPRReg = UsedSGPRs[0];
9637 if (!SGPRReg && UsedSGPRs[1]) {
9638 if (UsedSGPRs[1] == UsedSGPRs[2])
9639 SGPRReg = UsedSGPRs[1];
9646 AMDGPU::OpName OperandName) const {
9647 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9650 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9654 return &MI.getOperand(Idx);
9668 if (ST.isAmdHsaOS()) {
9671 RsrcDataFormat |= (1ULL << 56);
9676 RsrcDataFormat |= (2ULL << 59);
9679 return RsrcDataFormat;
9689 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9694 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9701 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9707 unsigned Opc = MI.getOpcode();
9713 return get(Opc).mayLoad() &&
9718 int &FrameIndex) const {
9720 if (!Addr || !Addr->isFI())
9731 int &FrameIndex) const {
9739 int &FrameIndex) const {
9753 int &FrameIndex) const {
9770 while (++I != E && I->isInsideBundle()) {
9771 assert(!I->isBundle() && "No nested bundle!");
9779 unsigned Opc = MI.getOpcode();
9781 unsigned DescSize = Desc.getSize();
9786 unsigned Size = DescSize;
9790 if (MI.isBranch() && ST.hasOffset3fBug())
9801 bool HasLiteral = false;
9802 unsigned LiteralSize = 4;
9803 for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9808 if (ST.has64BitLiterals()) {
9809 switch (OpInfo.OperandType) {
9832 return HasLiteral ? DescSize + LiteralSize : DescSize;
9837 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9841 int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9842 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9846 case TargetOpcode::BUNDLE:
9848 case TargetOpcode::INLINEASM:
9849 case TargetOpcode::INLINEASM_BR: {
9851 const char *AsmStr = MI.getOperand(0).getSymbolName();
9855 if (MI.isMetaInstruction())
9859 const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9862 unsigned LoInstOpcode = D16Info->LoOp;
9864 DescSize = Desc.getSize();
9868 if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9871 DescSize = Desc.getSize();
9882 if (MI.memoperands_empty())
9894 static const std::pair<int, const char *> TargetIndices[] = {
9933 std::pair<unsigned, unsigned>
9940 static const std::pair<unsigned, const char *> TargetFlags[] = {
9958 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9974 return AMDGPU::WWM_COPY;
9976 return AMDGPU::COPY;
9993 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
9997 if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
9998 return IsLRSplitInst;
10011 bool IsNullOrVectorRegister = true;
10015 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
10018 return IsNullOrVectorRegister &&
10020 (!MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
10021 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
10029 if (ST.hasAddNoCarryInsts())
10033 Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
10034 MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
10045 if (ST.hasAddNoCarryInsts())
10049 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
10051 : RS.scavengeRegisterBackwards(
10052 *RI.getBoolRC(), I, false,
10065 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10066 case AMDGPU::SI_KILL_I1_TERMINATOR:
10075 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10076 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10077 case AMDGPU::SI_KILL_I1_PSEUDO:
10078 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10090 const unsigned OffsetBits =
10092 return (1 << OffsetBits) - 1;
10096 if (!ST.isWave32())
10099 if (MI.isInlineAsm())
10102 if (MI.getNumOperands() < MI.getNumExplicitOperands())
10105 for (auto &Op : MI.implicit_operands()) {
10106 if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
10107 Op.setReg(AMDGPU::VCC_LO);
10116 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
10120 const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
10121 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10137 if (Imm > MaxImm) {
10138 if (Imm <= MaxImm + 64) {
10140 Overflow = Imm - MaxImm;
10159 if (Overflow > 0) {
10167 if (ST.hasRestrictedSOffset())
10172 SOffset = Overflow;
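These lines belong to the MUBUF offset splitting: when a constant buffer offset exceeds the largest encodable immediate, the excess ("Overflow") is pushed into the SGPR soffset operand so that soffset plus the immediate still equals the original offset. A simplified sketch of that invariant, ignoring the alignment adjustments the real routine performs (names are illustrative; MaxImm stands in for the subtarget's maximum MUBUF immediate):

#include <cstdint>

// Simplified model of splitting a buffer offset: keep as much as fits in the
// immediate field and push the rest into soffset.
static void splitBufferOffsetSketch(uint32_t Imm, uint32_t MaxImm,
                                    uint32_t &SOffset, uint32_t &ImmOffset) {
  uint32_t Overflow = Imm > MaxImm ? Imm - MaxImm : 0;
  SOffset = Overflow;
  ImmOffset = Imm - Overflow; // SOffset + ImmOffset == Imm
}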
10210 if (!ST.hasFlatInstOffsets())
10218 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10230 std::pair<int64_t, int64_t>
10233 int64_t RemainderOffset = COffsetVal;
10234 int64_t ImmField = 0;
10239 if (AllowNegative) {
10241 int64_t D = 1LL << NumBits;
10242 RemainderOffset = (COffsetVal / D) * D;
10243 ImmField = COffsetVal - RemainderOffset;
10245 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10247 (ImmField % 4) != 0) {
10249 RemainderOffset += ImmField % 4;
10250 ImmField -= ImmField % 4;
10252 } else if (COffsetVal >= 0) {
10254 RemainderOffset = COffsetVal - ImmField;
10258 assert(RemainderOffset + ImmField == COffsetVal);
10259 return {ImmField, RemainderOffset};
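The split above preserves the invariant ImmField + RemainderOffset == COffsetVal while keeping the magnitude of ImmField below 1 << NumBits, so it fits the flat/scratch immediate field and the remainder can be folded into the address. A self-contained sketch of the signed-immediate branch, with the subtarget workaround omitted (function name is mine):

#include <cassert>
#include <cstdint>
#include <utility>

// Sketch of splitting a flat offset into an encodable immediate plus a
// remainder; NumBits is the width used to size the immediate range.
static std::pair<int64_t, int64_t> splitFlatOffsetSketch(int64_t COffsetVal,
                                                         unsigned NumBits) {
  int64_t D = 1LL << NumBits;
  // Round toward zero to a multiple of D; the leftover has magnitude < D.
  int64_t RemainderOffset = (COffsetVal / D) * D;
  int64_t ImmField = COffsetVal - RemainderOffset;
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}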
10263 if (ST.hasNegativeScratchOffsetBug() &&
10271 switch (ST.getGeneration()) {
10299 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10300 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10301 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10302 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10303 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10304 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10305 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10306 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10313#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10314 case OPCODE##_dpp: \
10315 case OPCODE##_e32: \
10316 case OPCODE##_e64: \
10317 case OPCODE##_e64_dpp: \
10318 case OPCODE##_sdwa:
10332 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10333 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10334 case AMDGPU::V_FMA_F16_gfx9_e64:
10335 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10336 case AMDGPU::V_INTERP_P2_F16:
10337 case AMDGPU::V_MAD_F16_e64:
10338 case AMDGPU::V_MAD_U16_e64:
10339 case AMDGPU::V_MAD_I16_e64:
10348 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10362 switch (ST.getGeneration()) {
10375 if (isMAI(Opcode)) {
10383 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX1250Insts())
10390 if (ST.hasGFX90AInsts()) {
10391 uint32_t NMCOp = AMDGPU::INSTRUCTION_LIST_END;
10392 if (ST.hasGFX940Insts())
10394 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10396 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10398 if (NMCOp != AMDGPU::INSTRUCTION_LIST_END)
10404 if (MCOp == AMDGPU::INSTRUCTION_LIST_END)
10423 for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
10424 if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10425 auto &RegOp = MI.getOperand(1 + 2 * I);
10437 switch (MI.getOpcode()) {
10439 case AMDGPU::REG_SEQUENCE:
10443 case AMDGPU::INSERT_SUBREG:
10444 if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10461 if (!P.Reg.isVirtual())
10465 auto *DefInst = MRI.getVRegDef(RSR.Reg);
10466 while (auto *MI = DefInst) {
10468 switch (MI->getOpcode()) {
10470 case AMDGPU::V_MOV_B32_e32: {
10471 auto &Op1 = MI->getOperand(1);
10476 DefInst = MRI.getVRegDef(RSR.Reg);
10484 DefInst = MRI.getVRegDef(RSR.Reg);
10497 assert(MRI.isSSA() && "Must be run on SSA");
10499 auto *TRI = MRI.getTargetRegisterInfo();
10500 auto *DefBB = DefMI.getParent();
10504 if (UseMI.getParent() != DefBB)
10507 const int MaxInstScan = 20;
10511 auto E = UseMI.getIterator();
10512 for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10513 if (I->isDebugInstr())
10516 if (++NumInst > MaxInstScan)
10519 if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10529 assert(MRI.isSSA() && "Must be run on SSA");
10531 auto *TRI = MRI.getTargetRegisterInfo();
10532 auto *DefBB = DefMI.getParent();
10534 const int MaxUseScan = 10;
10537 for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10538 auto &UseInst = *Use.getParent();
10541 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10544 if (++NumUse > MaxUseScan)
10551 const int MaxInstScan = 20;
10555 for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10558 if (I->isDebugInstr())
10561 if (++NumInst > MaxInstScan)
10574 if (Reg == VReg && --NumUse == 0)
10576 } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10585 auto Cur = MBB.begin();
10586 if (Cur != MBB.end())
10588 if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10591 } while (Cur != MBB.end() && Cur != LastPHIIt);
10600 if (InsPt != MBB.end() &&
10601 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10602 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10603 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10604 InsPt->definesRegister(Src, nullptr)) {
10608 .addReg(Src, {}, SrcSubReg)
10633 if (isFullCopyInstr(MI)) {
10634 Register DstReg = MI.getOperand(0).getReg();
10635 Register SrcReg = MI.getOperand(1).getReg();
10642 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10646 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10657 unsigned *PredCost) const {
10658 if (MI.isBundle()) {
10661 unsigned Lat = 0, Count = 0;
10662 for (++I; I != E && I->isBundledWithPred(); ++I) {
10664 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10666 return Lat + Count - 1;
10669 return SchedModel.computeInstrLatency(&MI);
10676 return *CallAddrOp;
10683 unsigned Opcode = MI.getOpcode();
10688 : MI.getOperand(1).getReg();
10689 LLT DstTy = MRI.getType(Dst);
10690 LLT SrcTy = MRI.getType(Src);
10692 unsigned SrcAS = SrcTy.getAddressSpace();
10695 ST.hasGloballyAddressableScratch()
10703 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10704 return HandleAddrSpaceCast(MI);
10707 auto IID = GI->getIntrinsicID();
10714 case Intrinsic::amdgcn_addrspacecast_nonnull:
10715 return HandleAddrSpaceCast(MI);
10716 case Intrinsic::amdgcn_if:
10717 case Intrinsic::amdgcn_else:
10731 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10732 Opcode == AMDGPU::G_SEXTLOAD) {
10733 if (MI.memoperands_empty())
10737 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10738 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10746 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10747 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10748 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10757 Formatter = std::make_unique<AMDGPUMIRFormatter>(ST);
10758 return Formatter.get();
10767 unsigned opcode = MI.getOpcode();
10768 if (opcode == AMDGPU::V_READLANE_B32 ||
10769 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10770 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10773 if (isCopyInstr(MI)) {
10777 RI.getPhysRegBaseClass(srcOp.getReg());
10785 if (MI.isPreISelOpcode())
10800 if (MI.memoperands_empty())
10804 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10805 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10820 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10822 if (!SrcOp.isReg())
10826 if (!Reg || !SrcOp.readsReg())
10832 if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10859 F, "ds_ordered_count unsupported for this calling conv"));
10873 Register &SrcReg2, int64_t &CmpMask,
10874 int64_t &CmpValue) const {
10875 if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10878 switch (MI.getOpcode()) {
10881 case AMDGPU::S_CMP_EQ_U32:
10882 case AMDGPU::S_CMP_EQ_I32:
10883 case AMDGPU::S_CMP_LG_U32:
10884 case AMDGPU::S_CMP_LG_I32:
10885 case AMDGPU::S_CMP_LT_U32:
10886 case AMDGPU::S_CMP_LT_I32:
10887 case AMDGPU::S_CMP_GT_U32:
10888 case AMDGPU::S_CMP_GT_I32:
10889 case AMDGPU::S_CMP_LE_U32:
10890 case AMDGPU::S_CMP_LE_I32:
10891 case AMDGPU::S_CMP_GE_U32:
10892 case AMDGPU::S_CMP_GE_I32:
10893 case AMDGPU::S_CMP_EQ_U64:
10894 case AMDGPU::S_CMP_LG_U64:
10895 SrcReg = MI.getOperand(0).getReg();
10896 if (MI.getOperand(1).isReg()) {
10897 if (MI.getOperand(1).getSubReg())
10899 SrcReg2 = MI.getOperand(1).getReg();
10901 } else if (MI.getOperand(1).isImm()) {
10903 CmpValue = MI.getOperand(1).getImm();
10909 case AMDGPU::S_CMPK_EQ_U32:
10910 case AMDGPU::S_CMPK_EQ_I32:
10911 case AMDGPU::S_CMPK_LG_U32:
10912 case AMDGPU::S_CMPK_LG_I32:
10913 case AMDGPU::S_CMPK_LT_U32:
10914 case AMDGPU::S_CMPK_LT_I32:
10915 case AMDGPU::S_CMPK_GT_U32:
10916 case AMDGPU::S_CMPK_GT_I32:
10917 case AMDGPU::S_CMPK_LE_U32:
10918 case AMDGPU::S_CMPK_LE_I32:
10919 case AMDGPU::S_CMPK_GE_U32:
10920 case AMDGPU::S_CMPK_GE_I32:
10921 SrcReg = MI.getOperand(0).getReg();
10923 CmpValue = MI.getOperand(1).getImm();
10933 if (S->isLiveIn(AMDGPU::SCC))
10942 bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
10945 bool SCCIsDead = false;
10948 constexpr unsigned ScanLimit = 12;
10949 unsigned Count = 0;
10950 for (MachineInstr &MI :
10952 if (++Count > ScanLimit)
10954 if (MI.readsRegister(AMDGPU::SCC, &RI)) {
10955 if (MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
10956 MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
10957 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10958 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
10963 if (MI.definesRegister(AMDGPU::SCC, &RI)) {
10976 for (MachineInstr *MI : InvertInstr) {
10977 if (MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10978 MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
10980 } else if (MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10981 MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
10982 MI->setDesc(get(MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
10983 ? AMDGPU::S_CBRANCH_SCC1
10984 : AMDGPU::S_CBRANCH_SCC0));
10997 bool NeedInversion) const {
10998 MachineInstr *KillsSCC = nullptr;
11003 if (MI.modifiesRegister(AMDGPU::SCC, &RI))
11005 if (MI.killsRegister(AMDGPU::SCC, &RI))
11008 if (NeedInversion && !invertSCCUse(SCCRedefine))
11010 if (MachineOperand *SccDef =
11012 SccDef->setIsDead(false);
11020 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
11021 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
11023 bool Op1IsNonZeroImm =
11024 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
11025 bool Op2IsZeroImm =
11026 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
11027 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
11033 unsigned &NewDefOpc) {
11036 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
11037 Def.getOpcode() != AMDGPU::S_ADD_U32)
11043 if ((!AddSrc1.isImm() || AddSrc1.getImm() != 1) &&
11049 if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
11051 Def.findRegisterDefOperand(AMDGPU::SCC, nullptr);
11054 NewDefOpc = AMDGPU::S_ADD_U32;
11056 NeedInversion = !NeedInversion;
11061 Register SrcReg2, int64_t CmpMask,
11070 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
11071 this](bool NeedInversion) -> bool {
11095 unsigned NewDefOpc = Def->getOpcode();
11101 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11104 if (NewDefOpc != Def->getOpcode())
11105 Def->setDesc(get(NewDefOpc));
11114 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11115 MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
11121 if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
11129 optimizeSCC(Select, Def, false);
11136 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11137 this](int64_t ExpectedValue, unsigned SrcSize,
11138 bool IsReversible, bool IsSigned) -> bool {
11166 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11167 Def->getOpcode() != AMDGPU::S_AND_B64)
11171 const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
11182 SrcOp = &Def->getOperand(2);
11183 else if (isMask(&Def->getOperand(2)))
11184 SrcOp = &Def->getOperand(1);
11192 if (IsSigned && BitNo == SrcSize - 1)
11195 ExpectedValue <<= BitNo;
11197 bool IsReversedCC = false;
11198 if (CmpValue != ExpectedValue) {
11201 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11206 Register DefReg = Def->getOperand(0).getReg();
11207 if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
11210 if (!optimizeSCC(Def, &CmpInstr, false))
11213 if (!MRI->use_nodbg_empty(DefReg)) {
11221 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11222 : AMDGPU::S_BITCMP1_B32
11223 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11224 : AMDGPU::S_BITCMP1_B64;
11229 Def->eraseFromParent();
11237 case AMDGPU::S_CMP_EQ_U32:
11238 case AMDGPU::S_CMP_EQ_I32:
11239 case AMDGPU::S_CMPK_EQ_U32:
11240 case AMDGPU::S_CMPK_EQ_I32:
11241 return optimizeCmpAnd(1, 32, true, false) ||
11242 optimizeCmpSelect(true);
11243 case AMDGPU::S_CMP_GE_U32:
11244 case AMDGPU::S_CMPK_GE_U32:
11245 return optimizeCmpAnd(1, 32, false, false);
11246 case AMDGPU::S_CMP_GE_I32:
11247 case AMDGPU::S_CMPK_GE_I32:
11248 return optimizeCmpAnd(1, 32, false, true);
11249 case AMDGPU::S_CMP_EQ_U64:
11250 return optimizeCmpAnd(1, 64, true, false);
11251 case AMDGPU::S_CMP_LG_U32:
11252 case AMDGPU::S_CMP_LG_I32:
11253 case AMDGPU::S_CMPK_LG_U32:
11254 case AMDGPU::S_CMPK_LG_I32:
11255 return optimizeCmpAnd(0, 32, true, false) ||
11256 optimizeCmpSelect(false);
11257 case AMDGPU::S_CMP_GT_U32:
11258 case AMDGPU::S_CMPK_GT_U32:
11259 return optimizeCmpAnd(0, 32, false, false);
11260 case AMDGPU::S_CMP_GT_I32:
11261 case AMDGPU::S_CMPK_GT_I32:
11262 return optimizeCmpAnd(0, 32, false, true);
11263 case AMDGPU::S_CMP_LG_U64:
11264 return optimizeCmpAnd(0, 64, true, false) ||
11265 optimizeCmpSelect(false);
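The optimizeCmpAnd lambda replaces an S_AND with a single-bit mask followed by a compare against zero or the mask itself with a single S_BITCMP0/S_BITCMP1, relying on the bit-test identity below. A scalar sketch of that identity (illustration only; helper names are mine):

#include <cassert>
#include <cstdint>

// (X & (1u << BitNo)) == (1u << BitNo)  <=>  bit BitNo of X is set, and
// (X & (1u << BitNo)) == 0              <=>  bit BitNo of X is clear.
// This is what lets the S_AND + S_CMP pair collapse into S_BITCMP.
static bool bitcmp1(uint32_t X, unsigned BitNo) { return (X >> BitNo) & 1u; }

static void checkBitTestIdentity(uint32_t X, unsigned BitNo) {
  uint32_t Mask = 1u << BitNo;
  assert(((X & Mask) == Mask) == bitcmp1(X, BitNo));
  assert(((X & Mask) == 0) == !bitcmp1(X, BitNo));
}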
11272 AMDGPU::OpName OpName) const {
11273 if (!ST.needsAlignedVGPRs())
11276 int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
11288 bool IsAGPR = RI.isAGPR(MRI, DataReg);
11290 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11293 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
11294 : &AMDGPU::VReg_64_Align2RegClass);
11296 .addReg(DataReg, {}, Op.getSubReg())
11301 Op.setSubReg(AMDGPU::sub0);
11316 if (ST.hasGFX1250Insts())
11323 unsigned Opcode = MI.getOpcode();
11329 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11330 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11333 if (!ST.hasGFX940Insts())
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
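A minimal sketch of completing the partially built add returned above; MBB, I, DL, DestReg, Src0Reg, and OffsetImm are assumed to exist in the caller:
  // getAddNoCarry attaches the destination; the caller appends the two sources.
  TII->getAddNoCarry(MBB, I, DL, DestReg)
      .addReg(Src0Reg)
      .addImm(OffsetImm);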
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const
Return the size in bytes of operand OpNo for the given instruction opcode.
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
const MIRFormatter * getMIRFormatter() const override
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
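A hedged sketch of how the returned pair is typically consumed: the first half goes into the instruction's immediate offset field and the remainder must be folded into the address. COffsetVal is an assumed input, and the global FLAT variant (SIInstrFlags::FlatGlobal) is chosen purely for illustration:
  auto [ImmOffset, RemainderOffset] =
      TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                           SIInstrFlags::FlatGlobal);
  // ImmOffset fits the FLAT encoding; RemainderOffset is added to the base address.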
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst when lowering 16-bit SALU instructions to VALU.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
bool isAlwaysGDS(uint32_t Opcode) const
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool isWWMRegSpillOpcode(uint32_t Opcode)
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named OperandName.
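A small sketch combining getNamedOperand and getNamedImmOperand from this listing; MI is assumed to carry an offset operand:
  if (const MachineOperand *Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset)) {
    int64_t OffVal = Off->getImm();  // operand may be absent, so check the pointer
    (void)OffVal;
  }
  // When the operand is known to be present:
  int64_t OffImm = TII->getNamedImmOperand(MI, AMDGPU::OpName::offset);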
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand of MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
unsigned getScratchReservedForDynamicVGPRs() const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int32_t getCommuteRev(uint32_t Opcode)
LLVM_READONLY int32_t getCommuteOrig(uint32_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READONLY int32_t getGlobalVaddrOp(uint32_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getIfAddr64Inst(uint32_t Opcode)
Check if Opcode is an Addr64 opcode.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
LLVM_READONLY int32_t getAddr64Inst(uint32_t Opcode)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int32_t getBasicFromSDWAOp(uint32_t Opcode)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
LLVM_READONLY int32_t getFlatScratchInstSVfromSS(uint32_t Opcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
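A minimal sketch of one BuildMI overload in use, assuming TII, MBB, I, DL, and DstReg come from the surrounding lowering code; V_MOV_B32_e32 is used purely as an illustrative opcode:
  BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), DstReg)
      .addImm(0);   // materialize a zero into DstReg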
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
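Worked examples for the integer-width helpers in this list (maxUIntN, isInt, isUInt), all from llvm/Support/MathExtras.h:
  static_assert(llvm::maxUIntN(8) == 255, "2^8 - 1");
  static_assert(llvm::isInt<16>(32767) && !llvm::isInt<16>(32768), "signed 16-bit range");
  static_assert(llvm::isUInt<12>(4095) && !llvm::isUInt<12>(4096), "unsigned 12-bit range");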
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of zeros from the least significant bit to the most significant, stopping at the first 1.
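Small worked examples for the bit helpers above (popcount, countr_zero, isPowerOf2_64); the values are verifiable by hand:
  int SetBits  = llvm::popcount(0xF0u);      // 4 set bits
  int Trailing = llvm::countr_zero(0x8u);    // 3 trailing zeros
  bool IsPow2  = llvm::isPowerOf2_64(64);    // true; isPowerOf2_64(0) is false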
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
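A short example of splitting a 64-bit value with Hi_32/Lo_32, the usual first step when a 64-bit immediate has to be emitted as two 32-bit halves:
  uint64_t Imm = 0x123456789ABCDEF0ULL;
  uint32_t Lo = llvm::Lo_32(Imm);   // 0x9ABCDEF0
  uint32_t Hi = llvm::Hi_32(Imm);   // 0x12345678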
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
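Worked examples for SignExtend64 and maskTrailingOnes from this list:
  uint32_t ByteMask = llvm::maskTrailingOnes<uint32_t>(8);   // 0x000000FF
  int64_t  Ext      = llvm::SignExtend64<9>(0x100);          // bit 8 is the sign bit, so -256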
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr RegState getUndefRegState(bool B)
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
static const MachineMemOperand::Flags MOThreadPrivate
Mark the MMO of accesses to memory locations that are never written to by other threads.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.