#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"

    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
         llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
         });
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())

  if (MI.isCompare()) {

    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:

    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {

  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {

  assert(ExitBlocks[0]->getSinglePredecessor());
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    assert(NumOps == 4 || NumOps == 5);

        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

    if (!Load0Offset || !Load1Offset)

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

    unsigned Offset0 = Offset0Op->getImm() & 0xff;
    unsigned Offset1 = Offset1Op->getImm() & 0xff;
    if (Offset0 + 1 != Offset1)

    Offset = EltSize * Offset0;

    if (DataOpIdx == -1) {

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;

    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
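  // Estimate the per-operation width from the total byte count of the
  // candidate cluster, round it up to whole DWORDs, and only allow clustering
  // while the combined footprint stays within eight DWORDs.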
  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWORDs <= 8;
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                              const char *Msg = "illegal VGPR to SGPR copy") {

  C.diagnose(IllegalCopy);

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

  for (auto Def = MI, E = MBB.begin(); Def != E; ) {

    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

    bool SafeToPropagate = true;

    for (auto I = Def; I != MI && SafeToPropagate; ++I)
      if (I->modifiesRegister(DefOp.getReg(), &RI))
        SafeToPropagate = false;

    if (!SafeToPropagate)

    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,
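  // GFX908 has no direct AGPR-to-AGPR move, so the copy is staged through one
  // of the VGPRs reserved for this purpose; the temporary is picked by cycling
  // through the reserved set based on the destination AGPR number.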
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

         "VGPR used for an intermediate copy should have been reserved.");

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

    if (ImpUseSuperReg) {
      UseBuilder.addReg(ImpUseSuperReg,

    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;
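    // If both the source and destination subregisters sit on an even SGPR
    // boundary and another subregister follows, the pair can be copied with a
    // single S_MOV_B64 instead of two 32-bit moves.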
    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);

  unsigned Size = RI.getRegSizeInBits(*RC);

  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                   AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

  if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
      (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");

  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;

    Opcode = AMDGPU::V_ACCVGPR_MOV_B32;

      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      Opcode = AMDGPU::INSTRUCTION_LIST_END;

      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;

      Opcode = AMDGPU::V_MOV_B64_e32;

      Opcode = AMDGPU::V_PK_MOV_B32;
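  // A scavenger is only needed when no single move opcode covers the copy
  // (INSTRUCTION_LIST_END above), i.e. when the AGPR copy has to be staged
  // through a temporary VGPR.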
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                         *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {

                                       int64_t Value) const {

  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {
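  // Wider register classes are materialized 32 (or 64) bits at a time: the
  // requested value goes into the first subregister and the remaining
  // subregisters are zeroed.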
  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      Opcode = AMDGPU::S_MOV_B32;

    int64_t IdxValue = Idx == 0 ? Value : 0;

        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
    Builder.addImm(IdxValue);

  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);

  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_CSELECT_B64), SReg)

    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_CSELECT_B64), SReg)

    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);

    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);

    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                              : AMDGPU::S_CSELECT_B64), SReg)

    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                              : AMDGPU::S_CSELECT_B64), SReg)

  return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  } else if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;

  return AMDGPU::COPY;

                                                       bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                                   bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_A32_SAVE;
    return AMDGPU::SI_SPILL_A64_SAVE;
    return AMDGPU::SI_SPILL_A96_SAVE;
    return AMDGPU::SI_SPILL_A128_SAVE;
    return AMDGPU::SI_SPILL_A160_SAVE;
    return AMDGPU::SI_SPILL_A192_SAVE;
    return AMDGPU::SI_SPILL_A224_SAVE;
    return AMDGPU::SI_SPILL_A256_SAVE;
    return AMDGPU::SI_SPILL_A288_SAVE;
    return AMDGPU::SI_SPILL_A320_SAVE;
    return AMDGPU::SI_SPILL_A352_SAVE;
    return AMDGPU::SI_SPILL_A384_SAVE;
    return AMDGPU::SI_SPILL_A512_SAVE;
    return AMDGPU::SI_SPILL_A1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
                                      bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

                            FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                                  SpillSize, RI, *MFI);
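// Restores mirror the save path: the SI_SPILL_*_RESTORE pseudo is selected
// from the spill size of the register class being reloaded.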
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_A32_RESTORE;
    return AMDGPU::SI_SPILL_A64_RESTORE;
    return AMDGPU::SI_SPILL_A96_RESTORE;
    return AMDGPU::SI_SPILL_A128_RESTORE;
    return AMDGPU::SI_SPILL_A160_RESTORE;
    return AMDGPU::SI_SPILL_A192_RESTORE;
    return AMDGPU::SI_SPILL_A224_RESTORE;
    return AMDGPU::SI_SPILL_A256_RESTORE;
    return AMDGPU::SI_SPILL_A288_RESTORE;
    return AMDGPU::SI_SPILL_A320_RESTORE;
    return AMDGPU::SI_SPILL_A352_RESTORE;
    return AMDGPU::SI_SPILL_A384_RESTORE;
    return AMDGPU::SI_SPILL_A512_RESTORE;
    return AMDGPU::SI_SPILL_A1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                         bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

  unsigned SpillSize = TRI->getSpillSize(*RC);

                            FrameInfo.getObjectAlign(FrameIndex));

    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                                  SpillSize, RI, *MFI);

                               unsigned Quantity) const {

  while (Quantity > 0) {
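    // A single S_NOP covers at most eight wait states, so longer sequences
    // are emitted in chunks of eight.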
    unsigned Arg = std::min(Quantity, 8u);

  if (HasNoTerminator) {
    if (Info->returnsVoid()) {
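  // For the abort path, compose the trap message: mask the doorbell ID out of
  // the ID register and set the queue-wave-abort bit before signalling the
  // trap handler.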
  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

          .addUse(DoorbellRegMasked)
          .addImm(ECQueueWaveAbort);

          .addUse(SetWaveAbortBit);

  if (SplitBB != &MBB)

  switch (MI.getOpcode()) {

    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));

  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

        .add(MI.getOperand(1));

        .add(MI.getOperand(2));

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B64: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

                   MI.getOperand(0).getReg())
        .add(MI.getOperand(1));

                   MI.getOperand(0).getReg())
        .add(MI.getOperand(2));

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    // The expanded MOVREL carries the vector super-register as an extra
    // implicit def and use right after the static operand list.
    const int ImpDefIdx =
        OpDesc.getNumOperands() + OpDesc.implicit_uses().size();
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

            .add(MI.getOperand(2))

    // As above, the indirect write carries an implicit def and use of the
    // full vector register after the static operands.
    const int ImpDefIdx =
        OpDesc.getNumOperands() + OpDesc.implicit_uses().size();
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

        BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
                       : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

  case AMDGPU::ENTER_PSEUDO_WM:
  case AMDGPU::EXIT_PSEUDO_WM: {
    MI.eraseFromParent();

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");
    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);

std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);
  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {

    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));

      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {

      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());

        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);

std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;
                                       unsigned Src0OpName,
                                       unsigned Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                      Src1, AMDGPU::OpName::src1_modifiers);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);

                                        int64_t BrOffset) const {

  assert(BranchOp != AMDGPU::S_SETPC_B64);

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
      MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");
  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

      MCCtx.createTempSymbol("post_getpc", true);

      MCCtx.createTempSymbol("offset_lo", true);

      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)

      .addReg(PCReg, 0, AMDGPU::sub1)

  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();

unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;
SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));

    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)

    CondBB = I->getOperand(0).getMBB();
    Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;

    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

  if (Cond.size() == 1 && Cond[0].isReg()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {

                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };
  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();

      MI.removeOperand(Idx);

  if (!MRI->hasOneNonDBGUse(Reg))

  switch (DefMI.getOpcode()) {

  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t {
    int64_t Imm = ImmOp->getImm();
    switch (UseOp.getSubReg()) {

    case AMDGPU::sub1_lo16:
    case AMDGPU::sub1_hi16:
  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    bool Is16Bit = OpSize == 2;
    bool Is64Bit = OpSize == 8;

    unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
                                           : AMDGPU::V_MOV_B32_e32
                                 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
                                           : AMDGPU::S_MOV_B32;
    APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)));

      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)

      UseMI.getOperand(0).setSubReg(0);

      UseMI.getOperand(0).setReg(DstReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
                 Opc == AMDGPU::V_FMAC_F16_t16_e64;
        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

      if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
                         : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16)

      const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      Src1->ChangeToImmediate(Imm);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

    bool Src0Inlined = false;
    if (Src0->isReg()) {

      if (Def && Def->isMoveImmediate() &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
                         : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))
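  // With identical base operands, two fixed-offset accesses are disjoint
  // exactly when the lower-addressed access ends at or before the higher one
  // begins.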
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,

  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    if (Def.isEarlyClobber() && Def.isReg() &&

    auto UpdateDefIndex = [&](LiveRange &LR) {
      auto S = LR.find(OldIndex);
      if (S != LR.end() && S->start == OldIndex) {
        assert(S->valno && S->valno->def == OldIndex);
        S->start = NewIndex;
        S->valno->def = NewIndex;

    for (auto &SR : LI.subranges())

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         "V_FMAC_F16_t16_e32 is not supported and not expected to be present "

  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                      AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&

  const auto killDef = [&]() -> void {

    if (!MRI.hasOneNonDBGUse(DefReg))

                             : AMDGPU::V_FMAAK_F16)
                  : AMDGPU::V_FMAAK_F32)
        : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);

                             : AMDGPU::V_FMAMK_F16)
                  : AMDGPU::V_FMAMK_F32)
        : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                          : IsF64 ? AMDGPU::V_FMA_F64_e64
                                  ? AMDGPU::V_FMA_LEGACY_F32_e64
                                  : AMDGPU::V_FMA_F32_e64
                          : IsF16 ? AMDGPU::V_MAD_F16_e64
                          : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
                                     : AMDGPU::V_MAD_F32_e64;

    MIB.addImm(OpSel ? OpSel->getImm() : 0);
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
      Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);

  switch (Imm.getBitWidth()) {

  APInt IntImm = Imm.bitcastToAPInt();
                                   uint8_t OperandType) const {
  assert(!MO.isReg() && "isInlineConstant called on register operand!");

  int64_t Imm = MO.getImm();
  switch (OperandType) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {

      int16_t Trunc = static_cast<int16_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

                                  AMDGPU::OpName::src2))

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

  return Mods && Mods->getImm();

  switch (MI.getOpcode()) {
  default:
    return false;

  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

      (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
                                           unsigned Op32) const {

  Inst32.add(MI.getOperand(0));

    assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) ||
            (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&

  if (Op32Src2Idx != -1) {

  if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)

    return MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
           MO.getReg() == AMDGPU::VCC_LO;

  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
         AMDGPU::SReg_64RegClass.contains(MO.getReg());

  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
  if (Src0Idx == -1) {

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {

      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";

  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {

      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int RegClass = Desc.operands()[i].RegClass;

    switch (Desc.operands()[i].OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Expected inline constant for operand.";

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

          RI.getSubRegisterClass(RC, MO.getSubReg());

        ErrInfo = "Subtarget requires even aligned vector registers";

    if (RegClass != -1) {
      if (Reg.isVirtual())

        ErrInfo = "Operand has incorrect register class.";

    ErrInfo = "SDWA is not supported on this target";
    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {

          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";

              "Only reg allowed as operands in SDWA instructions on GFX9+";

      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {

      unsigned Mods = Src0ModsMO->getImm();

        ErrInfo = "sext, abs and neg are not allowed on this instruction";

    if (isVOPC(BasicOpcode)) {

      if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
        ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";

      if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
        ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";

      if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
        ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&

      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));

            "Dst register should be tied to implicit use of preserved register";

                 Dst.getReg() != TiedMO.getReg()) {
        ErrInfo = "Dst register should use same physical register as preserved";

    uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
    if (RegCount > DstSize) {
      ErrInfo = "Image instruction returns too many registers for dst "
  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
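    // VALU encodings may read at most one SGPR (or one literal) across all
    // sources; the loop below counts constant-bus uses and flags a second
    // literal or an extra unique SGPR as a violation.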
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {

            ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

          if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
                return !RI.regsOverlap(SGPRUsed, SGPR);

        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

      ErrInfo = "VOP3 instruction uses literal";

  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;

    for (int OpIdx : {Src0Idx, Src1Idx}) {

        if (MO.getReg() != SGPRUsed)

      ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {

      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

      ErrInfo = "ABS not allowed in VOP3B instructions";

      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

  if (Desc.isBranch()) {
      ErrInfo = "invalid branch target for SOPK instruction";

      if (!isUInt<16>(Imm)) {
        ErrInfo = "invalid immediate for SOPK instruction";

      if (!isInt<16>(Imm)) {
        ErrInfo = "invalid immediate for SOPK instruction";
  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";

  if (MI.mayStore() &&

    if (Soff && Soff->getReg() != AMDGPU::M0) {
      ErrInfo = "scalar stores must use m0 as offset register";

    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";

    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";

                                           AMDGPU::OpName::vaddr0);
        isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;

      ErrInfo = "dim is out of range";

      IsA16 = R128A16->getImm() != 0;
    } else if (ST.hasA16()) {
      IsA16 = A16->getImm() != 0;

    bool IsNSA = RsrcIdx - VAddr0Idx > 1;
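    // Compare the number of address words the dim/A16 combination requires
    // with the number of VGPRs actually encoded for vaddr (for NSA forms,
    // the operand count between vaddr0 and srsrc).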
    unsigned AddrWords =

    unsigned VAddrWords;
      VAddrWords = RsrcIdx - VAddr0Idx;

        unsigned LastVAddrIdx = RsrcIdx - 1;
        VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

    if (VAddrWords != AddrWords) {
                        << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";

    using namespace AMDGPU::DPP;

    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";

    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST &&
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
      } else if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
    uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
                                        : AMDGPU::OpName::vdata;

        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";

      if (Data && Data2 &&
        ErrInfo = "Invalid register class: "
                  "both data operands should be VGPR or AGPR";

      if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
        ErrInfo = "Invalid register class: "
                  "agpr loads and stores not supported on this GPU";

  const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool {

    if (Reg.isPhysical())

  if (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
      MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
      MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {

    if (!isAlignedReg(AMDGPU::OpName::data0)) {
      ErrInfo = "Subtarget requires even aligned vector registers "
                "for DS_GWS instructions";

    if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
      ErrInfo = "Subtarget requires even aligned vector registers "
                "for vaddr operand of image instructions";

  if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&

    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";
5338 switch (
MI.getOpcode()) {
5339 default:
return AMDGPU::INSTRUCTION_LIST_END;
5340 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5341 case AMDGPU::COPY:
return AMDGPU::COPY;
5342 case AMDGPU::PHI:
return AMDGPU::PHI;
5343 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5344 case AMDGPU::WQM:
return AMDGPU::WQM;
5345 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5346 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5347 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5348 case AMDGPU::S_MOV_B32: {
5350 return MI.getOperand(1).isReg() ||
5352 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5354 case AMDGPU::S_ADD_I32:
5355 return ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5356 case AMDGPU::S_ADDC_U32:
5357 return AMDGPU::V_ADDC_U32_e32;
5358 case AMDGPU::S_SUB_I32:
5359 return ST.
hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5362 case AMDGPU::S_ADD_U32:
5363 return AMDGPU::V_ADD_CO_U32_e32;
5364 case AMDGPU::S_SUB_U32:
5365 return AMDGPU::V_SUB_CO_U32_e32;
5366 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5367 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5368 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5369 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5370 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5371 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5372 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5373 case AMDGPU::S_XNOR_B32:
5374 return ST.
hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5375 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5376 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5377 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5378 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5379 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5380 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5381 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5382 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5383 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5384 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5385 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5386 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5387 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5388 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5389 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5390 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5391 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5392 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5393 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5394 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5395 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5396 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5397 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5398 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5399 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5400 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5401 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5402 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5403 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5404 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5405 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5406 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5407 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5408 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5409 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5410 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5411 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5412 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5413 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5414 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5415 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5416 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5417 case AMDGPU::S_CVT_F32_F16:
return AMDGPU::V_CVT_F32_F16_t16_e64;
5418 case AMDGPU::S_CVT_HI_F32_F16:
return AMDGPU::V_CVT_F32_F16_t16_e64;
5419 case AMDGPU::S_CVT_F16_F32:
return AMDGPU::V_CVT_F16_F32_t16_e64;
5420 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5421 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5422 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5423 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5424 case AMDGPU::S_CEIL_F16:
5426 : AMDGPU::V_CEIL_F16_fake16_e64;
5427 case AMDGPU::S_FLOOR_F16:
5429 : AMDGPU::V_FLOOR_F16_fake16_e64;
5430 case AMDGPU::S_TRUNC_F16:
5431 return AMDGPU::V_TRUNC_F16_fake16_e64;
5432 case AMDGPU::S_RNDNE_F16:
5433 return AMDGPU::V_RNDNE_F16_fake16_e64;
5434 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5435 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5436 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5437 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5438 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5439 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5440 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5441 case AMDGPU::S_ADD_F16:
return AMDGPU::V_ADD_F16_fake16_e64;
5442 case AMDGPU::S_SUB_F16:
return AMDGPU::V_SUB_F16_fake16_e64;
5443 case AMDGPU::S_MIN_F16:
return AMDGPU::V_MIN_F16_fake16_e64;
5444 case AMDGPU::S_MAX_F16:
return AMDGPU::V_MAX_F16_fake16_e64;
5445 case AMDGPU::S_MINIMUM_F16:
return AMDGPU::V_MINIMUM_F16_e64;
5446 case AMDGPU::S_MAXIMUM_F16:
return AMDGPU::V_MAXIMUM_F16_e64;
5447 case AMDGPU::S_MUL_F16:
return AMDGPU::V_MUL_F16_fake16_e64;
5448 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5449 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5450 case AMDGPU::S_FMAC_F16:
return AMDGPU::V_FMAC_F16_t16_e64;
5451 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5452 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5453 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5454 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5455 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5456 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5457 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5458 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5459 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5460 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5461 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5462 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5463 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5464 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5465 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5466 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5467 case AMDGPU::S_CMP_LT_F16:
return AMDGPU::V_CMP_LT_F16_t16_e64;
5468 case AMDGPU::S_CMP_EQ_F16:
return AMDGPU::V_CMP_EQ_F16_t16_e64;
5469 case AMDGPU::S_CMP_LE_F16:
return AMDGPU::V_CMP_LE_F16_t16_e64;
5470 case AMDGPU::S_CMP_GT_F16:
return AMDGPU::V_CMP_GT_F16_t16_e64;
5471 case AMDGPU::S_CMP_LG_F16:
return AMDGPU::V_CMP_LG_F16_t16_e64;
5472 case AMDGPU::S_CMP_GE_F16:
return AMDGPU::V_CMP_GE_F16_t16_e64;
5473 case AMDGPU::S_CMP_O_F16:
return AMDGPU::V_CMP_O_F16_t16_e64;
5474 case AMDGPU::S_CMP_U_F16:
return AMDGPU::V_CMP_U_F16_t16_e64;
5475 case AMDGPU::S_CMP_NGE_F16:
return AMDGPU::V_CMP_NGE_F16_t16_e64;
5476 case AMDGPU::S_CMP_NLG_F16:
return AMDGPU::V_CMP_NLG_F16_t16_e64;
5477 case AMDGPU::S_CMP_NGT_F16:
return AMDGPU::V_CMP_NGT_F16_t16_e64;
5478 case AMDGPU::S_CMP_NLE_F16:
return AMDGPU::V_CMP_NLE_F16_t16_e64;
5479 case AMDGPU::S_CMP_NEQ_F16:
return AMDGPU::V_CMP_NEQ_F16_t16_e64;
5480 case AMDGPU::S_CMP_NLT_F16:
return AMDGPU::V_CMP_NLT_F16_t16_e64;
5481 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5482 case AMDGPU::V_S_EXP_F16_e64:
return AMDGPU::V_EXP_F16_fake16_e64;
5483 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5484 case AMDGPU::V_S_LOG_F16_e64:
return AMDGPU::V_LOG_F16_fake16_e64;
5485 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5486 case AMDGPU::V_S_RCP_F16_e64:
return AMDGPU::V_RCP_F16_fake16_e64;
5487 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5488 case AMDGPU::V_S_RSQ_F16_e64:
return AMDGPU::V_RSQ_F16_fake16_e64;
5489 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5490 case AMDGPU::V_S_SQRT_F16_e64:
return AMDGPU::V_SQRT_F16_fake16_e64;
5493 "Unexpected scalar opcode without corresponding vector one!");
5506 bool IsWave32 = ST.isWave32();
5511 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5512 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5521 const unsigned OrSaveExec =
5522 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5535 unsigned ExecMov =
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5537 auto ExecRestoreMI =
5547 bool IsAllocatable) {
5548 if ((IsAllocatable || !ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
5553 case AMDGPU::AV_32RegClassID:
5554 RCID = AMDGPU::VGPR_32RegClassID;
5556 case AMDGPU::AV_64RegClassID:
5557 RCID = AMDGPU::VReg_64RegClassID;
5559 case AMDGPU::AV_96RegClassID:
5560 RCID = AMDGPU::VReg_96RegClassID;
5562 case AMDGPU::AV_128RegClassID:
5563 RCID = AMDGPU::VReg_128RegClassID;
5565 case AMDGPU::AV_160RegClassID:
5566 RCID = AMDGPU::VReg_160RegClassID;
5568 case AMDGPU::AV_512RegClassID:
5569 RCID = AMDGPU::VReg_512RegClassID;
5585 auto RegClass = TID.
operands()[OpNum].RegClass;
5586 bool IsAllocatable =
false;
5596 AMDGPU::OpName::vdst);
5599 : AMDGPU::OpName::vdata);
5600 if (DataIdx != -1) {
5602 TID.
Opcode, AMDGPU::OpName::data1);
5610 unsigned OpNo)
const {
5613 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
5614 Desc.operands()[OpNo].RegClass == -1) {
5617 if (Reg.isVirtual())
5618 return MRI.getRegClass(Reg);
5619 return RI.getPhysRegBaseClass(Reg);
5622 unsigned RCID =
Desc.operands()[OpNo].RegClass;
5631 unsigned RCID =
get(
MI.getOpcode()).operands()[OpIdx].RegClass;
5633 unsigned Size = RI.getRegSizeInBits(*RC);
5634 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
5636 Opcode = AMDGPU::COPY;
5638 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
5655 if (SuperReg.
getSubReg() == AMDGPU::NoSubRegister) {
5665 Register NewSuperReg =
MRI.createVirtualRegister(SuperRC);
5671 .
addReg(NewSuperReg, 0, SubIdx);
5681 if (SubIdx == AMDGPU::sub0)
5683 if (SubIdx == AMDGPU::sub1)
5695void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
5711 if (Reg.isPhysical())
5722 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
5749 MO = &
MI.getOperand(OpIdx);
5761 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
5767 if (!SGPRsUsed.
count(SGPR) &&
5770 if (--ConstantBusLimit <= 0)
5776 if (!LiteralLimit--)
5778 if (--ConstantBusLimit <= 0)
5792 unsigned Opc =
MI.getOpcode();
5800 isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5801 if ((
int)OpIdx == VDstIdx && DataIdx != -1 &&
5802 MI.getOperand(DataIdx).isReg() &&
5803 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
5805 if ((
int)OpIdx == DataIdx) {
5806 if (VDstIdx != -1 &&
5807 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5811 AMDGPU::OpName::data1);
5812 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
5813 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5816 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts() &&
5826 bool Is64BitOp = Is64BitFPOp ||
5839 if (!Is64BitFPOp && (int32_t)Imm < 0)
5857 unsigned Opc =
MI.getOpcode();
5876 if (Opc == AMDGPU::V_WRITELANE_B32) {
5879 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5885 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5902 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
5904 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
5916 if (Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
5918 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5930 if (HasImplicitSGPR || !
MI.isCommutable()) {
5947 if (CommutedOpc == -1) {
5952 MI.setDesc(
get(CommutedOpc));
5956 bool Src0Kill = Src0.
isKill();
5960 else if (Src1.
isReg()) {
5975 unsigned Opc =
MI.getOpcode();
5983 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
5984 Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
5990 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5996 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6007 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6009 SGPRsUsed.
insert(SGPRReg);
6013 for (
int Idx : VOP3Idx) {
6022 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6047 if (ConstantBusLimit > 0) {
6059 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6060 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6069 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6073 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6075 get(TargetOpcode::COPY), NewSrcReg)
6082 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6088 for (
unsigned i = 0; i < SubRegs; ++i) {
6089 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6091 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6098 get(AMDGPU::REG_SEQUENCE), DstReg);
6099 for (
unsigned i = 0; i < SubRegs; ++i) {
6114 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6116 SBase->setReg(SGPR);
6128 if (OldSAddrIdx < 0)
6145 if (NewVAddrIdx < 0)
6152 if (OldVAddrIdx >= 0) {
6154 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6155 if (!VAddrDef || VAddrDef->
getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6166 if (OldVAddrIdx == NewVAddrIdx) {
6169 MRI.removeRegOperandFromUseList(&NewVAddr);
6170 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6174 MRI.removeRegOperandFromUseList(&NewVAddr);
6175 MRI.addRegOperandToUseList(&NewVAddr);
6177 assert(OldSAddrIdx == NewVAddrIdx);
6179 if (OldVAddrIdx >= 0) {
6181 AMDGPU::OpName::vdst_in);
6185 if (NewVDstIn != -1) {
6192 if (NewVDstIn != -1) {
6231 unsigned OpSubReg =
Op.getSubReg();
6240 Register DstReg =
MRI.createVirtualRegister(DstRC);
6251 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6254 bool ImpDef = Def->isImplicitDef();
6255 while (!ImpDef && Def && Def->isCopy()) {
6256 if (Def->getOperand(1).getReg().isPhysical())
6258 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6259 ImpDef = Def && Def->isImplicitDef();
6261 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6276 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6277 unsigned SaveExecOpc =
6278 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6279 unsigned XorTermOpc =
6280 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6282 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6283 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6291 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6292 unsigned NumSubRegs =
RegSize / 32;
6293 Register VScalarOp = ScalarOp->getReg();
6295 if (NumSubRegs == 1) {
6296 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6298 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6301 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6303 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6309 CondReg = NewCondReg;
6311 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6319 ScalarOp->setReg(CurReg);
6320 ScalarOp->setIsKill();
6323 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6324 "Unhandled register size");
6326 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6327 Register CurRegLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6328 Register CurRegHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6331 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6332 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6335 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6336 .
addReg(VScalarOp, VScalarOpUndef,
6337 TRI->getSubRegFromChannel(
Idx + 1));
6343 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6344 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6350 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6351 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6354 if (NumSubRegs <= 2)
6355 Cmp.addReg(VScalarOp);
6357 Cmp.addReg(VScalarOp, VScalarOpUndef,
6358 TRI->getSubRegFromChannel(
Idx, 2));
6362 CondReg = NewCondReg;
6364 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6373 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6374 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6378 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6379 unsigned Channel = 0;
6380 for (
Register Piece : ReadlanePieces) {
6381 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6385 ScalarOp->setReg(SScalarOp);
6386 ScalarOp->setIsKill();
6390 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6391 MRI.setSimpleHint(SaveExec, CondReg);
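// Conceptual sketch of the waterfall loop built above, as a host-side
// simulation rather than MIR: read the value held by the first still-active
// lane, run the memory operation for every lane holding that same value, and
// retire those lanes from the exec mask until none remain. Assumes a wave of
// at most 64 lanes; names here are illustrative.
#include <cstdint>
#include <vector>

void waterfall(const std::vector<uint32_t> &LaneVals, uint64_t ExecMask,
               void (*Body)(uint32_t UniformVal, uint64_t ActiveLanes)) {
  while (ExecMask) {
    // v_readfirstlane: value of the lowest active lane.
    unsigned FirstLane = __builtin_ctzll(ExecMask);
    uint32_t Uniform = LaneVals[FirstLane];
    // v_cmp_eq + s_and_saveexec: lanes whose value matches this iteration.
    uint64_t Match = 0;
    for (unsigned L = 0; L < LaneVals.size(); ++L)
      if (((ExecMask >> L) & 1) && LaneVals[L] == Uniform)
        Match |= 1ull << L;
    Body(Uniform, Match);  // run the op with a now-uniform scalar operand
    ExecMask &= ~Match;    // s_xor_term: retire the lanes just handled
  }
}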
6422 if (!Begin.isValid())
6424 if (!
End.isValid()) {
6429 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6430 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6431 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6438 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6444 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6453 for (
auto I = Begin;
I != AfterMI;
I++) {
6454 for (
auto &MO :
I->all_uses())
6455 MRI.clearKillFlags(MO.getReg());
6490 for (
auto &Succ : RemainderBB->
successors()) {
6513static std::tuple<unsigned, unsigned>
6521 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
6522 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
6525 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6526 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6527 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6528 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6529 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
6537 .
addImm(RsrcDataFormat & 0xFFFFFFFF);
6541 .
addImm(RsrcDataFormat >> 32);
6546 .
addImm(AMDGPU::sub0_sub1)
6552 return std::tuple(RsrcPtr, NewSRsrc);
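// Sketch of the descriptor rewrite above in plain C++: the 64-bit base pointer
// is split out to be added through VGPRs, and the replacement 128-bit resource
// keeps a zero base with the default data format in the top two dwords.
// DefaultRsrcFormat is a stand-in for getDefaultRsrcDataFormat().
#include <array>
#include <cstdint>

std::array<uint32_t, 4> makeNullBaseRsrc(uint64_t DefaultRsrcFormat) {
  return {0u,                                              // sub0: base ptr lo
          0u,                                              // sub1: base ptr hi
          static_cast<uint32_t>(DefaultRsrcFormat),        // sub2: format lo
          static_cast<uint32_t>(DefaultRsrcFormat >> 32)}; // sub3: format hi
}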
6589 if (
MI.getOpcode() == AMDGPU::PHI) {
6591 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
6592 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
6595 MRI.getRegClass(
MI.getOperand(i).getReg());
6610 VRC = &AMDGPU::VReg_1RegClass;
6626 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6628 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6644 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6651 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6653 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6671 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6676 if (DstRC != Src0RC) {
6685 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6693 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6694 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6695 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6696 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6697 MI.getOpcode() == AMDGPU::S_WQM_B64) {
6712 : AMDGPU::OpName::srsrc;
6717 int SampOpName =
isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6726 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6732 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6733 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6738 while (Start->getOpcode() != FrameSetupOpcode)
6741 while (
End->getOpcode() != FrameDestroyOpcode)
6745 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
6746 MI.definesRegister(
End->getOperand(1).getReg(),
nullptr))
6754 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6756 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6767 bool isSoffsetLegal =
true;
6770 if (SoffsetIdx != -1) {
6774 isSoffsetLegal =
false;
6778 bool isRsrcLegal =
true;
6781 if (RsrcIdx != -1) {
6784 isRsrcLegal =
false;
6789 if (isRsrcLegal && isSoffsetLegal)
6813 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6814 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6815 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6817 const auto *BoolXExecRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6818 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
6819 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
6821 unsigned RsrcPtr, NewSRsrc;
6828 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6835 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6853 "FIXME: Need to emit flat atomics here");
6855 unsigned RsrcPtr, NewSRsrc;
6858 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6881 MIB.
addImm(CPol->getImm());
6886 MIB.
addImm(TFE->getImm());
6906 MI.removeFromParent();
6911 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6913 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6917 if (!isSoffsetLegal) {
6929 if (!isSoffsetLegal) {
6938 InstrList.insert(
MI);
6942 if (RsrcIdx != -1) {
6943 DeferredList.insert(
MI);
6948 return DeferredList.contains(
MI);
6954 while (!Worklist.
empty()) {
6968 "Deferred MachineInstr are not supposed to re-populate worklist");
6986 case AMDGPU::S_ADD_U64_PSEUDO:
6987 NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
6989 case AMDGPU::S_SUB_U64_PSEUDO:
6990 NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
6992 case AMDGPU::S_ADD_I32:
6993 case AMDGPU::S_SUB_I32: {
6997 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7005 case AMDGPU::S_MUL_U64:
7007 splitScalarSMulU64(Worklist, Inst, MDT);
7011 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7012 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7015 splitScalarSMulPseudo(Worklist, Inst, MDT);
7019 case AMDGPU::S_AND_B64:
7020 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7024 case AMDGPU::S_OR_B64:
7025 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7029 case AMDGPU::S_XOR_B64:
7030 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7034 case AMDGPU::S_NAND_B64:
7035 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7039 case AMDGPU::S_NOR_B64:
7040 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7044 case AMDGPU::S_XNOR_B64:
7046 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7048 splitScalar64BitXnor(Worklist, Inst, MDT);
7052 case AMDGPU::S_ANDN2_B64:
7053 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7057 case AMDGPU::S_ORN2_B64:
7058 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7062 case AMDGPU::S_BREV_B64:
7063 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7067 case AMDGPU::S_NOT_B64:
7068 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7072 case AMDGPU::S_BCNT1_I32_B64:
7073 splitScalar64BitBCNT(Worklist, Inst);
7077 case AMDGPU::S_BFE_I64:
7078 splitScalar64BitBFE(Worklist, Inst);
7082 case AMDGPU::S_FLBIT_I32_B64:
7083 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7086 case AMDGPU::S_FF1_I32_B64:
7087 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7091 case AMDGPU::S_LSHL_B32:
7093 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7097 case AMDGPU::S_ASHR_I32:
7099 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7103 case AMDGPU::S_LSHR_B32:
7105 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7109 case AMDGPU::S_LSHL_B64:
7112 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7113 : AMDGPU::V_LSHLREV_B64_e64;
7117 case AMDGPU::S_ASHR_I64:
7119 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7123 case AMDGPU::S_LSHR_B64:
7125 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7130 case AMDGPU::S_ABS_I32:
7131 lowerScalarAbs(Worklist, Inst);
7135 case AMDGPU::S_CBRANCH_SCC0:
7136 case AMDGPU::S_CBRANCH_SCC1: {
7139 bool IsSCC = CondReg == AMDGPU::SCC;
7142 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7145 .
addReg(IsSCC ? VCC : CondReg);
7149 case AMDGPU::S_BFE_U64:
7150 case AMDGPU::S_BFM_B64:
7153 case AMDGPU::S_PACK_LL_B32_B16:
7154 case AMDGPU::S_PACK_LH_B32_B16:
7155 case AMDGPU::S_PACK_HL_B32_B16:
7156 case AMDGPU::S_PACK_HH_B32_B16:
7157 movePackToVALU(Worklist,
MRI, Inst);
7161 case AMDGPU::S_XNOR_B32:
7162 lowerScalarXnor(Worklist, Inst);
7166 case AMDGPU::S_NAND_B32:
7167 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7171 case AMDGPU::S_NOR_B32:
7172 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7176 case AMDGPU::S_ANDN2_B32:
7177 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7181 case AMDGPU::S_ORN2_B32:
7182 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7190 case AMDGPU::S_ADD_CO_PSEUDO:
7191 case AMDGPU::S_SUB_CO_PSEUDO: {
7192 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7193 ? AMDGPU::V_ADDC_U32_e64
7194 : AMDGPU::V_SUBB_U32_e64;
7195 const auto *CarryRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
7198 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7199 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7217 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7221 case AMDGPU::S_UADDO_PSEUDO:
7222 case AMDGPU::S_USUBO_PSEUDO: {
7229 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7230 ? AMDGPU::V_ADD_CO_U32_e64
7231 : AMDGPU::V_SUB_CO_U32_e64;
7234 Register DestReg =
MRI.createVirtualRegister(NewRC);
7242 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7249 case AMDGPU::S_CSELECT_B32:
7250 case AMDGPU::S_CSELECT_B64:
7251 lowerSelect(Worklist, Inst, MDT);
7254 case AMDGPU::S_CMP_EQ_I32:
7255 case AMDGPU::S_CMP_LG_I32:
7256 case AMDGPU::S_CMP_GT_I32:
7257 case AMDGPU::S_CMP_GE_I32:
7258 case AMDGPU::S_CMP_LT_I32:
7259 case AMDGPU::S_CMP_LE_I32:
7260 case AMDGPU::S_CMP_EQ_U32:
7261 case AMDGPU::S_CMP_LG_U32:
7262 case AMDGPU::S_CMP_GT_U32:
7263 case AMDGPU::S_CMP_GE_U32:
7264 case AMDGPU::S_CMP_LT_U32:
7265 case AMDGPU::S_CMP_LE_U32:
7266 case AMDGPU::S_CMP_EQ_U64:
7267 case AMDGPU::S_CMP_LG_U64:
7268 case AMDGPU::S_CMP_LT_F32:
7269 case AMDGPU::S_CMP_EQ_F32:
7270 case AMDGPU::S_CMP_LE_F32:
7271 case AMDGPU::S_CMP_GT_F32:
7272 case AMDGPU::S_CMP_LG_F32:
7273 case AMDGPU::S_CMP_GE_F32:
7274 case AMDGPU::S_CMP_O_F32:
7275 case AMDGPU::S_CMP_U_F32:
7276 case AMDGPU::S_CMP_NGE_F32:
7277 case AMDGPU::S_CMP_NLG_F32:
7278 case AMDGPU::S_CMP_NGT_F32:
7279 case AMDGPU::S_CMP_NLE_F32:
7280 case AMDGPU::S_CMP_NEQ_F32:
7281 case AMDGPU::S_CMP_NLT_F32:
7282 case AMDGPU::S_CMP_LT_F16:
7283 case AMDGPU::S_CMP_EQ_F16:
7284 case AMDGPU::S_CMP_LE_F16:
7285 case AMDGPU::S_CMP_GT_F16:
7286 case AMDGPU::S_CMP_LG_F16:
7287 case AMDGPU::S_CMP_GE_F16:
7288 case AMDGPU::S_CMP_O_F16:
7289 case AMDGPU::S_CMP_U_F16:
7290 case AMDGPU::S_CMP_NGE_F16:
7291 case AMDGPU::S_CMP_NLG_F16:
7292 case AMDGPU::S_CMP_NGT_F16:
7293 case AMDGPU::S_CMP_NLE_F16:
7294 case AMDGPU::S_CMP_NEQ_F16:
7295 case AMDGPU::S_CMP_NLT_F16: {
7301 AMDGPU::OpName::src0_modifiers) >= 0) {
7316 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7320 case AMDGPU::S_CVT_HI_F32_F16: {
7322 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7323 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7334 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7338 case AMDGPU::S_MINIMUM_F32:
7339 case AMDGPU::S_MAXIMUM_F32:
7340 case AMDGPU::S_MINIMUM_F16:
7341 case AMDGPU::S_MAXIMUM_F16: {
7343 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7354 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7360 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7368 if (NewOpcode == Opcode) {
7392 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7404 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7405 MRI.replaceRegWith(DstReg, NewDstReg);
7407 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7418 AMDGPU::OpName::src0_modifiers) >= 0)
7423 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7424 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7426 NewInstr->addOperand(Src);
7429 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7432 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7434 NewInstr.addImm(
Size);
7435 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7439 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7444 "Scalar BFE is only implemented for constant width and offset");
7453 AMDGPU::OpName::src1_modifiers) >= 0)
7458 AMDGPU::OpName::src2_modifiers) >= 0)
7472 NewInstr->addOperand(
Op);
7479 if (
Op.getReg() == AMDGPU::SCC) {
7481 if (
Op.isDef() && !
Op.isDead())
7482 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7484 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7489 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7490 Register DstReg = NewInstr->getOperand(0).getReg();
7495 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7496 MRI.replaceRegWith(DstReg, NewDstReg);
7502 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7506std::pair<bool, MachineBasicBlock *>
7518 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7521 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
7523 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
7524 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
7532 MRI.replaceRegWith(OldDstReg, ResultReg);
7535 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7536 return std::pair(
true, NewBB);
7539 return std::pair(
false,
nullptr);
7556   bool IsSCC = (CondReg == AMDGPU::SCC);
7564     MRI.replaceRegWith(Dest.getReg(), CondReg);
7572       NewCondReg = MRI.createVirtualRegister(TC);
7576     bool CopyFound = false;
7580       if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
7582         if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7584               .addReg(CandI.getOperand(1).getReg());
7596                              : AMDGPU::S_CSELECT_B32;
7606   if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
7607     NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7620   MRI.replaceRegWith(Dest.getReg(), NewDestReg);
7622   addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
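// Sketch of the per-lane semantics V_CNDMASK_B32 gives once the scalar
// S_CSELECT above has been rewritten: each lane picks src1 or src0 based on
// its bit in the (widened) condition mask. Illustrative only.
#include <cstdint>
#include <vector>

std::vector<uint32_t> cndmask(const std::vector<uint32_t> &Src0,
                              const std::vector<uint32_t> &Src1,
                              uint64_t CondMask) {
  std::vector<uint32_t> Dst(Src0.size());
  for (unsigned L = 0; L < Src0.size(); ++L)
    Dst[L] = ((CondMask >> L) & 1) ? Src1[L] : Src0[L];
  return Dst;
}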
7634 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7635 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7638 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7648 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7649 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
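// The scalar abs above is lowered to a subtract-from-zero followed by a signed
// max. A minimal sketch of the same arithmetic, assuming 32-bit two's
// complement wraparound (so INT_MIN maps to INT_MIN, as on the hardware):
#include <algorithm>
#include <cstdint>

int32_t absViaSubMax(int32_t X) {
  int32_t Neg = static_cast<int32_t>(0u - static_cast<uint32_t>(X)); // v_sub
  return std::max(X, Neg);                                           // v_max_i32
}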
7664 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7672 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7673 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7679 bool Src0IsSGPR = Src0.
isReg() &&
7681 bool Src1IsSGPR = Src1.
isReg() &&
7684 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7685 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7695 }
else if (Src1IsSGPR) {
7709 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7713 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
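// Without a native xnor (see the hasDLInsts() path above), xnor is rebuilt
// from xor and not. Since ~(a ^ b) == (~a) ^ b, the 'not' can be applied to
// whichever operand is still a scalar so that half stays on the SALU. Sketch:
#include <cstdint>

uint32_t xnorLowered(uint32_t A, uint32_t B, bool AIsScalar) {
  if (AIsScalar)
    return (~A) ^ B;   // s_not_b32 on A, then v_xor_b32
  return ~(A ^ B);     // v_xor_b32, then v_not_b32
}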
7719 unsigned Opcode)
const {
7729 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7730 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7742 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7743 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7748 unsigned Opcode)
const {
7758 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7759 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7771 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7772 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7790 &AMDGPU::SGPR_32RegClass;
7793 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7796 AMDGPU::sub0, Src0SubRC);
7801 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7803 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
7807 AMDGPU::sub1, Src0SubRC);
7809 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
7815 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
7822 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7824 Worklist.
insert(&LoHalf);
7825 Worklist.
insert(&HiHalf);
7831 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7842 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7843 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7844 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7855 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7859 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7889 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7895 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7901 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7912 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7928 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7940 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
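// The 64-bit multiply above is decomposed into 32-bit VALU multiplies: the low
// dword is mul_lo of the low halves, and the high dword accumulates mul_hi of
// the low halves plus the two cross products. Sketch of that arithmetic:
#include <cstdint>

uint64_t mul64From32(uint64_t A, uint64_t B) {
  uint32_t ALo = static_cast<uint32_t>(A), AHi = static_cast<uint32_t>(A >> 32);
  uint32_t BLo = static_cast<uint32_t>(B), BHi = static_cast<uint32_t>(B >> 32);
  uint32_t Lo = ALo * BLo;                                          // v_mul_lo_u32
  uint32_t Hi = static_cast<uint32_t>((uint64_t(ALo) * BLo) >> 32)  // v_mul_hi_u32
              + ALo * BHi + AHi * BLo;                              // cross terms
  return (uint64_t(Hi) << 32) | Lo;
}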
7951 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7952 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7953 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7964 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7968 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7980 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
7981 ? AMDGPU::V_MUL_HI_U32_e64
7982 : AMDGPU::V_MUL_HI_I32_e64;
7997 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8005 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8024 &AMDGPU::SGPR_32RegClass;
8027 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8030 &AMDGPU::SGPR_32RegClass;
8033 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8036 AMDGPU::sub0, Src0SubRC);
8038 AMDGPU::sub0, Src1SubRC);
8040 AMDGPU::sub1, Src0SubRC);
8042 AMDGPU::sub1, Src1SubRC);
8047 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8049 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8054 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8059 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8066 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8068 Worklist.
insert(&LoHalf);
8069 Worklist.
insert(&HiHalf);
8072 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
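// Bitwise 64-bit SALU ops are split above into independent 32-bit halves that
// are reassembled with a REG_SEQUENCE; for and/or/xor and friends the halves
// never interact. Minimal sketch (Op32 stands in for the chosen 32-bit op):
#include <cstdint>
#include <functional>

uint64_t splitBitwise64(uint64_t A, uint64_t B,
                        const std::function<uint32_t(uint32_t, uint32_t)> &Op32) {
  uint32_t Lo = Op32(static_cast<uint32_t>(A), static_cast<uint32_t>(B));
  uint32_t Hi = Op32(static_cast<uint32_t>(A >> 32),
                     static_cast<uint32_t>(B >> 32));
  return (uint64_t(Hi) << 32) | Lo;  // REG_SEQUENCE of the two halves
}
// e.g. splitBitwise64(a, b, [](uint32_t x, uint32_t y) { return x & y; });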
8090 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8106 Register NewDest =
MRI.createVirtualRegister(DestRC);
8112 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8130 MRI.getRegClass(Src.getReg()) :
8131 &AMDGPU::SGPR_32RegClass;
8133 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8134 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8137 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8140 AMDGPU::sub0, SrcSubRC);
8142 AMDGPU::sub1, SrcSubRC);
8148 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8152 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
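// The 64-bit popcount above becomes two V_BCNT_U32_B32 ops; the second one
// takes the first result as its accumulator operand, so no extra add is
// needed. Equivalent arithmetic (C++20 std::popcount stands in for bcnt):
#include <bit>
#include <cstdint>

uint32_t popcount64Via32(uint64_t V) {
  uint32_t Mid = std::popcount(static_cast<uint32_t>(V));      // bcnt(lo, 0)
  return std::popcount(static_cast<uint32_t>(V >> 32)) + Mid;  // bcnt(hi, Mid)
}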
8171 Offset == 0 &&
"Not implemented");
8174 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8175 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8176 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8193 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8194 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8199 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8200 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8204 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8207 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8212 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8213 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
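// S_BFE_I64 with a zero offset is split above into a signed 32-bit BFE on the
// low dword plus an arithmetic shift that replicates the sign into the high
// dword (for a full 32-bit width only the shift is needed). Sketch, assuming
// 1 <= Width <= 32 and an arithmetic right shift on signed values:
#include <cstdint>

int64_t bfeI64Offset0(uint64_t Src, unsigned Width) {
  uint32_t LoBits = static_cast<uint32_t>(Src);
  int32_t Lo = static_cast<int32_t>(LoBits);
  if (Width < 32)  // v_bfe_i32: keep Width low bits and sign-extend them
    Lo = static_cast<int32_t>(LoBits << (32 - Width)) >> (32 - Width);
  int32_t Hi = Lo >> 31;                                   // v_ashrrev_i32 31
  return (int64_t(uint32_t(Hi)) << 32) | uint32_t(Lo);     // REG_SEQUENCE
}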
8234 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8235 unsigned OpcodeAdd =
8236 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8239 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8241 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8248 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8249 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8250 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8251 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8258 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8264 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8266 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8268 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
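// The 64-bit find-first-bit above is built from two 32-bit counts: the count
// of the "far" half is biased by 32 and an unsigned min picks the answer,
// relying on the 32-bit op returning 0xFFFFFFFF for a zero input. Sketch for
// the count-leading-zeros (ffbh) case; the real sequence uses the add's clamp
// modifier instead of the explicit saturation check below.
#include <algorithm>
#include <cstdint>

uint32_t ffbh32(uint32_t V) {                 // V_FFBH_U32-like semantics
  if (V == 0) return 0xFFFFFFFFu;
  uint32_t N = 0;
  while (!(V & 0x80000000u)) { V <<= 1; ++N; }
  return N;
}

uint32_t ffbh64Via32(uint64_t V) {
  uint32_t FromHi = ffbh32(static_cast<uint32_t>(V >> 32));
  uint32_t FromLo = ffbh32(static_cast<uint32_t>(V));
  uint32_t LoBiased = FromLo == 0xFFFFFFFFu ? FromLo : FromLo + 32;
  return std::min(FromHi, LoBiased);
}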
8271 void SIInstrInfo::addUsersToMoveToVALUWorklist(
8275                                                E = MRI.use_end(); I != E;) {
8280     switch (UseMI.getOpcode()) {
8283     case AMDGPU::SOFT_WQM:
8284     case AMDGPU::STRICT_WWM:
8285     case AMDGPU::STRICT_WQM:
8286     case AMDGPU::REG_SEQUENCE:
8288     case AMDGPU::INSERT_SUBREG:
8291       OpNo = I.getOperandNo();
8300     } while (I != E && I->getParent() == &UseMI);
8310 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8317 case AMDGPU::S_PACK_LL_B32_B16: {
8318 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8319 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8336 case AMDGPU::S_PACK_LH_B32_B16: {
8337 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8346 case AMDGPU::S_PACK_HL_B32_B16: {
8347 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8357 case AMDGPU::S_PACK_HH_B32_B16: {
8358 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8359 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8376 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8377 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
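// The S_PACK_{LL,LH,HL,HH} lowerings above all build a 32-bit value whose low
// half comes from src0 and whose high half comes from src1; the opcode only
// chooses which 16-bit half of each source is taken. Sketch:
#include <cstdint>

uint32_t packB32(uint32_t Src0, uint32_t Src1, bool TakeHi0, bool TakeHi1) {
  uint32_t Lo = TakeHi0 ? (Src0 >> 16) : (Src0 & 0xFFFFu);
  uint32_t Hi = TakeHi1 ? (Src1 >> 16) : (Src1 & 0xFFFFu);
  return (Hi << 16) | Lo;  // e.g. S_PACK_LL: TakeHi0 = false, TakeHi1 = false
}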
8386 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8387 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8395 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
8399 Register DestReg =
MI.getOperand(0).getReg();
8401 MRI.replaceRegWith(DestReg, NewCond);
8406 MI.getOperand(SCCIdx).setReg(NewCond);
8412 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
8415 for (
auto &Copy : CopyToDelete)
8416 Copy->eraseFromParent();
8424void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8433 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8435 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8452 case AMDGPU::REG_SEQUENCE:
8453 case AMDGPU::INSERT_SUBREG:
8455 case AMDGPU::SOFT_WQM:
8456 case AMDGPU::STRICT_WWM:
8457 case AMDGPU::STRICT_WQM: {
8465 case AMDGPU::REG_SEQUENCE:
8466 case AMDGPU::INSERT_SUBREG:
8476 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8493 int OpIndices[3])
const {
8512 for (
unsigned i = 0; i < 3; ++i) {
8513 int Idx = OpIndices[i];
8550 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8551 SGPRReg = UsedSGPRs[0];
8554 if (!SGPRReg && UsedSGPRs[1]) {
8555 if (UsedSGPRs[1] == UsedSGPRs[2])
8556 SGPRReg = UsedSGPRs[1];
8563 unsigned OperandName)
const {
8568 return &
MI.getOperand(
Idx);
8585 RsrcDataFormat |= (1ULL << 56);
8590 RsrcDataFormat |= (2ULL << 59);
8593 return RsrcDataFormat;
8615 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8621 unsigned Opc =
MI.getOpcode();
8627 return get(Opc).mayLoad() &&
8632 int &FrameIndex)
const {
8640 FrameIndex =
Addr->getIndex();
8645 int &FrameIndex)
const {
8648 FrameIndex =
Addr->getIndex();
8653 int &FrameIndex)
const {
8667 int &FrameIndex)
const {
8684 while (++
I != E &&
I->isInsideBundle()) {
8685 assert(!
I->isBundle() &&
"No nested bundle!");
8693 unsigned Opc =
MI.getOpcode();
8695 unsigned DescSize =
Desc.getSize();
8700 unsigned Size = DescSize;
8715 bool HasLiteral =
false;
8716 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
8724 return HasLiteral ? DescSize + 4 : DescSize;
8734 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8738 case TargetOpcode::BUNDLE:
8740 case TargetOpcode::INLINEASM:
8741 case TargetOpcode::INLINEASM_BR: {
8743 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8747 if (
MI.isMetaInstruction())
8757 if (
MI.memoperands_empty())
8768 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
8780 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8783 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_IF), DstReg)
8784 .
add(Branch->getOperand(0))
8785 .
add(Branch->getOperand(1));
8787 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_END_CF))
8806 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8811 BuildMI(*(MF), Branch->getDebugLoc(),
get(TargetOpcode::PHI), DstReg);
8813 if (PMBB == LoopEnd) {
8814 HeaderPHIBuilder.
addReg(BackEdgeReg);
8819 HeaderPHIBuilder.
addReg(ZeroReg);
8821 HeaderPHIBuilder.
addMBB(PMBB);
8825 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
8827 .
add(Branch->getOperand(0));
8829 BuildMI(*(MF), Branch->getDebugLoc(),
get(AMDGPU::SI_LOOP))
8835 LoopEnd->
insert(LoopEnd->
end(), SIIFBREAK);
8836 LoopEnd->
insert(LoopEnd->
end(), SILOOP);
8842 static const std::pair<int, const char *> TargetIndices[] = {
8880std::pair<unsigned, unsigned>
8887 static const std::pair<unsigned, const char *> TargetFlags[] = {
8902 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8916 return AMDGPU::WWM_COPY;
8918 return AMDGPU::COPY;
8929 bool IsNullOrVectorRegister =
true;
8938 return IsNullOrVectorRegister &&
8939 (
isSpill(Opcode) || (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
8940 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
8953 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
8984 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
8985 case AMDGPU::SI_KILL_I1_TERMINATOR:
8994 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
8995 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
8996 case AMDGPU::SI_KILL_I1_PSEUDO:
8997 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9009 const unsigned OffsetBits =
9011 return (1 << OffsetBits) - 1;
9018 if (
MI.isInlineAsm())
9021 for (
auto &
Op :
MI.implicit_operands()) {
9022 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
9023 Op.setReg(AMDGPU::VCC_LO);
9036 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9054 if (Imm <= MaxImm + 64) {
9056 Overflow = Imm - MaxImm;
9146 std::pair<int64_t, int64_t>
9149   int64_t RemainderOffset = COffsetVal;
9150   int64_t ImmField = 0;
9155   if (AllowNegative) {
9157     int64_t D = 1LL << NumBits;
9158     RemainderOffset = (COffsetVal / D) * D;
9159     ImmField = COffsetVal - RemainderOffset;
9163         (ImmField % 4) != 0) {
9165       RemainderOffset += ImmField % 4;
9166       ImmField -= ImmField % 4;
9168   } else if (COffsetVal >= 0) {
9169     ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
9170     RemainderOffset = COffsetVal - ImmField;
9174   assert(RemainderOffset + ImmField == COffsetVal);
9175   return {ImmField, RemainderOffset};
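// Standalone sketch of the offset split above: carve off the part of a
// constant offset that fits in the instruction's NumBits-wide immediate field
// and leave the rest to be materialized in a register, preserving
// ImmField + RemainderOffset == COffsetVal. The subtarget workaround for
// unaligned negative scratch offsets (lines 9163-9166 above) is omitted here.
#include <cassert>
#include <cstdint>
#include <utility>

std::pair<int64_t, int64_t> splitOffset(int64_t COffsetVal, unsigned NumBits,
                                        bool AllowNegative) {
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  if (AllowNegative) {
    int64_t D = 1LL << NumBits;              // signed immediate range
    RemainderOffset = (COffsetVal / D) * D;  // truncate toward zero
    ImmField = COffsetVal - RemainderOffset;
  } else if (COffsetVal >= 0) {
    ImmField = COffsetVal & ((1ULL << NumBits) - 1);
    RemainderOffset = COffsetVal - ImmField;
  }
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}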
9187 switch (ST.getGeneration()) {
9212 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9213 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9214 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9215 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9216 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9217 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9218 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9219 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9255 if (
isMAI(Opcode)) {
9300 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
9301 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9302 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9314 switch (
MI.getOpcode()) {
9316 case AMDGPU::REG_SEQUENCE:
9320 case AMDGPU::INSERT_SUBREG:
9321 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9338 if (!
P.Reg.isVirtual())
9342 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9343 while (
auto *
MI = DefInst) {
9345 switch (
MI->getOpcode()) {
9347 case AMDGPU::V_MOV_B32_e32: {
9348 auto &Op1 =
MI->getOperand(1);
9353 DefInst =
MRI.getVRegDef(RSR.Reg);
9361 DefInst =
MRI.getVRegDef(RSR.Reg);
9374 assert(
MRI.isSSA() &&
"Must be run on SSA");
9376 auto *
TRI =
MRI.getTargetRegisterInfo();
9377 auto *DefBB =
DefMI.getParent();
9381 if (
UseMI.getParent() != DefBB)
9384 const int MaxInstScan = 20;
9388 auto E =
UseMI.getIterator();
9389 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
9390 if (
I->isDebugInstr())
9393 if (++NumInst > MaxInstScan)
9396 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9406 assert(
MRI.isSSA() &&
"Must be run on SSA");
9408 auto *
TRI =
MRI.getTargetRegisterInfo();
9409 auto *DefBB =
DefMI.getParent();
9411 const int MaxUseScan = 10;
9414 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9415 auto &UseInst = *
Use.getParent();
9418 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9421 if (++NumUse > MaxUseScan)
9428 const int MaxInstScan = 20;
9432 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9435 if (
I->isDebugInstr())
9438 if (++NumInst > MaxInstScan)
9451 if (Reg == VReg && --NumUse == 0)
9453 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9465 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
9468 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9477 if (InsPt !=
MBB.
end() &&
9478 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9479 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9480 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9481 InsPt->definesRegister(Src,
nullptr)) {
9485 : AMDGPU::S_MOV_B64_term),
9487 .
addReg(Src, 0, SrcSubReg)
9512 if (isFullCopyInstr(
MI)) {
9521 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9524 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9535 unsigned *PredCost)
const {
9536 if (
MI.isBundle()) {
9539 unsigned Lat = 0, Count = 0;
9540 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
9542 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
9544 return Lat + Count - 1;
9547 return SchedModel.computeInstrLatency(&
MI);
9552 unsigned opcode =
MI.getOpcode();
9553 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9554 auto IID = GI->getIntrinsicID();
9561 case Intrinsic::amdgcn_if:
9562 case Intrinsic::amdgcn_else:
9576 if (opcode == AMDGPU::G_LOAD) {
9577 if (
MI.memoperands_empty())
9581 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9582 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9590 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9591 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9592 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9605 unsigned opcode =
MI.getOpcode();
9606 if (opcode == AMDGPU::V_READLANE_B32 ||
9607 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9608 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9611 if (isCopyInstr(
MI)) {
9615 RI.getPhysRegBaseClass(srcOp.
getReg());
9623 if (
MI.isPreISelOpcode())
9638 if (
MI.memoperands_empty())
9642 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9643 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9658 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
9664 if (!Reg || !
SrcOp.readsReg())
9670 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
9707                                  Register &SrcReg2, int64_t &CmpMask,
9708                                  int64_t &CmpValue) const {
9709   if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
9712   switch (MI.getOpcode()) {
9715   case AMDGPU::S_CMP_EQ_U32:
9716   case AMDGPU::S_CMP_EQ_I32:
9717   case AMDGPU::S_CMP_LG_U32:
9718   case AMDGPU::S_CMP_LG_I32:
9719   case AMDGPU::S_CMP_LT_U32:
9720   case AMDGPU::S_CMP_LT_I32:
9721   case AMDGPU::S_CMP_GT_U32:
9722   case AMDGPU::S_CMP_GT_I32:
9723   case AMDGPU::S_CMP_LE_U32:
9724   case AMDGPU::S_CMP_LE_I32:
9725   case AMDGPU::S_CMP_GE_U32:
9726   case AMDGPU::S_CMP_GE_I32:
9727   case AMDGPU::S_CMP_EQ_U64:
9728   case AMDGPU::S_CMP_LG_U64:
9729     SrcReg = MI.getOperand(0).getReg();
9730     if (MI.getOperand(1).isReg()) {
9731       if (MI.getOperand(1).getSubReg())
9733       SrcReg2 = MI.getOperand(1).getReg();
9735     } else if (MI.getOperand(1).isImm()) {
9737       CmpValue = MI.getOperand(1).getImm();
9743   case AMDGPU::S_CMPK_EQ_U32:
9744   case AMDGPU::S_CMPK_EQ_I32:
9745   case AMDGPU::S_CMPK_LG_U32:
9746   case AMDGPU::S_CMPK_LG_I32:
9747   case AMDGPU::S_CMPK_LT_U32:
9748   case AMDGPU::S_CMPK_LT_I32:
9749   case AMDGPU::S_CMPK_GT_U32:
9750   case AMDGPU::S_CMPK_GT_I32:
9751   case AMDGPU::S_CMPK_LE_U32:
9752   case AMDGPU::S_CMPK_LE_I32:
9753   case AMDGPU::S_CMPK_GE_U32:
9754   case AMDGPU::S_CMPK_GE_I32:
9755     SrcReg = MI.getOperand(0).getReg();
9757     CmpValue = MI.getOperand(1).getImm();
9775   const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
9776                                this](int64_t ExpectedValue, unsigned SrcSize,
9777                                      bool IsReversible, bool IsSigned) -> bool {
9802     if (!Def || Def->getParent() != CmpInstr.getParent())
9805     if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
9806         Def->getOpcode() != AMDGPU::S_AND_B64)
9810     const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
9821       SrcOp = &Def->getOperand(2);
9822     else if (isMask(&Def->getOperand(2)))
9823       SrcOp = &Def->getOperand(1);
9828     if (IsSigned && BitNo == SrcSize - 1)
9831     ExpectedValue <<= BitNo;
9833     bool IsReversedCC = false;
9834     if (CmpValue != ExpectedValue) {
9837       IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
9842     Register DefReg = Def->getOperand(0).getReg();
9843     if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
9846     for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
9848       if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
9849           I->killsRegister(AMDGPU::SCC, &RI))
9854         Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
9858     if (!MRI->use_nodbg_empty(DefReg)) {
9866     unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
9867                                                      : AMDGPU::S_BITCMP1_B32
9868                                       : IsReversedCC ? AMDGPU::S_BITCMP0_B64
9869                                                      : AMDGPU::S_BITCMP1_B64;
9874     Def->eraseFromParent();
9882   case AMDGPU::S_CMP_EQ_U32:
9883   case AMDGPU::S_CMP_EQ_I32:
9884   case AMDGPU::S_CMPK_EQ_U32:
9885   case AMDGPU::S_CMPK_EQ_I32:
9886     return optimizeCmpAnd(1, 32, true, false);
9887   case AMDGPU::S_CMP_GE_U32:
9888   case AMDGPU::S_CMPK_GE_U32:
9889     return optimizeCmpAnd(1, 32, false, false);
9890   case AMDGPU::S_CMP_GE_I32:
9891   case AMDGPU::S_CMPK_GE_I32:
9892     return optimizeCmpAnd(1, 32, false, true);
9893   case AMDGPU::S_CMP_EQ_U64:
9894     return optimizeCmpAnd(1, 64, true, false);
9895   case AMDGPU::S_CMP_LG_U32:
9896   case AMDGPU::S_CMP_LG_I32:
9897   case AMDGPU::S_CMPK_LG_U32:
9898   case AMDGPU::S_CMPK_LG_I32:
9899     return optimizeCmpAnd(0, 32, true, false);
9900   case AMDGPU::S_CMP_GT_U32:
9901   case AMDGPU::S_CMPK_GT_U32:
9902     return optimizeCmpAnd(0, 32, false, false);
9903   case AMDGPU::S_CMP_GT_I32:
9904   case AMDGPU::S_CMPK_GT_I32:
9905     return optimizeCmpAnd(0, 32, false, true);
9906   case AMDGPU::S_CMP_LG_U64:
9907     return optimizeCmpAnd(0, 64, true, false);
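// The peephole above recognizes "compare (x & single-bit-mask) against 0 or
// against the mask" and folds it to a bit test (S_BITCMP0/S_BITCMP1 writing
// SCC). A sketch of the matching logic, ignoring the sign-bit and SCC-liveness
// checks; the struct and helper names are illustrative, not LLVM API.
#include <cstdint>
#include <optional>

struct BitTest {
  unsigned BitNo;  // which bit is tested
  bool TestSet;    // true: SCC = bit set (BITCMP1); false: bit clear (BITCMP0)
};

std::optional<BitTest> matchCmpAnd(uint64_t Mask, int64_t CmpValue,
                                   int64_t ExpectedValue, bool IsReversible) {
  if (Mask == 0 || (Mask & (Mask - 1)) != 0)
    return std::nullopt;                         // not a single-bit mask
  unsigned BitNo = __builtin_ctzll(Mask);
  ExpectedValue = int64_t(uint64_t(ExpectedValue) << BitNo);  // scale 0/1 to the bit
  bool Reversed = false;
  if (CmpValue != ExpectedValue) {
    if (!IsReversible || CmpValue != (ExpectedValue ^ int64_t(Mask)))
      return std::nullopt;                       // compares some unrelated value
    Reversed = true;                             // eq<->lg flip: invert the test
  }
  return BitTest{BitNo, !Reversed};
}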
9932 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
9935 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
9936 : &AMDGPU::VReg_64_Align2RegClass);
9938 .
addReg(DataReg, 0,
Op.getSubReg())
9943 Op.setSubReg(AMDGPU::sub0);
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
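Since several ArrayRef accessors (front, size, empty) are referenced here, a small hypothetical caller may help; the function name and fallback value are made up for illustration.

#include "llvm/ADT/ArrayRef.h"

// Hypothetical caller exercising the accessors above: returns the first
// element if the view is non-empty, otherwise a fallback value.
static int firstOrZero(llvm::ArrayRef<int> Vals) {
  if (Vals.empty())     // no elements behind the view
    return 0;
  return Vals.front();  // first of Vals.size() elements, no copy made
}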
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return whether operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const SIRegisterInfo * getRegisterInfo() const override
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all of the successor blocks of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for register operands, etc.
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor blocks.
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
bool isNonUniformBranchInstr(MachineInstr &Instr) const
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given opcode.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
bool isSpill(uint16_t Opcode) const
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to another.
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description.
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT instruction to its VADDR form if the saddr operand was moved to a VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
Whether we must prevent this instruction from executing with EXEC = 0.
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified pseudo opcode.
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
void setHasSpilledVGPRs(bool Spill=true)
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructions.
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination register.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination register.
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination register.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
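Given how often BuildMI and the MachineInstrBuilder operand helpers listed above appear in this file, a hedged usage sketch may help; the opcode, registers, and helper name below are placeholders supplied by the caller, not code taken from SIInstrInfo.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"

// Hypothetical helper: build `DestReg = <Opc> SrcReg, 0` immediately before
// MI. No target-specific legality checks are shown.
static void buildExampleInst(const llvm::TargetInstrInfo &TII,
                             llvm::MachineBasicBlock &MBB,
                             llvm::MachineBasicBlock::iterator MI,
                             const llvm::DebugLoc &DL, unsigned Opc,
                             llvm::Register DestReg, llvm::Register SrcReg) {
  llvm::BuildMI(MBB, MI, DL, TII.get(Opc), DestReg)
      .addReg(SrcReg) // register use operand
      .addImm(0);     // immediate operand
}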
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
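A minimal sketch of the ceiling-division semantics described above; it assumes Denominator is non-zero and that the addition does not overflow, which the in-tree helper handles more carefully. The name is illustrative.

#include <cstdint>

// sketchDivideCeil(7, 4) == 2, sketchDivideCeil(8, 4) == 2.
static uint64_t sketchDivideCeil(uint64_t Numerator, uint64_t Denominator) {
  return (Numerator + Denominator - 1) / Denominator;
}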
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
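A stand-alone sketch of the floor-log2 behaviour documented above (returning -1, as an unsigned value, for zero); it is not the LLVM implementation, which typically uses a count-leading-zeros primitive instead of a loop.

#include <cstdint>

// e.g. sketchLog2_32(40) == 5; sketchLog2_32(0) == 0xFFFFFFFF.
static unsigned sketchLog2_32(uint32_t Value) {
  unsigned Result = -1u;
  while (Value) {
    Value >>= 1;
    ++Result;
  }
  return Result;
}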
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
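The Hi_32 entry above and the Lo_32 entry here split a 64-bit value into halves; the one-liners below sketch that behaviour under assumed names.

#include <cstdint>

// High half is a 32-bit right shift, low half a plain truncation.
static uint32_t sketchHi_32(uint64_t Value) { return uint32_t(Value >> 32); }
static uint32_t sketchLo_32(uint64_t Value) { return uint32_t(Value); }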
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is Skew mod Align.
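A sketch of the rounding just described: drop Value to the nearest multiple of Align, offset by Skew. It assumes Align is non-zero and Value is at least the normalized Skew; the helper name is made up.

#include <cstdint>

// sketchAlignDown(13, 8) == 8; sketchAlignDown(13, 8, 3) == 11.
static uint64_t sketchAlignDown(uint64_t Value, uint64_t Align,
                                uint64_t Skew = 0) {
  Skew %= Align;
  return (Value - Skew) / Align * Align + Skew;
}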
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
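A one-line sketch of the semantics above, assuming 0 < N <= 64; the name is illustrative.

#include <cstdint>

// e.g. sketchMaxUIntN(16) == 0xFFFF, sketchMaxUIntN(64) == UINT64_MAX.
static uint64_t sketchMaxUIntN(uint64_t N) {
  return UINT64_MAX >> (64 - N);
}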
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.