31#include "llvm/IR/IntrinsicsAMDGPU.h"
38#define DEBUG_TYPE "si-instr-info"
40#define GET_INSTRINFO_CTOR_DTOR
41#include "AMDGPUGenInstrInfo.inc"
45#define GET_D16ImageDimIntrinsics_IMPL
46#define GET_ImageDimIntrinsicTable_IMPL
47#define GET_RsrcIntrinsics_IMPL
48#include "AMDGPUGenSearchableTables.inc"
58 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
61 "amdgpu-fix-16-bit-physreg-copies",
62 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
77 unsigned N =
Node->getNumOperands();
78 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
92 if (Op0Idx == -1 && Op1Idx == -1)
96 if ((Op0Idx == -1 && Op1Idx != -1) ||
97 (Op1Idx == -1 && Op0Idx != -1))
118 return !
MI.memoperands_empty() &&
120 return MMO->isLoad() && MMO->isInvariant();
142 if (!
MI.hasImplicitDef() &&
143 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
144 !
MI.mayRaiseFPException())
155 if (
MI.isCompare()) {
161 switch (
Use.getOpcode()) {
162 case AMDGPU::S_AND_SAVEEXEC_B32:
163 case AMDGPU::S_AND_SAVEEXEC_B64:
165 case AMDGPU::S_AND_B32:
166 case AMDGPU::S_AND_B64:
167 if (!
Use.readsRegister(AMDGPU::EXEC))
177 switch (
MI.getOpcode()) {
180 case AMDGPU::V_READFIRSTLANE_B32:
197 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
202 for (
auto Op :
MI.uses()) {
203 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
209 if (FromCycle ==
nullptr)
215 while (FromCycle && !FromCycle->
contains(ToCycle)) {
220 assert(ExitBlocks[0]->getSinglePredecessor());
237 int64_t &Offset1)
const {
245 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
249 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
267 if (Offset0Idx == -1 || Offset1Idx == -1)
274 Offset0Idx -=
get(Opc0).NumDefs;
275 Offset1Idx -=
get(Opc1).NumDefs;
296 assert(NumOps == 4 || NumOps == 5);
301 dyn_cast<ConstantSDNode>(Load0->
getOperand(NumOps - 3));
303 dyn_cast<ConstantSDNode>(Load1->
getOperand(NumOps - 3));
305 if (!Load0Offset || !Load1Offset)
325 if (OffIdx0 == -1 || OffIdx1 == -1)
331 OffIdx0 -=
get(Opc0).NumDefs;
332 OffIdx1 -=
get(Opc1).NumDefs;
338 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
351 case AMDGPU::DS_READ2ST64_B32:
352 case AMDGPU::DS_READ2ST64_B64:
353 case AMDGPU::DS_WRITE2ST64_B32:
354 case AMDGPU::DS_WRITE2ST64_B64:
369 OffsetIsScalable =
false;
399 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
400 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
401 if (Offset0 + 1 != Offset1)
420 Offset = EltSize * Offset0;
423 if (DataOpIdx == -1) {
441 if (BaseOp && !BaseOp->
isFI())
449 if (SOffset->
isReg())
468 if (VAddr0Idx >= 0) {
470 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
526 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
534 if (MO1->getAddrSpace() != MO2->getAddrSpace())
537 auto Base1 = MO1->getValue();
538 auto Base2 = MO2->getValue();
539 if (!Base1 || !Base2)
544 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
547 return Base1 == Base2;
551 int64_t Offset1,
bool OffsetIsScalable1,
553 int64_t Offset2,
bool OffsetIsScalable2,
554 unsigned ClusterSize,
555 unsigned NumBytes)
const {
563 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
580 const unsigned LoadSize = NumBytes / ClusterSize;
581 const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
582 return NumDWORDs <= 8;
596 int64_t Offset0, int64_t Offset1,
597 unsigned NumLoads)
const {
598 assert(Offset1 > Offset0 &&
599 "Second offset should be larger than first offset!");
604 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
611 const char *Msg =
"illegal VGPR to SGPR copy") {
615 C.diagnose(IllegalCopy);
632 assert((
TII.getSubtarget().hasMAIInsts() &&
633 !
TII.getSubtarget().hasGFX90AInsts()) &&
634 "Expected GFX908 subtarget.");
637 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
638 "Source register of the copy should be either an SGPR or an AGPR.");
641 "Destination register of the copy should be an AGPR.");
653 if (!Def->modifiesRegister(SrcReg, &RI))
656 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
657 Def->getOperand(0).getReg() != SrcReg)
664 bool SafeToPropagate =
true;
667 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
668 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
669 SafeToPropagate =
false;
671 if (!SafeToPropagate)
683 if (ImpUseSuperReg) {
684 Builder.
addReg(ImpUseSuperReg,
702 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
706 "VGPR used for an intermediate copy should have been reserved.");
721 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
722 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
723 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
730 if (ImpUseSuperReg) {
731 UseBuilder.
addReg(ImpUseSuperReg,
753 int16_t SubIdx = BaseIndices[
Idx];
754 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
755 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
756 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
757 unsigned Opcode = AMDGPU::S_MOV_B32;
760 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
761 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
762 if (AlignedDest && AlignedSrc && (
Idx + 1 < BaseIndices.
size())) {
766 DestSubReg = RI.getSubReg(DestReg, SubIdx);
767 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
768 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
769 Opcode = AMDGPU::S_MOV_B64;
784 assert(FirstMI && LastMI);
792 LastMI->addRegisterKilled(SrcReg, &RI);
800 unsigned Size = RI.getRegSizeInBits(*RC);
802 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
808 if (((
Size == 16) != (SrcSize == 16))) {
815 if (DestReg == SrcReg) {
821 RC = RI.getPhysRegBaseClass(DestReg);
822 Size = RI.getRegSizeInBits(*RC);
823 SrcRC = RI.getPhysRegBaseClass(SrcReg);
824 SrcSize = RI.getRegSizeInBits(*SrcRC);
828 if (RC == &AMDGPU::VGPR_32RegClass) {
830 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
831 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
832 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
833 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
839 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
840 RC == &AMDGPU::SReg_32RegClass) {
841 if (SrcReg == AMDGPU::SCC) {
848 if (DestReg == AMDGPU::VCC_LO) {
849 if (AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
863 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
873 if (RC == &AMDGPU::SReg_64RegClass) {
874 if (SrcReg == AMDGPU::SCC) {
881 if (DestReg == AMDGPU::VCC) {
882 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
896 if (!AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
906 if (DestReg == AMDGPU::SCC) {
909 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
927 if (RC == &AMDGPU::AGPR_32RegClass) {
928 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
929 (ST.
hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
944 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
951 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
952 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
954 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
955 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
956 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
957 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
974 if (IsAGPRDst || IsAGPRSrc) {
975 if (!DstLow || !SrcLow) {
977 "Cannot use hi16 subreg with an AGPR!");
990 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
991 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
1004 if (!DstLow || !SrcLow) {
1006 "Cannot use hi16 subreg on VI!");
1057 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1063 unsigned EltSize = 4;
1064 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1067 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1070 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1072 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1074 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1080 Opcode = AMDGPU::V_MOV_B64_e32;
1083 Opcode = AMDGPU::V_PK_MOV_B32;
1093 std::unique_ptr<RegScavenger> RS;
1094 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1101 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1102 const bool CanKillSuperReg = KillSrc && !Overlap;
1107 SubIdx = SubIndices[
Idx];
1109 SubIdx = SubIndices[SubIndices.
size() -
Idx - 1];
1110 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1111 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1112 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1114 bool IsFirstSubreg =
Idx == 0;
1115 bool UseKill = CanKillSuperReg &&
Idx == SubIndices.
size() - 1;
1117 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1121 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1122 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1169 int64_t
Value)
const {
1172 if (RegClass == &AMDGPU::SReg_32RegClass ||
1173 RegClass == &AMDGPU::SGPR_32RegClass ||
1174 RegClass == &AMDGPU::SReg_32_XM0RegClass ||
1175 RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
1181 if (RegClass == &AMDGPU::SReg_64RegClass ||
1182 RegClass == &AMDGPU::SGPR_64RegClass ||
1183 RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
1189 if (RegClass == &AMDGPU::VGPR_32RegClass) {
1200 unsigned EltSize = 4;
1201 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1203 if (RI.getRegSizeInBits(*RegClass) > 32) {
1204 Opcode = AMDGPU::S_MOV_B64;
1207 Opcode = AMDGPU::S_MOV_B32;
1214 int64_t IdxValue =
Idx == 0 ?
Value : 0;
1217 get(Opcode), RI.getSubReg(DestReg, SubIndices[
Idx]));
1218 Builder.
addImm(IdxValue);
1224 return &AMDGPU::VGPR_32RegClass;
1236 assert(
MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
1237 "Not a VGPR32 reg");
1239 if (
Cond.size() == 1) {
1240 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1249 }
else if (
Cond.size() == 2) {
1251 switch (
Cond[0].getImm()) {
1252 case SIInstrInfo::SCC_TRUE: {
1253 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1255 : AMDGPU::S_CSELECT_B64), SReg)
1266 case SIInstrInfo::SCC_FALSE: {
1267 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1269 : AMDGPU::S_CSELECT_B64), SReg)
1280 case SIInstrInfo::VCCNZ: {
1283 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1294 case SIInstrInfo::VCCZ: {
1297 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1308 case SIInstrInfo::EXECNZ: {
1309 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1312 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
1315 : AMDGPU::S_CSELECT_B64), SReg)
1326 case SIInstrInfo::EXECZ: {
1327 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1330 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
1333 : AMDGPU::S_CSELECT_B64), SReg)
1382 return AMDGPU::COPY;
1383 if (RI.getRegSizeInBits(*DstRC) == 16) {
1386 return RI.
isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1387 }
else if (RI.getRegSizeInBits(*DstRC) == 32) {
1388 return RI.
isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1389 }
else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.
isSGPRClass(DstRC)) {
1390 return AMDGPU::S_MOV_B64;
1391 }
else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.
isSGPRClass(DstRC)) {
1392 return AMDGPU::V_MOV_B64_PSEUDO;
1394 return AMDGPU::COPY;
1399 bool IsIndirectSrc)
const {
1400 if (IsIndirectSrc) {
1402 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1404 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1406 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1408 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1410 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1412 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1414 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1416 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1418 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1420 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1422 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1423 if (VecSize <= 1024)
1424 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1430 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1432 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1434 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1436 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1438 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1440 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1442 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1444 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1446 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1448 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1450 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1451 if (VecSize <= 1024)
1452 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1459 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1461 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1463 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1465 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1467 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1469 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1471 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1473 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1475 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1477 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1479 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1480 if (VecSize <= 1024)
1481 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1488 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1490 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1492 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1494 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1496 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1498 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1500 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1502 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1504 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1506 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1508 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1509 if (VecSize <= 1024)
1510 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1517 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1519 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1521 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1523 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1524 if (VecSize <= 1024)
1525 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1532 bool IsSGPR)
const {
1544 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1551 return AMDGPU::SI_SPILL_S32_SAVE;
1553 return AMDGPU::SI_SPILL_S64_SAVE;
1555 return AMDGPU::SI_SPILL_S96_SAVE;
1557 return AMDGPU::SI_SPILL_S128_SAVE;
1559 return AMDGPU::SI_SPILL_S160_SAVE;
1561 return AMDGPU::SI_SPILL_S192_SAVE;
1563 return AMDGPU::SI_SPILL_S224_SAVE;
1565 return AMDGPU::SI_SPILL_S256_SAVE;
1567 return AMDGPU::SI_SPILL_S288_SAVE;
1569 return AMDGPU::SI_SPILL_S320_SAVE;
1571 return AMDGPU::SI_SPILL_S352_SAVE;
1573 return AMDGPU::SI_SPILL_S384_SAVE;
1575 return AMDGPU::SI_SPILL_S512_SAVE;
1577 return AMDGPU::SI_SPILL_S1024_SAVE;
1586 return AMDGPU::SI_SPILL_V32_SAVE;
1588 return AMDGPU::SI_SPILL_V64_SAVE;
1590 return AMDGPU::SI_SPILL_V96_SAVE;
1592 return AMDGPU::SI_SPILL_V128_SAVE;
1594 return AMDGPU::SI_SPILL_V160_SAVE;
1596 return AMDGPU::SI_SPILL_V192_SAVE;
1598 return AMDGPU::SI_SPILL_V224_SAVE;
1600 return AMDGPU::SI_SPILL_V256_SAVE;
1602 return AMDGPU::SI_SPILL_V288_SAVE;
1604 return AMDGPU::SI_SPILL_V320_SAVE;
1606 return AMDGPU::SI_SPILL_V352_SAVE;
1608 return AMDGPU::SI_SPILL_V384_SAVE;
1610 return AMDGPU::SI_SPILL_V512_SAVE;
1612 return AMDGPU::SI_SPILL_V1024_SAVE;
1621 return AMDGPU::SI_SPILL_A32_SAVE;
1623 return AMDGPU::SI_SPILL_A64_SAVE;
1625 return AMDGPU::SI_SPILL_A96_SAVE;
1627 return AMDGPU::SI_SPILL_A128_SAVE;
1629 return AMDGPU::SI_SPILL_A160_SAVE;
1631 return AMDGPU::SI_SPILL_A192_SAVE;
1633 return AMDGPU::SI_SPILL_A224_SAVE;
1635 return AMDGPU::SI_SPILL_A256_SAVE;
1637 return AMDGPU::SI_SPILL_A288_SAVE;
1639 return AMDGPU::SI_SPILL_A320_SAVE;
1641 return AMDGPU::SI_SPILL_A352_SAVE;
1643 return AMDGPU::SI_SPILL_A384_SAVE;
1645 return AMDGPU::SI_SPILL_A512_SAVE;
1647 return AMDGPU::SI_SPILL_A1024_SAVE;
1656 return AMDGPU::SI_SPILL_AV32_SAVE;
1658 return AMDGPU::SI_SPILL_AV64_SAVE;
1660 return AMDGPU::SI_SPILL_AV96_SAVE;
1662 return AMDGPU::SI_SPILL_AV128_SAVE;
1664 return AMDGPU::SI_SPILL_AV160_SAVE;
1666 return AMDGPU::SI_SPILL_AV192_SAVE;
1668 return AMDGPU::SI_SPILL_AV224_SAVE;
1670 return AMDGPU::SI_SPILL_AV256_SAVE;
1672 return AMDGPU::SI_SPILL_AV288_SAVE;
1674 return AMDGPU::SI_SPILL_AV320_SAVE;
1676 return AMDGPU::SI_SPILL_AV352_SAVE;
1678 return AMDGPU::SI_SPILL_AV384_SAVE;
1680 return AMDGPU::SI_SPILL_AV512_SAVE;
1682 return AMDGPU::SI_SPILL_AV1024_SAVE;
1689 bool IsVectorSuperClass) {
1694 if (IsVectorSuperClass)
1695 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1697 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1705 bool IsVectorSuperClass =
TRI.isVectorSuperClass(RC);
1711 if (IsVectorSuperClass)
1731 FrameInfo.getObjectAlign(FrameIndex));
1732 unsigned SpillSize =
TRI->getSpillSize(*RC);
1737 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1738 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1739 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1747 if (SrcReg.
isVirtual() && SpillSize == 4) {
1748 MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1763 SpillSize, RI, *MFI);
1777 return AMDGPU::SI_SPILL_S32_RESTORE;
1779 return AMDGPU::SI_SPILL_S64_RESTORE;
1781 return AMDGPU::SI_SPILL_S96_RESTORE;
1783 return AMDGPU::SI_SPILL_S128_RESTORE;
1785 return AMDGPU::SI_SPILL_S160_RESTORE;
1787 return AMDGPU::SI_SPILL_S192_RESTORE;
1789 return AMDGPU::SI_SPILL_S224_RESTORE;
1791 return AMDGPU::SI_SPILL_S256_RESTORE;
1793 return AMDGPU::SI_SPILL_S288_RESTORE;
1795 return AMDGPU::SI_SPILL_S320_RESTORE;
1797 return AMDGPU::SI_SPILL_S352_RESTORE;
1799 return AMDGPU::SI_SPILL_S384_RESTORE;
1801 return AMDGPU::SI_SPILL_S512_RESTORE;
1803 return AMDGPU::SI_SPILL_S1024_RESTORE;
1812 return AMDGPU::SI_SPILL_V32_RESTORE;
1814 return AMDGPU::SI_SPILL_V64_RESTORE;
1816 return AMDGPU::SI_SPILL_V96_RESTORE;
1818 return AMDGPU::SI_SPILL_V128_RESTORE;
1820 return AMDGPU::SI_SPILL_V160_RESTORE;
1822 return AMDGPU::SI_SPILL_V192_RESTORE;
1824 return AMDGPU::SI_SPILL_V224_RESTORE;
1826 return AMDGPU::SI_SPILL_V256_RESTORE;
1828 return AMDGPU::SI_SPILL_V288_RESTORE;
1830 return AMDGPU::SI_SPILL_V320_RESTORE;
1832 return AMDGPU::SI_SPILL_V352_RESTORE;
1834 return AMDGPU::SI_SPILL_V384_RESTORE;
1836 return AMDGPU::SI_SPILL_V512_RESTORE;
1838 return AMDGPU::SI_SPILL_V1024_RESTORE;
1847 return AMDGPU::SI_SPILL_A32_RESTORE;
1849 return AMDGPU::SI_SPILL_A64_RESTORE;
1851 return AMDGPU::SI_SPILL_A96_RESTORE;
1853 return AMDGPU::SI_SPILL_A128_RESTORE;
1855 return AMDGPU::SI_SPILL_A160_RESTORE;
1857 return AMDGPU::SI_SPILL_A192_RESTORE;
1859 return AMDGPU::SI_SPILL_A224_RESTORE;
1861 return AMDGPU::SI_SPILL_A256_RESTORE;
1863 return AMDGPU::SI_SPILL_A288_RESTORE;
1865 return AMDGPU::SI_SPILL_A320_RESTORE;
1867 return AMDGPU::SI_SPILL_A352_RESTORE;
1869 return AMDGPU::SI_SPILL_A384_RESTORE;
1871 return AMDGPU::SI_SPILL_A512_RESTORE;
1873 return AMDGPU::SI_SPILL_A1024_RESTORE;
1882 return AMDGPU::SI_SPILL_AV32_RESTORE;
1884 return AMDGPU::SI_SPILL_AV64_RESTORE;
1886 return AMDGPU::SI_SPILL_AV96_RESTORE;
1888 return AMDGPU::SI_SPILL_AV128_RESTORE;
1890 return AMDGPU::SI_SPILL_AV160_RESTORE;
1892 return AMDGPU::SI_SPILL_AV192_RESTORE;
1894 return AMDGPU::SI_SPILL_AV224_RESTORE;
1896 return AMDGPU::SI_SPILL_AV256_RESTORE;
1898 return AMDGPU::SI_SPILL_AV288_RESTORE;
1900 return AMDGPU::SI_SPILL_AV320_RESTORE;
1902 return AMDGPU::SI_SPILL_AV352_RESTORE;
1904 return AMDGPU::SI_SPILL_AV384_RESTORE;
1906 return AMDGPU::SI_SPILL_AV512_RESTORE;
1908 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1915 bool IsVectorSuperClass) {
1920 if (IsVectorSuperClass)
1921 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1923 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1930 bool IsVectorSuperClass =
TRI.isVectorSuperClass(RC);
1936 if (IsVectorSuperClass)
1953 unsigned SpillSize =
TRI->getSpillSize(*RC);
1960 FrameInfo.getObjectAlign(FrameIndex));
1964 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1965 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1966 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1971 if (DestReg.
isVirtual() && SpillSize == 4) {
1973 MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1987 SpillSize, RI, *MFI);
2002 unsigned Quantity)
const {
2004 while (Quantity > 0) {
2005 unsigned Arg = std::min(Quantity, 8u);
2019 if (HasNoTerminator) {
2020 if (
Info->returnsVoid()) {
2030 switch (
MI.getOpcode()) {
2032 if (
MI.isMetaInstruction())
2037 return MI.getOperand(0).getImm() + 1;
2047 switch (
MI.getOpcode()) {
2049 case AMDGPU::S_MOV_B64_term:
2052 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2055 case AMDGPU::S_MOV_B32_term:
2058 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2061 case AMDGPU::S_XOR_B64_term:
2064 MI.setDesc(
get(AMDGPU::S_XOR_B64));
2067 case AMDGPU::S_XOR_B32_term:
2070 MI.setDesc(
get(AMDGPU::S_XOR_B32));
2072 case AMDGPU::S_OR_B64_term:
2075 MI.setDesc(
get(AMDGPU::S_OR_B64));
2077 case AMDGPU::S_OR_B32_term:
2080 MI.setDesc(
get(AMDGPU::S_OR_B32));
2083 case AMDGPU::S_ANDN2_B64_term:
2086 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
2089 case AMDGPU::S_ANDN2_B32_term:
2092 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
2095 case AMDGPU::S_AND_B64_term:
2098 MI.setDesc(
get(AMDGPU::S_AND_B64));
2101 case AMDGPU::S_AND_B32_term:
2104 MI.setDesc(
get(AMDGPU::S_AND_B32));
2107 case AMDGPU::S_AND_SAVEEXEC_B64_term:
2110 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
2113 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2116 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2119 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2120 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2123 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2124 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2127 case AMDGPU::V_MOV_B64_PSEUDO: {
2129 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2130 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2136 MI.setDesc(
get(AMDGPU::V_MOV_B64_e32));
2141 if (
SrcOp.isImm()) {
2143 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2144 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2187 MI.eraseFromParent();
2190 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2194 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2199 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2204 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2205 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2207 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2208 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2215 MI.eraseFromParent();
2218 case AMDGPU::V_SET_INACTIVE_B32: {
2219 unsigned NotOpc = ST.
isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
2220 unsigned Exec = ST.
isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
2224 .
add(
MI.getOperand(1));
2228 .
add(
MI.getOperand(2));
2231 MI.eraseFromParent();
2234 case AMDGPU::V_SET_INACTIVE_B64: {
2235 unsigned NotOpc = ST.
isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
2236 unsigned Exec = ST.
isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
2238 MI.getOperand(0).getReg())
2239 .
add(
MI.getOperand(1));
2244 MI.getOperand(0).getReg())
2245 .
add(
MI.getOperand(2));
2249 MI.eraseFromParent();
2252 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2253 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2254 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2255 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2256 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2257 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2258 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2259 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2260 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2261 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2262 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2263 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2264 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2265 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2266 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2267 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2268 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2269 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2270 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2271 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2272 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2273 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2274 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2275 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2276 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2277 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2278 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2279 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2280 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2285 Opc = AMDGPU::V_MOVRELD_B32_e32;
2287 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2288 : AMDGPU::S_MOVRELD_B32;
2293 bool IsUndef =
MI.getOperand(1).isUndef();
2294 unsigned SubReg =
MI.getOperand(3).getImm();
2295 assert(VecReg ==
MI.getOperand(1).getReg());
2300 .
add(
MI.getOperand(2))
2304 const int ImpDefIdx =
2306 const int ImpUseIdx = ImpDefIdx + 1;
2308 MI.eraseFromParent();
2311 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2312 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2313 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2314 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2315 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2316 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2317 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2318 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2319 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2320 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2321 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2322 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2325 bool IsUndef =
MI.getOperand(1).isUndef();
2334 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2338 .
add(
MI.getOperand(2))
2343 const int ImpDefIdx =
2345 const int ImpUseIdx = ImpDefIdx + 1;
2352 MI.eraseFromParent();
2355 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2356 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2357 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2358 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2359 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2360 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2361 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2362 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2363 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2364 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2365 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2366 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2370 bool IsUndef =
MI.getOperand(1).isUndef();
2388 MI.eraseFromParent();
2391 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2394 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2395 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2418 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2425 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2435 MI.eraseFromParent();
2438 case AMDGPU::ENTER_STRICT_WWM: {
2442 : AMDGPU::S_OR_SAVEEXEC_B64));
2445 case AMDGPU::ENTER_STRICT_WQM: {
2448 const unsigned Exec = ST.
isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
2449 const unsigned WQMOp = ST.
isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
2450 const unsigned MovOp = ST.
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
2454 MI.eraseFromParent();
2457 case AMDGPU::EXIT_STRICT_WWM:
2458 case AMDGPU::EXIT_STRICT_WQM: {
2461 MI.setDesc(
get(ST.
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
2464 case AMDGPU::ENTER_PSEUDO_WM:
2465 case AMDGPU::EXIT_PSEUDO_WM: {
2467 MI.eraseFromParent();
2470 case AMDGPU::SI_RETURN: {
2484 MI.eraseFromParent();
2488 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2489 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2490 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2493 case AMDGPU::S_GETPC_B64_pseudo:
2494 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2497 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2519 case AMDGPU::S_LOAD_DWORDX16_IMM:
2520 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2533 for (
auto &CandMO :
I->operands()) {
2534 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2542 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2550 assert(
MRI.use_nodbg_empty(DestReg) &&
"DestReg should have no users yet.");
2552 unsigned NewOpcode = -1;
2553 if (SubregSize == 256)
2554 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2555 else if (SubregSize == 128)
2556 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2563 MRI.setRegClass(DestReg, NewRC);
2566 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2571 MI->getOperand(0).setReg(DestReg);
2572 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2576 OffsetMO->
setImm(FinalOffset);
2582 MI->setMemRefs(*MF, NewMMOs);
2595std::pair<MachineInstr*, MachineInstr*>
2597 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2602 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2603 return std::pair(&
MI,
nullptr);
2614 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2616 if (Dst.isPhysical()) {
2617 MovDPP.addDef(RI.getSubReg(Dst, Sub));
2620 auto Tmp =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2624 for (
unsigned I = 1;
I <= 2; ++
I) {
2627 if (
SrcOp.isImm()) {
2629 Imm.ashrInPlace(Part * 32);
2630 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2634 if (Src.isPhysical())
2635 MovDPP.addReg(RI.getSubReg(Src, Sub));
2642 MovDPP.addImm(MO.getImm());
2644 Split[Part] = MovDPP;
2648 if (Dst.isVirtual())
2655 MI.eraseFromParent();
2656 return std::pair(Split[0], Split[1]);
2659std::optional<DestSourcePair>
2661 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2664 return std::nullopt;
2669 unsigned Src0OpName,
2671 unsigned Src1OpName)
const {
2678 "All commutable instructions have both src0 and src1 modifiers");
2680 int Src0ModsVal = Src0Mods->
getImm();
2681 int Src1ModsVal = Src1Mods->
getImm();
2683 Src1Mods->
setImm(Src0ModsVal);
2684 Src0Mods->
setImm(Src1ModsVal);
2693 bool IsKill = RegOp.
isKill();
2695 bool IsUndef = RegOp.
isUndef();
2696 bool IsDebug = RegOp.
isDebug();
2698 if (NonRegOp.
isImm())
2700 else if (NonRegOp.
isFI())
2719 unsigned Src1Idx)
const {
2720 assert(!NewMI &&
"this should never be used");
2722 unsigned Opc =
MI.getOpcode();
2724 if (CommutedOpcode == -1)
2727 if (Src0Idx > Src1Idx)
2731 static_cast<int>(Src0Idx) &&
2733 static_cast<int>(Src1Idx) &&
2734 "inconsistency with findCommutedOpIndices");
2761 Src1, AMDGPU::OpName::src1_modifiers);
2773 unsigned &SrcOpIdx0,
2774 unsigned &SrcOpIdx1)
const {
2779 unsigned &SrcOpIdx0,
2780 unsigned &SrcOpIdx1)
const {
2781 if (!
Desc.isCommutable())
2784 unsigned Opc =
Desc.getOpcode();
2793 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2797 int64_t BrOffset)
const {
2800 assert(BranchOp != AMDGPU::S_SETPC_B64);
2814 return MI.getOperand(0).getMBB();
2819 if (
MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
2820 MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
2821 MI.getOpcode() == AMDGPU::SI_LOOP)
2832 assert(RS &&
"RegScavenger required for long branching");
2834 "new block should be inserted for expanding unconditional branch");
2837 "restore block should be inserted for restoring clobbered registers");
2845 Register PCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2855 MCCtx.createTempSymbol(
"post_getpc",
true);
2859 MCCtx.createTempSymbol(
"offset_lo",
true);
2861 MCCtx.createTempSymbol(
"offset_hi",
true);
2864 .
addReg(PCReg, 0, AMDGPU::sub0)
2868 .
addReg(PCReg, 0, AMDGPU::sub1)
2910 if (LongBranchReservedReg) {
2912 Scav = LongBranchReservedReg;
2921 MRI.replaceRegWith(PCReg, Scav);
2922 MRI.clearVirtRegs();
2928 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
2929 MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
2930 MRI.clearVirtRegs();
2945unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
2947 case SIInstrInfo::SCC_TRUE:
2948 return AMDGPU::S_CBRANCH_SCC1;
2949 case SIInstrInfo::SCC_FALSE:
2950 return AMDGPU::S_CBRANCH_SCC0;
2951 case SIInstrInfo::VCCNZ:
2952 return AMDGPU::S_CBRANCH_VCCNZ;
2953 case SIInstrInfo::VCCZ:
2954 return AMDGPU::S_CBRANCH_VCCZ;
2955 case SIInstrInfo::EXECNZ:
2956 return AMDGPU::S_CBRANCH_EXECNZ;
2957 case SIInstrInfo::EXECZ:
2958 return AMDGPU::S_CBRANCH_EXECZ;
2964SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
2966 case AMDGPU::S_CBRANCH_SCC0:
2968 case AMDGPU::S_CBRANCH_SCC1:
2970 case AMDGPU::S_CBRANCH_VCCNZ:
2972 case AMDGPU::S_CBRANCH_VCCZ:
2974 case AMDGPU::S_CBRANCH_EXECNZ:
2976 case AMDGPU::S_CBRANCH_EXECZ:
2988 bool AllowModify)
const {
2989 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
2991 TBB =
I->getOperand(0).getMBB();
2997 if (
I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
2998 CondBB =
I->getOperand(1).getMBB();
2999 Cond.push_back(
I->getOperand(0));
3001 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3002 if (Pred == INVALID_BR)
3005 CondBB =
I->getOperand(0).getMBB();
3007 Cond.push_back(
I->getOperand(1));
3017 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3019 FBB =
I->getOperand(0).getMBB();
3029 bool AllowModify)
const {
3037 while (
I !=
E && !
I->isBranch() && !
I->isReturn()) {
3038 switch (
I->getOpcode()) {
3039 case AMDGPU::S_MOV_B64_term:
3040 case AMDGPU::S_XOR_B64_term:
3041 case AMDGPU::S_OR_B64_term:
3042 case AMDGPU::S_ANDN2_B64_term:
3043 case AMDGPU::S_AND_B64_term:
3044 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3045 case AMDGPU::S_MOV_B32_term:
3046 case AMDGPU::S_XOR_B32_term:
3047 case AMDGPU::S_OR_B32_term:
3048 case AMDGPU::S_ANDN2_B32_term:
3049 case AMDGPU::S_AND_B32_term:
3050 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3053 case AMDGPU::SI_ELSE:
3054 case AMDGPU::SI_KILL_I1_TERMINATOR:
3055 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3072 int *BytesRemoved)
const {
3074 unsigned RemovedSize = 0;
3077 if (
MI.isBranch() ||
MI.isReturn()) {
3079 MI.eraseFromParent();
3085 *BytesRemoved = RemovedSize;
3102 int *BytesAdded)
const {
3103 if (!FBB &&
Cond.empty()) {
3111 if(
Cond.size() == 1 &&
Cond[0].isReg()) {
3121 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].getImm()));
3158 if (
Cond.size() != 2) {
3173 Register FalseReg,
int &CondCycles,
3174 int &TrueCycles,
int &FalseCycles)
const {
3175 switch (
Cond[0].getImm()) {
3180 if (
MRI.getRegClass(FalseReg) != RC)
3184 CondCycles = TrueCycles = FalseCycles = NumInsts;
3187 return RI.
hasVGPRs(RC) && NumInsts <= 6;
3195 if (
MRI.getRegClass(FalseReg) != RC)
3201 if (NumInsts % 2 == 0)
3204 CondCycles = TrueCycles = FalseCycles = NumInsts;
3216 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3217 if (Pred == VCCZ || Pred == SCC_FALSE) {
3218 Pred =
static_cast<BranchPredicate
>(-Pred);
3224 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3226 if (DstSize == 32) {
3228 if (Pred == SCC_TRUE) {
3243 if (DstSize == 64 && Pred == SCC_TRUE) {
3253 static const int16_t Sub0_15[] = {
3254 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3255 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3256 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3257 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3260 static const int16_t Sub0_15_64[] = {
3261 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3262 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3263 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3264 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3267 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3269 const int16_t *SubIndices = Sub0_15;
3270 int NElts = DstSize / 32;
3274 if (Pred == SCC_TRUE) {
3276 SelOp = AMDGPU::S_CSELECT_B32;
3277 EltRC = &AMDGPU::SGPR_32RegClass;
3279 SelOp = AMDGPU::S_CSELECT_B64;
3280 EltRC = &AMDGPU::SGPR_64RegClass;
3281 SubIndices = Sub0_15_64;
3287 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3292 for (
int Idx = 0;
Idx != NElts; ++
Idx) {
3293 Register DstElt =
MRI.createVirtualRegister(EltRC);
3296 unsigned SubIdx = SubIndices[
Idx];
3299 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3302 .
addReg(FalseReg, 0, SubIdx)
3303 .
addReg(TrueReg, 0, SubIdx);
3307 .
addReg(TrueReg, 0, SubIdx)
3308 .
addReg(FalseReg, 0, SubIdx);
3320 switch (
MI.getOpcode()) {
3321 case AMDGPU::V_MOV_B32_e32:
3322 case AMDGPU::V_MOV_B32_e64:
3323 case AMDGPU::V_MOV_B64_PSEUDO:
3324 case AMDGPU::V_MOV_B64_e32:
3325 case AMDGPU::V_MOV_B64_e64:
3326 case AMDGPU::S_MOV_B32:
3327 case AMDGPU::S_MOV_B64:
3328 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3330 case AMDGPU::WWM_COPY:
3331 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3332 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3333 case AMDGPU::V_ACCVGPR_MOV_B32:
3341 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3342 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3343 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3346 unsigned Opc =
MI.getOpcode();
3350 MI.removeOperand(
Idx);
3356 if (!
MRI->hasOneNonDBGUse(Reg))
3359 switch (
DefMI.getOpcode()) {
3362 case AMDGPU::V_MOV_B64_e32:
3363 case AMDGPU::S_MOV_B64:
3364 case AMDGPU::V_MOV_B64_PSEUDO:
3365 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3366 case AMDGPU::V_MOV_B32_e32:
3367 case AMDGPU::S_MOV_B32:
3368 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3375 if (!ImmOp->
isImm())
3378 auto getImmFor = [ImmOp](
const MachineOperand &UseOp) -> int64_t {
3379 int64_t Imm = ImmOp->
getImm();
3380 switch (UseOp.getSubReg()) {
3391 case AMDGPU::sub1_lo16:
3393 case AMDGPU::sub1_hi16:
3398 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3400 unsigned Opc =
UseMI.getOpcode();
3401 if (Opc == AMDGPU::COPY) {
3402 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3406 bool Is16Bit = OpSize == 2;
3407 bool Is64Bit = OpSize == 8;
3409 unsigned NewOpc =
isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
3410 : AMDGPU::V_MOV_B32_e32
3411 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
3412 : AMDGPU::S_MOV_B32;
3413 APInt Imm(Is64Bit ? 64 : 32, getImmFor(
UseMI.getOperand(1)));
3418 NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
3425 if (DstReg.
isVirtual() &&
UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
3428 UseMI.getOperand(0).setSubReg(0);
3431 UseMI.getOperand(0).setReg(DstReg);
3441 UseMI.setDesc(NewMCID);
3442 UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
3447 if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
3448 Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3449 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3450 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3451 Opc == AMDGPU::V_FMAC_F16_t16_e64) {
3466 bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
3467 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
3469 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3470 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3471 Opc == AMDGPU::V_FMAC_F16_t16_e64;
3479 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3480 if (!RegSrc->
isReg())
3498 if (Def && Def->isMoveImmediate() &&
3503 IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3505 : AMDGPU::V_FMAMK_F16)
3506 : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
3513 if (NewOpc == AMDGPU::V_FMAMK_F16_t16)
3516 const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
3522 unsigned SrcSubReg = RegSrc->
getSubReg();
3527 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3528 Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3529 Opc == AMDGPU::V_FMAC_F16_e64)
3530 UseMI.untieRegOperand(
3533 Src1->ChangeToImmediate(Imm);
3538 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3540 DefMI.eraseFromParent();
3550 bool Src0Inlined =
false;
3551 if (Src0->
isReg()) {
3556 if (Def && Def->isMoveImmediate() &&
3568 if (Src1->
isReg() && !Src0Inlined) {
3571 if (Def && Def->isMoveImmediate() &&
3582 IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3584 : AMDGPU::V_FMAAK_F16)
3585 : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
3592 if (NewOpc == AMDGPU::V_FMAAK_F16_t16)
3598 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3599 Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3600 Opc == AMDGPU::V_FMAC_F16_e64)
3601 UseMI.untieRegOperand(
3615 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3617 DefMI.eraseFromParent();
3629 if (BaseOps1.
size() != BaseOps2.
size())
3631 for (
size_t I = 0,
E = BaseOps1.
size();
I <
E; ++
I) {
3632 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
3640 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
3641 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
3642 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
3644 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
3647bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
3650 int64_t Offset0, Offset1;
3652 bool Offset0IsScalable, Offset1IsScalable;
3674 "MIa must load from or modify a memory location");
3676 "MIb must load from or modify a memory location");
3695 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3702 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3712 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3726 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3737 if (Reg.isPhysical())
3739 auto *Def =
MRI.getUniqueVRegDef(Reg);
3741 Imm = Def->getOperand(1).getImm();
3761 unsigned NumOps =
MI.getNumOperands();
3762 for (
unsigned I = 1;
I < NumOps; ++
I) {
3764 if (
Op.isReg() &&
Op.isKill())
3774 unsigned Opc =
MI.getOpcode();
3778 if (NewMFMAOpc != -1) {
3781 for (
unsigned I = 0,
E =
MI.getNumOperands();
I !=
E; ++
I)
3782 MIB.
add(
MI.getOperand(
I));
3788 if (Def.isEarlyClobber() && Def.isReg() &&
3793 auto UpdateDefIndex = [&](
LiveRange &LR) {
3794 auto S = LR.
find(OldIndex);
3795 if (S != LR.end() && S->start == OldIndex) {
3796 assert(S->valno && S->valno->def == OldIndex);
3797 S->start = NewIndex;
3798 S->valno->def = NewIndex;
3802 for (
auto &SR : LI.subranges())
3813 for (
unsigned I = 0,
E =
MI.getNumOperands();
I !=
E; ++
I)
3823 assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
3824 "V_FMAC_F16_t16_e32 is not supported and not expected to be present "
3828 bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
3829 Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3830 Opc == AMDGPU::V_FMAC_F16_t16_e64;
3831 bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3832 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
3833 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
3834 Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3835 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3836 Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
3837 bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
3838 bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
3839 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
3840 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
3841 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
3842 bool Src0Literal =
false;
3847 case AMDGPU::V_MAC_F16_e64:
3848 case AMDGPU::V_FMAC_F16_e64:
3849 case AMDGPU::V_FMAC_F16_t16_e64:
3850 case AMDGPU::V_MAC_F32_e64:
3851 case AMDGPU::V_MAC_LEGACY_F32_e64:
3852 case AMDGPU::V_FMAC_F32_e64:
3853 case AMDGPU::V_FMAC_LEGACY_F32_e64:
3854 case AMDGPU::V_FMAC_F64_e64:
3856 case AMDGPU::V_MAC_F16_e32:
3857 case AMDGPU::V_FMAC_F16_e32:
3858 case AMDGPU::V_MAC_F32_e32:
3859 case AMDGPU::V_MAC_LEGACY_F32_e32:
3860 case AMDGPU::V_FMAC_F32_e32:
3861 case AMDGPU::V_FMAC_LEGACY_F32_e32:
3862 case AMDGPU::V_FMAC_F64_e32: {
3864 AMDGPU::OpName::src0);
3891 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
3897 const auto killDef = [&]() ->
void {
3901 if (!
MRI.hasOneNonDBGUse(DefReg))
3915 : AMDGPU::V_FMAAK_F16)
3916 : AMDGPU::V_FMAAK_F32)
3917 : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
3933 : AMDGPU::V_FMAMK_F16)
3934 : AMDGPU::V_FMAMK_F32)
3935 : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
3979 unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
3980 : IsF64 ? AMDGPU::V_FMA_F64_e64
3982 ? AMDGPU::V_FMA_LEGACY_F32_e64
3983 : AMDGPU::V_FMA_F32_e64
3984 : IsF16 ? AMDGPU::V_MAD_F16_e64
3985 : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
3986 : AMDGPU::V_MAD_F32_e64;
4001 MIB.
addImm(OpSel ? OpSel->getImm() : 0);
4012 switch (
MI.getOpcode()) {
4013 case AMDGPU::S_SET_GPR_IDX_ON:
4014 case AMDGPU::S_SET_GPR_IDX_MODE:
4015 case AMDGPU::S_SET_GPR_IDX_OFF:
4033 if (
MI.isTerminator() ||
MI.isPosition())
4037 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4040 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4046 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4047 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4048 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4049 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4054 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
isGWS(Opcode);
4065 unsigned Opcode =
MI.getOpcode();
4080 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4082 Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
4083 Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
4086 if (
MI.isCall() ||
MI.isInlineAsm())
4098 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4099 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4100 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4101 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4109 if (
MI.isMetaInstruction())
4113 if (
MI.isCopyLike()) {
4118 return MI.readsRegister(AMDGPU::EXEC, &RI);
4129 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4133 switch (Imm.getBitWidth()) {
4153 APInt IntImm = Imm.bitcastToAPInt();
4172 uint8_t OperandType)
const {
4173 assert(!MO.
isReg() &&
"isInlineConstant called on register operand!");
4182 int64_t Imm = MO.
getImm();
4183 switch (OperandType) {
4196 int32_t Trunc =
static_cast<int32_t
>(Imm);
4236 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
4241 int16_t Trunc =
static_cast<int16_t
>(Imm);
4252 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
4253 int16_t Trunc =
static_cast<int16_t
>(Imm);
4313 AMDGPU::OpName::src2))
4329 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.
hasGFX90AInsts())
4349 return Mods && Mods->
getImm();
4362 switch (
MI.getOpcode()) {
4363 default:
return false;
4365 case AMDGPU::V_ADDC_U32_e64:
4366 case AMDGPU::V_SUBB_U32_e64:
4367 case AMDGPU::V_SUBBREV_U32_e64: {
4375 case AMDGPU::V_MAC_F16_e64:
4376 case AMDGPU::V_MAC_F32_e64:
4377 case AMDGPU::V_MAC_LEGACY_F32_e64:
4378 case AMDGPU::V_FMAC_F16_e64:
4379 case AMDGPU::V_FMAC_F16_t16_e64:
4380 case AMDGPU::V_FMAC_F32_e64:
4381 case AMDGPU::V_FMAC_F64_e64:
4382 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4388 case AMDGPU::V_CNDMASK_B32_e64:
4419 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4428 unsigned Op32)
const {
4438 Inst32.
add(
MI.getOperand(0));
4442 assert(((
MI.getOperand(0).getReg() == AMDGPU::VCC) ||
4443 (
MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&
4457 if (Op32Src2Idx != -1) {
4487 if (MO.
getReg() == AMDGPU::SGPR_NULL || MO.
getReg() == AMDGPU::SGPR_NULL64)
4492 return MO.
getReg() == AMDGPU::M0 ||
4493 MO.
getReg() == AMDGPU::VCC ||
4494 MO.
getReg() == AMDGPU::VCC_LO;
4496 return AMDGPU::SReg_32RegClass.contains(MO.
getReg()) ||
4497 AMDGPU::SReg_64RegClass.contains(MO.
getReg());
4507 switch (MO.getReg()) {
4509 case AMDGPU::VCC_LO:
4510 case AMDGPU::VCC_HI:
4512 case AMDGPU::FLAT_SCR:
4525 switch (
MI.getOpcode()) {
4526 case AMDGPU::V_READLANE_B32:
4527 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4528 case AMDGPU::V_WRITELANE_B32:
4529 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4536 if (
MI.isPreISelOpcode() ||
4537 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
4548 if (
SubReg.getReg().isPhysical())
4551 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
4558 if (SIInstrInfo::isGenericOpcode(
MI.getOpcode()))
4568 if (Src0Idx == -1) {
4578 if (!
Desc.isVariadic() &&
4579 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
4580 ErrInfo =
"Instruction has wrong number of operands.";
4584 if (
MI.isInlineAsm()) {
4597 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
4598 ErrInfo =
"inlineasm operand has incorrect register class.";
4606 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
4607 ErrInfo =
"missing memory operand from image instruction.";
4612 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
4615 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
4616 "all fp values to integers.";
4620 int RegClass =
Desc.operands()[i].RegClass;
4622 switch (
Desc.operands()[i].OperandType) {
4624 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
4625 ErrInfo =
"Illegal immediate value for operand.";
4646 ErrInfo =
"Illegal immediate value for operand.";
4653 ErrInfo =
"Expected inline constant for operand.";
4662 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
4663 ErrInfo =
"Expected immediate, but got non-immediate";
4685 RI.getSubRegisterClass(RC, MO.
getSubReg());
4693 ErrInfo =
"Subtarget requires even aligned vector registers";
4698 if (RegClass != -1) {
4699 if (Reg.isVirtual())
4704 ErrInfo =
"Operand has incorrect register class.";
4713 ErrInfo =
"SDWA is not supported on this target";
4719 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
4727 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
4734 "Only reg allowed as operands in SDWA instructions on GFX9+";
4743 if (OMod !=
nullptr &&
4745 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
4750 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
4751 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
4752 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
4753 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
4756 unsigned Mods = Src0ModsMO->
getImm();
4759 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
4765 if (
isVOPC(BasicOpcode)) {
4769 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
4770 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
4776 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
4777 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
4783 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
4784 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
4791 if (DstUnused && DstUnused->isImm() &&
4794 if (!Dst.isReg() || !Dst.isTied()) {
4795 ErrInfo =
"Dst register should have tied register";
4800 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
4803 "Dst register should be tied to implicit use of preserved register";
4806 Dst.getReg() != TiedMO.
getReg()) {
4807 ErrInfo =
"Dst register should use same physical register as preserved";
4839 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
4840 if (RegCount > DstSize) {
4841 ErrInfo =
"Image instruction returns too many registers for dst "
4850 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
4851 unsigned ConstantBusCount = 0;
4852 bool UsesLiteral =
false;
4859 LiteralVal = &
MI.getOperand(ImmIdx);
4868 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
4886 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
4896 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
4897 return !RI.regsOverlap(SGPRUsed, SGPR);
4907 Opcode != AMDGPU::V_WRITELANE_B32) {
4908 ErrInfo =
"VOP* instruction violates constant bus restriction";
4913 ErrInfo =
"VOP3 instruction uses literal";
4920 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
4921 unsigned SGPRCount = 0;
4924 for (
int OpIdx : {Src0Idx, Src1Idx}) {
4932 if (MO.
getReg() != SGPRUsed)
4938 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
4945 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
4946 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
4953 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
4963 ErrInfo =
"ABS not allowed in VOP3B instructions";
4976 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
4983 if (
Desc.isBranch()) {
4985 ErrInfo =
"invalid branch target for SOPK instruction";
4991 if (!isUInt<16>(Imm)) {
4992 ErrInfo =
"invalid immediate for SOPK instruction";
4996 if (!isInt<16>(Imm)) {
4997 ErrInfo =
"invalid immediate for SOPK instruction";
5004 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5005 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5006 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5007 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5008 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5009 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5011 const unsigned StaticNumOps =
5012 Desc.getNumOperands() +
Desc.implicit_uses().size();
5013 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5018 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5019 ErrInfo =
"missing implicit register operands";
5025 if (!Dst->isUse()) {
5026 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5031 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5032 UseOpIdx != StaticNumOps + 1) {
5033 ErrInfo =
"movrel implicit operands should be tied";
5040 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5042 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5043 ErrInfo =
"src0 should be subreg of implicit vector use";
5051 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5052 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5058 if (
MI.mayStore() &&
5063 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5064 ErrInfo =
"scalar stores must use m0 as offset register";
5072 if (
Offset->getImm() != 0) {
5073 ErrInfo =
"subtarget does not support offsets in flat instructions";
5080 if (GDSOp && GDSOp->
getImm() != 0) {
5081 ErrInfo =
"GDS is not supported on this subtarget";
5090 AMDGPU::OpName::vaddr0);
5092 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5101 ErrInfo =
"dim is out of range";
5108 IsA16 = R128A16->
getImm() != 0;
5109 }
else if (ST.
hasA16()) {
5111 IsA16 = A16->
getImm() != 0;
5114 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5116 unsigned AddrWords =
5119 unsigned VAddrWords;
5121 VAddrWords = RsrcIdx - VAddr0Idx;
5124 unsigned LastVAddrIdx = RsrcIdx - 1;
5125 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5133 if (VAddrWords != AddrWords) {
5135 <<
" but got " << VAddrWords <<
"\n");
5136 ErrInfo =
"bad vaddr size";
5144 using namespace AMDGPU::DPP;
5146 unsigned DC = DppCt->
getImm();
5147 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5148 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5149 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5150 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5151 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5152 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5153 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5154 ErrInfo =
"Invalid dpp_ctrl value";
5157 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5159 ErrInfo =
"Invalid dpp_ctrl value: "
5160 "wavefront shifts are not supported on GFX10+";
5163 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5165 ErrInfo =
"Invalid dpp_ctrl value: "
5166 "broadcasts are not supported on GFX10+";
5169 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5171 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5172 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5174 ErrInfo =
"Invalid dpp_ctrl value: "
5175 "row_newbroadcast/row_share is not supported before "
5178 }
else if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.
hasGFX90AInsts()) {
5179 ErrInfo =
"Invalid dpp_ctrl value: "
5180 "row_share and row_xmask are not supported before GFX10";
5185 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5187 ErrInfo =
"Invalid dpp_ctrl value: "
5188 "DP ALU dpp only support row_newbcast";
5195 uint16_t DataNameIdx =
isDS(Opcode) ? AMDGPU::OpName::data0
5196 : AMDGPU::OpName::vdata;
5205 ErrInfo =
"Invalid register class: "
5206 "vdata and vdst should be both VGPR or AGPR";
5209 if (
Data && Data2 &&
5211 ErrInfo =
"Invalid register class: "
5212 "both data operands should be VGPR or AGPR";
5216 if ((Dst && RI.
isAGPR(
MRI, Dst->getReg())) ||
5219 ErrInfo =
"Invalid register class: "
5220 "agpr loads and stores not supported on this GPU";
5227 const auto isAlignedReg = [&
MI, &
MRI,
this](
unsigned OpName) ->
bool {
5232 if (Reg.isPhysical())
5239 if (
MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
5240 MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
5241 MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
5243 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5244 ErrInfo =
"Subtarget requires even aligned vector registers "
5245 "for DS_GWS instructions";
5251 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5252 ErrInfo =
"Subtarget requires even aligned vector registers "
5253 "for vaddr operand of image instructions";
5259 if (
MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
5262 if (Src->isReg() && RI.
isSGPRReg(
MRI, Src->getReg())) {
5263 ErrInfo =
"Invalid register class: "
5264 "v_accvgpr_write with an SGPR is not supported on this GPU";
5269 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5272 ErrInfo =
"pseudo expects only physical SGPRs";
5284 switch (
MI.getOpcode()) {
5285 default:
return AMDGPU::INSTRUCTION_LIST_END;
5286 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5287 case AMDGPU::COPY:
return AMDGPU::COPY;
5288 case AMDGPU::PHI:
return AMDGPU::PHI;
5289 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5290 case AMDGPU::WQM:
return AMDGPU::WQM;
5291 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5292 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5293 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5294 case AMDGPU::S_MOV_B32: {
5296 return MI.getOperand(1).isReg() ||
5298 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5300 case AMDGPU::S_ADD_I32:
5301 return ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5302 case AMDGPU::S_ADDC_U32:
5303 return AMDGPU::V_ADDC_U32_e32;
5304 case AMDGPU::S_SUB_I32:
5305 return ST.
hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5308 case AMDGPU::S_ADD_U32:
5309 return AMDGPU::V_ADD_CO_U32_e32;
5310 case AMDGPU::S_SUB_U32:
5311 return AMDGPU::V_SUB_CO_U32_e32;
5312 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5313 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5314 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5315 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5316 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5317 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5318 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5319 case AMDGPU::S_XNOR_B32:
5320 return ST.
hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5321 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5322 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5323 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5324 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5325 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5326 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5327 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5328 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5329 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5330 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5331 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5332 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5333 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5334 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5335 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5336 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5337 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5338 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5339 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5340 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5341 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5342 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5343 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5344 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5345 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5346 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5347 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5348 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5349 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5350 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5351 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5352 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5353 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5354 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5355 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5356 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5357 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5358 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5359 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5360 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5361 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5362 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5363 case AMDGPU::S_CVT_F32_F16:
return AMDGPU::V_CVT_F32_F16_t16_e64;
5364 case AMDGPU::S_CVT_HI_F32_F16:
return AMDGPU::V_CVT_F32_F16_t16_e64;
5365 case AMDGPU::S_CVT_F16_F32:
return AMDGPU::V_CVT_F16_F32_t16_e64;
5366 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5367 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5368 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5369 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5370 case AMDGPU::S_CEIL_F16:
5372 : AMDGPU::V_CEIL_F16_fake16_e64;
5373 case AMDGPU::S_FLOOR_F16:
5375 : AMDGPU::V_FLOOR_F16_fake16_e64;
5376 case AMDGPU::S_TRUNC_F16:
5377 return AMDGPU::V_TRUNC_F16_fake16_e64;
5378 case AMDGPU::S_RNDNE_F16:
5379 return AMDGPU::V_RNDNE_F16_fake16_e64;
5380 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5381 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5382 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5383 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5384 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5385 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5386 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5387 case AMDGPU::S_ADD_F16:
return AMDGPU::V_ADD_F16_fake16_e64;
5388 case AMDGPU::S_SUB_F16:
return AMDGPU::V_SUB_F16_fake16_e64;
5389 case AMDGPU::S_MIN_F16:
return AMDGPU::V_MIN_F16_fake16_e64;
5390 case AMDGPU::S_MAX_F16:
return AMDGPU::V_MAX_F16_fake16_e64;
5391 case AMDGPU::S_MINIMUM_F16:
return AMDGPU::V_MINIMUM_F16_e64;
5392 case AMDGPU::S_MAXIMUM_F16:
return AMDGPU::V_MAXIMUM_F16_e64;
5393 case AMDGPU::S_MUL_F16:
return AMDGPU::V_MUL_F16_fake16_e64;
5394 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5395 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5396 case AMDGPU::S_FMAC_F16:
return AMDGPU::V_FMAC_F16_t16_e64;
5397 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5398 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5399 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5400 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5401 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5402 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5403 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5404 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5405 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5406 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5407 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5408 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5409 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5410 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5411 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5412 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5413 case AMDGPU::S_CMP_LT_F16:
return AMDGPU::V_CMP_LT_F16_t16_e64;
5414 case AMDGPU::S_CMP_EQ_F16:
return AMDGPU::V_CMP_EQ_F16_t16_e64;
5415 case AMDGPU::S_CMP_LE_F16:
return AMDGPU::V_CMP_LE_F16_t16_e64;
5416 case AMDGPU::S_CMP_GT_F16:
return AMDGPU::V_CMP_GT_F16_t16_e64;
5417 case AMDGPU::S_CMP_LG_F16:
return AMDGPU::V_CMP_LG_F16_t16_e64;
5418 case AMDGPU::S_CMP_GE_F16:
return AMDGPU::V_CMP_GE_F16_t16_e64;
5419 case AMDGPU::S_CMP_O_F16:
return AMDGPU::V_CMP_O_F16_t16_e64;
5420 case AMDGPU::S_CMP_U_F16:
return AMDGPU::V_CMP_U_F16_t16_e64;
5421 case AMDGPU::S_CMP_NGE_F16:
return AMDGPU::V_CMP_NGE_F16_t16_e64;
5422 case AMDGPU::S_CMP_NLG_F16:
return AMDGPU::V_CMP_NLG_F16_t16_e64;
5423 case AMDGPU::S_CMP_NGT_F16:
return AMDGPU::V_CMP_NGT_F16_t16_e64;
5424 case AMDGPU::S_CMP_NLE_F16:
return AMDGPU::V_CMP_NLE_F16_t16_e64;
5425 case AMDGPU::S_CMP_NEQ_F16:
return AMDGPU::V_CMP_NEQ_F16_t16_e64;
5426 case AMDGPU::S_CMP_NLT_F16:
return AMDGPU::V_CMP_NLT_F16_t16_e64;
5427 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5428 case AMDGPU::V_S_EXP_F16_e64:
return AMDGPU::V_EXP_F16_fake16_e64;
5429 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5430 case AMDGPU::V_S_LOG_F16_e64:
return AMDGPU::V_LOG_F16_fake16_e64;
5431 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5432 case AMDGPU::V_S_RCP_F16_e64:
return AMDGPU::V_RCP_F16_fake16_e64;
5433 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5434 case AMDGPU::V_S_RSQ_F16_e64:
return AMDGPU::V_RSQ_F16_fake16_e64;
5435 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5436 case AMDGPU::V_S_SQRT_F16_e64:
return AMDGPU::V_SQRT_F16_fake16_e64;
5439 "Unexpected scalar opcode without corresponding vector one!");
5452 bool IsWave32 = ST.isWave32();
5457 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5458 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5467 const unsigned OrSaveExec =
5468 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5481 unsigned ExecMov =
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5483 auto ExecRestoreMI =
5493 bool IsAllocatable) {
5494 if ((IsAllocatable || !ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
5499 case AMDGPU::AV_32RegClassID:
5500 RCID = AMDGPU::VGPR_32RegClassID;
5502 case AMDGPU::AV_64RegClassID:
5503 RCID = AMDGPU::VReg_64RegClassID;
5505 case AMDGPU::AV_96RegClassID:
5506 RCID = AMDGPU::VReg_96RegClassID;
5508 case AMDGPU::AV_128RegClassID:
5509 RCID = AMDGPU::VReg_128RegClassID;
5511 case AMDGPU::AV_160RegClassID:
5512 RCID = AMDGPU::VReg_160RegClassID;
5514 case AMDGPU::AV_512RegClassID:
5515 RCID = AMDGPU::VReg_512RegClassID;
5531 auto RegClass = TID.
operands()[OpNum].RegClass;
5532 bool IsAllocatable =
false;
5542 AMDGPU::OpName::vdst);
5545 : AMDGPU::OpName::vdata);
5546 if (DataIdx != -1) {
5548 TID.
Opcode, AMDGPU::OpName::data1);
5556 unsigned OpNo)
const {
5559 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
5560 Desc.operands()[OpNo].RegClass == -1) {
5563 if (Reg.isVirtual())
5564 return MRI.getRegClass(Reg);
5565 return RI.getPhysRegBaseClass(Reg);
5568 unsigned RCID =
Desc.operands()[OpNo].RegClass;
5577 unsigned RCID =
get(
MI.getOpcode()).operands()[OpIdx].RegClass;
5579 unsigned Size = RI.getRegSizeInBits(*RC);
5580 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
5582 Opcode = AMDGPU::COPY;
5584 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
5601 if (SuperReg.
getSubReg() == AMDGPU::NoSubRegister) {
5611 Register NewSuperReg =
MRI.createVirtualRegister(SuperRC);
5617 .
addReg(NewSuperReg, 0, SubIdx);
5627 if (SubIdx == AMDGPU::sub0)
5629 if (SubIdx == AMDGPU::sub1)
5641void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
5657 if (Reg.isPhysical())
5668 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
5695 MO = &
MI.getOperand(OpIdx);
5707 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
5713 if (!SGPRsUsed.
count(SGPR) &&
5716 if (--ConstantBusLimit <= 0)
5722 if (!LiteralLimit--)
5724 if (--ConstantBusLimit <= 0)
5738 unsigned Opc =
MI.getOpcode();
5746 isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5747 if ((
int)OpIdx == VDstIdx && DataIdx != -1 &&
5748 MI.getOperand(DataIdx).isReg() &&
5749 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
5751 if ((
int)OpIdx == DataIdx) {
5752 if (VDstIdx != -1 &&
5753 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5757 AMDGPU::OpName::data1);
5758 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
5759 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5762 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts() &&
5772 bool Is64BitOp = Is64BitFPOp ||
5785 if (!Is64BitFPOp && (int32_t)Imm < 0)
5803 unsigned Opc =
MI.getOpcode();
5822 if (Opc == AMDGPU::V_WRITELANE_B32) {
5825 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5831 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5848 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
5850 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
5862 if (Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
5864 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5876 if (HasImplicitSGPR || !
MI.isCommutable()) {
5893 if (CommutedOpc == -1) {
5898 MI.setDesc(
get(CommutedOpc));
5902 bool Src0Kill = Src0.
isKill();
5906 else if (Src1.
isReg()) {
5921 unsigned Opc =
MI.getOpcode();
5929 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
5930 Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
5936 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5942 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5953 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
5955 SGPRsUsed.
insert(SGPRReg);
5959 for (
int Idx : VOP3Idx) {
5968 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
5993 if (ConstantBusLimit > 0) {
6005 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6006 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6015 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6019 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6021 get(TargetOpcode::COPY), NewSrcReg)
6028 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6034 for (
unsigned i = 0; i < SubRegs; ++i) {
6035 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6037 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6044 get(AMDGPU::REG_SEQUENCE), DstReg);
6045 for (
unsigned i = 0; i < SubRegs; ++i) {
6060 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6062 SBase->setReg(SGPR);
6074 if (OldSAddrIdx < 0)
6091 if (NewVAddrIdx < 0)
6098 if (OldVAddrIdx >= 0) {
6100 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6101 if (!VAddrDef || VAddrDef->
getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6112 if (OldVAddrIdx == NewVAddrIdx) {
6115 MRI.removeRegOperandFromUseList(&NewVAddr);
6116 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6120 MRI.removeRegOperandFromUseList(&NewVAddr);
6121 MRI.addRegOperandToUseList(&NewVAddr);
6123 assert(OldSAddrIdx == NewVAddrIdx);
6125 if (OldVAddrIdx >= 0) {
6127 AMDGPU::OpName::vdst_in);
6131 if (NewVDstIn != -1) {
6138 if (NewVDstIn != -1) {
6177 unsigned OpSubReg =
Op.getSubReg();
6186 Register DstReg =
MRI.createVirtualRegister(DstRC);
6197 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6200 bool ImpDef = Def->isImplicitDef();
6201 while (!ImpDef && Def && Def->isCopy()) {
6202 if (Def->getOperand(1).getReg().isPhysical())
6204 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6205 ImpDef = Def && Def->isImplicitDef();
6207 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6222 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6223 unsigned SaveExecOpc =
6224 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6225 unsigned XorTermOpc =
6226 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6228 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6229 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6237 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6238 unsigned NumSubRegs =
RegSize / 32;
6239 Register VScalarOp = ScalarOp->getReg();
6241 if (NumSubRegs == 1) {
6242 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6244 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6247 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6249 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6255 CondReg = NewCondReg;
6257 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6265 ScalarOp->setReg(CurReg);
6266 ScalarOp->setIsKill();
6269 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6270 "Unhandled register size");
6272 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6273 Register CurRegLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6274 Register CurRegHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6277 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6278 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6281 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6282 .
addReg(VScalarOp, VScalarOpUndef,
6283 TRI->getSubRegFromChannel(
Idx + 1));
6289 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6290 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6296 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6297 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6300 if (NumSubRegs <= 2)
6301 Cmp.addReg(VScalarOp);
6303 Cmp.addReg(VScalarOp, VScalarOpUndef,
6304 TRI->getSubRegFromChannel(
Idx, 2));
6308 CondReg = NewCondReg;
6310 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6319 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6320 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6324 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6325 unsigned Channel = 0;
6326 for (
Register Piece : ReadlanePieces) {
6327 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6331 ScalarOp->setReg(SScalarOp);
6332 ScalarOp->setIsKill();
6336 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6337 MRI.setSimpleHint(SaveExec, CondReg);
6368 if (!Begin.isValid())
6370 if (!
End.isValid()) {
6375 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6376 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6377 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6384 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6390 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6399 for (
auto I = Begin;
I != AfterMI;
I++) {
6400 for (
auto &MO :
I->all_uses())
6401 MRI.clearKillFlags(MO.getReg());
6436 for (
auto &Succ : RemainderBB->
successors()) {
6459static std::tuple<unsigned, unsigned>
6467 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
6468 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
6471 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6472 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6473 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6474 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6475 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
6483 .
addImm(RsrcDataFormat & 0xFFFFFFFF);
6487 .
addImm(RsrcDataFormat >> 32);
6492 .
addImm(AMDGPU::sub0_sub1)
6498 return std::tuple(RsrcPtr, NewSRsrc);
6535 if (
MI.getOpcode() == AMDGPU::PHI) {
6537 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
6538 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
6541 MRI.getRegClass(
MI.getOperand(i).getReg());
6556 VRC = &AMDGPU::VReg_1RegClass;
6572 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
6574 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6590 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6597 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
6599 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6617 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6622 if (DstRC != Src0RC) {
6631 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6639 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6640 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6641 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6642 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6643 MI.getOpcode() == AMDGPU::S_WQM_B64) {
6658 : AMDGPU::OpName::srsrc;
6663 int SampOpName =
isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6672 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6678 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6679 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6684 while (Start->getOpcode() != FrameSetupOpcode)
6687 while (
End->getOpcode() != FrameDestroyOpcode)
6691 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
6692 MI.definesRegister(
End->getOperand(1).getReg()))
6700 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6702 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6713 bool isSoffsetLegal =
true;
6716 if (SoffsetIdx != -1) {
6720 isSoffsetLegal =
false;
6724 bool isRsrcLegal =
true;
6727 if (RsrcIdx != -1) {
6730 isRsrcLegal =
false;
6735 if (isRsrcLegal && isSoffsetLegal)
6759 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6760 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6761 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6763 const auto *BoolXExecRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6764 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
6765 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
6767 unsigned RsrcPtr, NewSRsrc;
6774 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6781 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6799 "FIXME: Need to emit flat atomics here");
6801 unsigned RsrcPtr, NewSRsrc;
6804 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6827 MIB.
addImm(CPol->getImm());
6832 MIB.
addImm(TFE->getImm());
6852 MI.removeFromParent();
6857 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6859 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6863 if (!isSoffsetLegal) {
6875 if (!isSoffsetLegal) {
6884 InstrList.insert(
MI);
6888 if (RsrcIdx != -1) {
6889 DeferredList.insert(
MI);
6894 return DeferredList.contains(
MI);
6900 while (!Worklist.
empty()) {
6914 "Deferred MachineInstr are not supposed to re-populate worklist");
6932 case AMDGPU::S_ADD_U64_PSEUDO:
6933 NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
6935 case AMDGPU::S_SUB_U64_PSEUDO:
6936 NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
6938 case AMDGPU::S_ADD_I32:
6939 case AMDGPU::S_SUB_I32: {
6943 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
6951 case AMDGPU::S_MUL_U64:
6953 splitScalarSMulU64(Worklist, Inst, MDT);
6957 case AMDGPU::S_MUL_U64_U32_PSEUDO:
6958 case AMDGPU::S_MUL_I64_I32_PSEUDO:
6961 splitScalarSMulPseudo(Worklist, Inst, MDT);
6965 case AMDGPU::S_AND_B64:
6966 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
6970 case AMDGPU::S_OR_B64:
6971 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
6975 case AMDGPU::S_XOR_B64:
6976 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
6980 case AMDGPU::S_NAND_B64:
6981 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
6985 case AMDGPU::S_NOR_B64:
6986 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
6990 case AMDGPU::S_XNOR_B64:
6992 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
6994 splitScalar64BitXnor(Worklist, Inst, MDT);
6998 case AMDGPU::S_ANDN2_B64:
6999 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7003 case AMDGPU::S_ORN2_B64:
7004 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7008 case AMDGPU::S_BREV_B64:
7009 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7013 case AMDGPU::S_NOT_B64:
7014 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7018 case AMDGPU::S_BCNT1_I32_B64:
7019 splitScalar64BitBCNT(Worklist, Inst);
7023 case AMDGPU::S_BFE_I64:
7024 splitScalar64BitBFE(Worklist, Inst);
7028 case AMDGPU::S_FLBIT_I32_B64:
7029 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7032 case AMDGPU::S_FF1_I32_B64:
7033 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7037 case AMDGPU::S_LSHL_B32:
7039 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7043 case AMDGPU::S_ASHR_I32:
7045 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7049 case AMDGPU::S_LSHR_B32:
7051 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7055 case AMDGPU::S_LSHL_B64:
7058 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7059 : AMDGPU::V_LSHLREV_B64_e64;
7063 case AMDGPU::S_ASHR_I64:
7065 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7069 case AMDGPU::S_LSHR_B64:
7071 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7076 case AMDGPU::S_ABS_I32:
7077 lowerScalarAbs(Worklist, Inst);
7081 case AMDGPU::S_CBRANCH_SCC0:
7082 case AMDGPU::S_CBRANCH_SCC1: {
7085 bool IsSCC = CondReg == AMDGPU::SCC;
7088 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7091 .
addReg(IsSCC ? VCC : CondReg);
7095 case AMDGPU::S_BFE_U64:
7096 case AMDGPU::S_BFM_B64:
7099 case AMDGPU::S_PACK_LL_B32_B16:
7100 case AMDGPU::S_PACK_LH_B32_B16:
7101 case AMDGPU::S_PACK_HL_B32_B16:
7102 case AMDGPU::S_PACK_HH_B32_B16:
7103 movePackToVALU(Worklist,
MRI, Inst);
7107 case AMDGPU::S_XNOR_B32:
7108 lowerScalarXnor(Worklist, Inst);
7112 case AMDGPU::S_NAND_B32:
7113 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7117 case AMDGPU::S_NOR_B32:
7118 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7122 case AMDGPU::S_ANDN2_B32:
7123 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7127 case AMDGPU::S_ORN2_B32:
7128 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7136 case AMDGPU::S_ADD_CO_PSEUDO:
7137 case AMDGPU::S_SUB_CO_PSEUDO: {
7138 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7139 ? AMDGPU::V_ADDC_U32_e64
7140 : AMDGPU::V_SUBB_U32_e64;
7141 const auto *CarryRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
7144 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7145 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7163 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7167 case AMDGPU::S_UADDO_PSEUDO:
7168 case AMDGPU::S_USUBO_PSEUDO: {
7175 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7176 ? AMDGPU::V_ADD_CO_U32_e64
7177 : AMDGPU::V_SUB_CO_U32_e64;
7180 Register DestReg =
MRI.createVirtualRegister(NewRC);
7188 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7195 case AMDGPU::S_CSELECT_B32:
7196 case AMDGPU::S_CSELECT_B64:
7197 lowerSelect(Worklist, Inst, MDT);
7200 case AMDGPU::S_CMP_EQ_I32:
7201 case AMDGPU::S_CMP_LG_I32:
7202 case AMDGPU::S_CMP_GT_I32:
7203 case AMDGPU::S_CMP_GE_I32:
7204 case AMDGPU::S_CMP_LT_I32:
7205 case AMDGPU::S_CMP_LE_I32:
7206 case AMDGPU::S_CMP_EQ_U32:
7207 case AMDGPU::S_CMP_LG_U32:
7208 case AMDGPU::S_CMP_GT_U32:
7209 case AMDGPU::S_CMP_GE_U32:
7210 case AMDGPU::S_CMP_LT_U32:
7211 case AMDGPU::S_CMP_LE_U32:
7212 case AMDGPU::S_CMP_EQ_U64:
7213 case AMDGPU::S_CMP_LG_U64:
7214 case AMDGPU::S_CMP_LT_F32:
7215 case AMDGPU::S_CMP_EQ_F32:
7216 case AMDGPU::S_CMP_LE_F32:
7217 case AMDGPU::S_CMP_GT_F32:
7218 case AMDGPU::S_CMP_LG_F32:
7219 case AMDGPU::S_CMP_GE_F32:
7220 case AMDGPU::S_CMP_O_F32:
7221 case AMDGPU::S_CMP_U_F32:
7222 case AMDGPU::S_CMP_NGE_F32:
7223 case AMDGPU::S_CMP_NLG_F32:
7224 case AMDGPU::S_CMP_NGT_F32:
7225 case AMDGPU::S_CMP_NLE_F32:
7226 case AMDGPU::S_CMP_NEQ_F32:
7227 case AMDGPU::S_CMP_NLT_F32:
7228 case AMDGPU::S_CMP_LT_F16:
7229 case AMDGPU::S_CMP_EQ_F16:
7230 case AMDGPU::S_CMP_LE_F16:
7231 case AMDGPU::S_CMP_GT_F16:
7232 case AMDGPU::S_CMP_LG_F16:
7233 case AMDGPU::S_CMP_GE_F16:
7234 case AMDGPU::S_CMP_O_F16:
7235 case AMDGPU::S_CMP_U_F16:
7236 case AMDGPU::S_CMP_NGE_F16:
7237 case AMDGPU::S_CMP_NLG_F16:
7238 case AMDGPU::S_CMP_NGT_F16:
7239 case AMDGPU::S_CMP_NLE_F16:
7240 case AMDGPU::S_CMP_NEQ_F16:
7241 case AMDGPU::S_CMP_NLT_F16: {
7247 AMDGPU::OpName::src0_modifiers) >= 0) {
7262 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7266 case AMDGPU::S_CVT_HI_F32_F16: {
7268 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7269 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7280 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7284 case AMDGPU::S_MINIMUM_F32:
7285 case AMDGPU::S_MAXIMUM_F32:
7286 case AMDGPU::S_MINIMUM_F16:
7287 case AMDGPU::S_MAXIMUM_F16: {
7289 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7300 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7306 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7314 if (NewOpcode == Opcode) {
7338 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7350 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7351 MRI.replaceRegWith(DstReg, NewDstReg);
7353 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7364 AMDGPU::OpName::src0_modifiers) >= 0)
7369 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7370 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7372 NewInstr->addOperand(Src);
7375 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7378 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7380 NewInstr.addImm(
Size);
7381 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7385 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7390 "Scalar BFE is only implemented for constant width and offset");
7399 AMDGPU::OpName::src1_modifiers) >= 0)
7404 AMDGPU::OpName::src2_modifiers) >= 0)
7418 NewInstr->addOperand(
Op);
7425 if (
Op.getReg() == AMDGPU::SCC) {
7427 if (
Op.isDef() && !
Op.isDead())
7428 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7430 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7435 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7436 Register DstReg = NewInstr->getOperand(0).getReg();
7441 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7442 MRI.replaceRegWith(DstReg, NewDstReg);
7448 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7452std::pair<bool, MachineBasicBlock *>
7464 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7467 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
7469 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
7470 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
7478 MRI.replaceRegWith(OldDstReg, ResultReg);
7481 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7482 return std::pair(
true, NewBB);
7485 return std::pair(
false,
nullptr);
7502 bool IsSCC = (CondReg == AMDGPU::SCC);
7510 MRI.replaceRegWith(Dest.
getReg(), CondReg);
7518 NewCondReg =
MRI.createVirtualRegister(TC);
7522 bool CopyFound =
false;
7526 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC,
false,
false, &RI) !=
7528 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7530 .
addReg(CandI.getOperand(1).getReg());
7542 : AMDGPU::S_CSELECT_B32;
7552 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
7553 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7566 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
7568 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
7580 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7581 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7584 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7594 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7595 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7610 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7618 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7619 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7625 bool Src0IsSGPR = Src0.
isReg() &&
7627 bool Src1IsSGPR = Src1.
isReg() &&
7630 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7631 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7641 }
else if (Src1IsSGPR) {
7655 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7659 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7665 unsigned Opcode)
const {
7675 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7676 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7688 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7689 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7694 unsigned Opcode)
const {
7704 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7705 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7717 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7718 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7736 &AMDGPU::SGPR_32RegClass;
7739 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7742 AMDGPU::sub0, Src0SubRC);
7747 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7749 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
7753 AMDGPU::sub1, Src0SubRC);
7755 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
7761 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
7768 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7770 Worklist.
insert(&LoHalf);
7771 Worklist.
insert(&HiHalf);
7777 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7788 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7789 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7790 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7801 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7805 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7835 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7841 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7847 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7858 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7874 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7886 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7897 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7898 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7899 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7910 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7914 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7926 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
7927 ? AMDGPU::V_MUL_HI_U32_e64
7928 : AMDGPU::V_MUL_HI_I32_e64;
7943 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7951 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7970 &AMDGPU::SGPR_32RegClass;
7973 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7976 &AMDGPU::SGPR_32RegClass;
7979 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7982 AMDGPU::sub0, Src0SubRC);
7984 AMDGPU::sub0, Src1SubRC);
7986 AMDGPU::sub1, Src0SubRC);
7988 AMDGPU::sub1, Src1SubRC);
7993 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7995 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8000 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8005 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8012 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8014 Worklist.
insert(&LoHalf);
8015 Worklist.
insert(&HiHalf);
8018 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8036 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8052 Register NewDest =
MRI.createVirtualRegister(DestRC);
8058 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8076 MRI.getRegClass(Src.getReg()) :
8077 &AMDGPU::SGPR_32RegClass;
8079 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8080 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8083 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8086 AMDGPU::sub0, SrcSubRC);
8088 AMDGPU::sub1, SrcSubRC);
8094 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8098 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8117 Offset == 0 &&
"Not implemented");
8120 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8121 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8122 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8139 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8140 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8145 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8146 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8150 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8153 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8158 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8159 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8180 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8181 unsigned OpcodeAdd =
8182 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8185 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8187 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8194 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8195 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8196 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8197 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8204 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8210 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8212 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8214 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
8217void SIInstrInfo::addUsersToMoveToVALUWorklist(
8221 E =
MRI.use_end();
I !=
E;) {
8226 switch (
UseMI.getOpcode()) {
8229 case AMDGPU::SOFT_WQM:
8230 case AMDGPU::STRICT_WWM:
8231 case AMDGPU::STRICT_WQM:
8232 case AMDGPU::REG_SEQUENCE:
8234 case AMDGPU::INSERT_SUBREG:
8237 OpNo =
I.getOperandNo();
8246 }
while (
I !=
E &&
I->getParent() == &
UseMI);
8256 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8263 case AMDGPU::S_PACK_LL_B32_B16: {
8264 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8265 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8282 case AMDGPU::S_PACK_LH_B32_B16: {
8283 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8292 case AMDGPU::S_PACK_HL_B32_B16: {
8293 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8303 case AMDGPU::S_PACK_HH_B32_B16: {
8304 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8305 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8322 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8323 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8332 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8333 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8341 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC,
false, &RI);
8345 Register DestReg =
MI.getOperand(0).getReg();
8347 MRI.replaceRegWith(DestReg, NewCond);
8352 MI.getOperand(SCCIdx).setReg(NewCond);
8358 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC,
false,
false, &RI) != -1)
8361 for (
auto &Copy : CopyToDelete)
8362 Copy->eraseFromParent();
8370void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8379 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8381 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8398 case AMDGPU::REG_SEQUENCE:
8399 case AMDGPU::INSERT_SUBREG:
8401 case AMDGPU::SOFT_WQM:
8402 case AMDGPU::STRICT_WWM:
8403 case AMDGPU::STRICT_WQM: {
8411 case AMDGPU::REG_SEQUENCE:
8412 case AMDGPU::INSERT_SUBREG:
8422 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8439 int OpIndices[3])
const {
8458 for (
unsigned i = 0; i < 3; ++i) {
8459 int Idx = OpIndices[i];
8496 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8497 SGPRReg = UsedSGPRs[0];
8500 if (!SGPRReg && UsedSGPRs[1]) {
8501 if (UsedSGPRs[1] == UsedSGPRs[2])
8502 SGPRReg = UsedSGPRs[1];
8509 unsigned OperandName)
const {
8514 return &
MI.getOperand(
Idx);
8531 RsrcDataFormat |= (1ULL << 56);
8536 RsrcDataFormat |= (2ULL << 59);
8539 return RsrcDataFormat;
8561 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8567 unsigned Opc =
MI.getOpcode();
8573 return get(Opc).mayLoad() &&
8578 int &FrameIndex)
const {
8586 FrameIndex =
Addr->getIndex();
8591 int &FrameIndex)
const {
8594 FrameIndex =
Addr->getIndex();
8599 int &FrameIndex)
const {
8613 int &FrameIndex)
const {
8630 while (++
I !=
E &&
I->isInsideBundle()) {
8631 assert(!
I->isBundle() &&
"No nested bundle!");
8639 unsigned Opc =
MI.getOpcode();
8641 unsigned DescSize =
Desc.getSize();
8646 unsigned Size = DescSize;
8661 bool HasLiteral =
false;
8662 for (
int I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I) {
8670 return HasLiteral ? DescSize + 4 : DescSize;
8680 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8684 case TargetOpcode::BUNDLE:
8686 case TargetOpcode::INLINEASM:
8687 case TargetOpcode::INLINEASM_BR: {
8689 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8693 if (
MI.isMetaInstruction())
8703 if (
MI.memoperands_empty())
8714 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
8726 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8729 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_IF), DstReg)
8730 .
add(Branch->getOperand(0))
8731 .
add(Branch->getOperand(1));
8733 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_END_CF))
8752 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8757 BuildMI(*(MF), Branch->getDebugLoc(),
get(TargetOpcode::PHI), DstReg);
8759 if (PMBB == LoopEnd) {
8760 HeaderPHIBuilder.
addReg(BackEdgeReg);
8765 HeaderPHIBuilder.
addReg(ZeroReg);
8767 HeaderPHIBuilder.
addMBB(PMBB);
8771 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
8773 .
add(Branch->getOperand(0));
8775 BuildMI(*(MF), Branch->getDebugLoc(),
get(AMDGPU::SI_LOOP))
8781 LoopEnd->
insert(LoopEnd->
end(), SIIFBREAK);
8782 LoopEnd->
insert(LoopEnd->
end(), SILOOP);
8788 static const std::pair<int, const char *> TargetIndices[] = {
8826std::pair<unsigned, unsigned>
8833 static const std::pair<unsigned, const char *> TargetFlags[] = {
8848 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8862 return AMDGPU::WWM_COPY;
8864 return AMDGPU::COPY;
8875 bool IsNullOrVectorRegister =
true;
8884 return IsNullOrVectorRegister &&
8885 (
isSpill(Opcode) || (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
8886 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
8899 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
8930 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
8931 case AMDGPU::SI_KILL_I1_TERMINATOR:
8940 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
8941 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
8942 case AMDGPU::SI_KILL_I1_PSEUDO:
8943 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
8955 const unsigned OffsetBits =
8957 return (1 << OffsetBits) - 1;
8964 if (
MI.isInlineAsm())
8967 for (
auto &
Op :
MI.implicit_operands()) {
8968 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
8969 Op.setReg(AMDGPU::VCC_LO);
8982 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9000 if (Imm <= MaxImm + 64) {
9002 Overflow = Imm - MaxImm;
9092std::pair<int64_t, int64_t>
9095 int64_t RemainderOffset = COffsetVal;
9096 int64_t ImmField = 0;
9101 if (AllowNegative) {
9103 int64_t
D = 1LL << NumBits;
9104 RemainderOffset = (COffsetVal /
D) *
D;
9105 ImmField = COffsetVal - RemainderOffset;
9109 (ImmField % 4) != 0) {
9111 RemainderOffset += ImmField % 4;
9112 ImmField -= ImmField % 4;
9114 }
else if (COffsetVal >= 0) {
9115 ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
9116 RemainderOffset = COffsetVal - ImmField;
9120 assert(RemainderOffset + ImmField == COffsetVal);
9121 return {ImmField, RemainderOffset};
9133 switch (ST.getGeneration()) {
9158 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9159 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9160 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9161 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9162 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9163 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9164 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9165 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9201 if (
isMAI(Opcode)) {
9246 for (
unsigned I = 0,
E = (
MI.getNumOperands() - 1)/ 2;
I <
E; ++
I)
9247 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9248 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9260 switch (
MI.getOpcode()) {
9262 case AMDGPU::REG_SEQUENCE:
9266 case AMDGPU::INSERT_SUBREG:
9267 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9284 if (!
P.Reg.isVirtual())
9288 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9289 while (
auto *
MI = DefInst) {
9291 switch (
MI->getOpcode()) {
9293 case AMDGPU::V_MOV_B32_e32: {
9294 auto &Op1 =
MI->getOperand(1);
9299 DefInst =
MRI.getVRegDef(RSR.Reg);
9307 DefInst =
MRI.getVRegDef(RSR.Reg);
9320 assert(
MRI.isSSA() &&
"Must be run on SSA");
9322 auto *
TRI =
MRI.getTargetRegisterInfo();
9323 auto *DefBB =
DefMI.getParent();
9327 if (
UseMI.getParent() != DefBB)
9330 const int MaxInstScan = 20;
9334 auto E =
UseMI.getIterator();
9335 for (
auto I = std::next(
DefMI.getIterator());
I !=
E; ++
I) {
9336 if (
I->isDebugInstr())
9339 if (++NumInst > MaxInstScan)
9342 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9352 assert(
MRI.isSSA() &&
"Must be run on SSA");
9354 auto *
TRI =
MRI.getTargetRegisterInfo();
9355 auto *DefBB =
DefMI.getParent();
9357 const int MaxUseScan = 10;
9360 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9361 auto &UseInst = *
Use.getParent();
9364 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9367 if (++NumUse > MaxUseScan)
9374 const int MaxInstScan = 20;
9378 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9381 if (
I->isDebugInstr())
9384 if (++NumInst > MaxInstScan)
9397 if (Reg == VReg && --NumUse == 0)
9399 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9411 if (!Cur->isPHI() && Cur->readsRegister(Dst))
9414 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9423 if (InsPt !=
MBB.
end() &&
9424 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9425 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9426 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9427 InsPt->definesRegister(Src)) {
9431 : AMDGPU::S_MOV_B64_term),
9433 .
addReg(Src, 0, SrcSubReg)
9458 if (isFullCopyInstr(
MI)) {
9467 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9470 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9481 unsigned *PredCost)
const {
9482 if (
MI.isBundle()) {
9485 unsigned Lat = 0, Count = 0;
9486 for (++
I;
I !=
E &&
I->isBundledWithPred(); ++
I) {
9488 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
9490 return Lat + Count - 1;
9493 return SchedModel.computeInstrLatency(&
MI);
9498 unsigned opcode =
MI.getOpcode();
9499 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9500 auto IID = GI->getIntrinsicID();
9507 case Intrinsic::amdgcn_if:
9508 case Intrinsic::amdgcn_else:
9522 if (opcode == AMDGPU::G_LOAD) {
9523 if (
MI.memoperands_empty())
9527 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9528 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9536 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9537 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9538 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9551 unsigned opcode =
MI.getOpcode();
9552 if (opcode == AMDGPU::V_READLANE_B32 ||
9553 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9554 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9557 if (isCopyInstr(
MI)) {
9561 RI.getPhysRegBaseClass(srcOp.
getReg());
9569 if (
MI.isPreISelOpcode())
9584 if (
MI.memoperands_empty())
9588 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9589 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9604 for (
unsigned I = 0,
E =
MI.getNumOperands();
I !=
E; ++
I) {
9610 if (!Reg || !
SrcOp.readsReg())
9616 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
9653 Register &SrcReg2, int64_t &CmpMask,
9654 int64_t &CmpValue)
const {
9655 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
9658 switch (
MI.getOpcode()) {
9661 case AMDGPU::S_CMP_EQ_U32:
9662 case AMDGPU::S_CMP_EQ_I32:
9663 case AMDGPU::S_CMP_LG_U32:
9664 case AMDGPU::S_CMP_LG_I32:
9665 case AMDGPU::S_CMP_LT_U32:
9666 case AMDGPU::S_CMP_LT_I32:
9667 case AMDGPU::S_CMP_GT_U32:
9668 case AMDGPU::S_CMP_GT_I32:
9669 case AMDGPU::S_CMP_LE_U32:
9670 case AMDGPU::S_CMP_LE_I32:
9671 case AMDGPU::S_CMP_GE_U32:
9672 case AMDGPU::S_CMP_GE_I32:
9673 case AMDGPU::S_CMP_EQ_U64:
9674 case AMDGPU::S_CMP_LG_U64:
9675 SrcReg =
MI.getOperand(0).getReg();
9676 if (
MI.getOperand(1).isReg()) {
9677 if (
MI.getOperand(1).getSubReg())
9679 SrcReg2 =
MI.getOperand(1).getReg();
9681 }
else if (
MI.getOperand(1).isImm()) {
9683 CmpValue =
MI.getOperand(1).getImm();
9689 case AMDGPU::S_CMPK_EQ_U32:
9690 case AMDGPU::S_CMPK_EQ_I32:
9691 case AMDGPU::S_CMPK_LG_U32:
9692 case AMDGPU::S_CMPK_LG_I32:
9693 case AMDGPU::S_CMPK_LT_U32:
9694 case AMDGPU::S_CMPK_LT_I32:
9695 case AMDGPU::S_CMPK_GT_U32:
9696 case AMDGPU::S_CMPK_GT_I32:
9697 case AMDGPU::S_CMPK_LE_U32:
9698 case AMDGPU::S_CMPK_LE_I32:
9699 case AMDGPU::S_CMPK_GE_U32:
9700 case AMDGPU::S_CMPK_GE_I32:
9701 SrcReg =
MI.getOperand(0).getReg();
9703 CmpValue =
MI.getOperand(1).getImm();
9721 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue,
MRI,
9722 this](int64_t ExpectedValue,
unsigned SrcSize,
9723 bool IsReversible,
bool IsSigned) ->
bool {
9748 if (!Def || Def->getParent() != CmpInstr.
getParent())
9751 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
9752 Def->getOpcode() != AMDGPU::S_AND_B64)
9756 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
9767 SrcOp = &Def->getOperand(2);
9768 else if (isMask(&Def->getOperand(2)))
9769 SrcOp = &Def->getOperand(1);
9774 if (IsSigned && BitNo == SrcSize - 1)
9777 ExpectedValue <<= BitNo;
9779 bool IsReversedCC =
false;
9780 if (CmpValue != ExpectedValue) {
9783 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
9788 Register DefReg = Def->getOperand(0).getReg();
9789 if (IsReversedCC && !
MRI->hasOneNonDBGUse(DefReg))
9792 for (
auto I = std::next(Def->getIterator()),
E = CmpInstr.
getIterator();
9794 if (
I->modifiesRegister(AMDGPU::SCC, &RI) ||
9795 I->killsRegister(AMDGPU::SCC, &RI))
9799 MachineOperand *SccDef = Def->findRegisterDefOperand(AMDGPU::SCC);
9803 if (!
MRI->use_nodbg_empty(DefReg)) {
9811 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
9812 : AMDGPU::S_BITCMP1_B32
9813 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
9814 : AMDGPU::S_BITCMP1_B64;
9819 Def->eraseFromParent();
9827 case AMDGPU::S_CMP_EQ_U32:
9828 case AMDGPU::S_CMP_EQ_I32:
9829 case AMDGPU::S_CMPK_EQ_U32:
9830 case AMDGPU::S_CMPK_EQ_I32:
9831 return optimizeCmpAnd(1, 32,
true,
false);
9832 case AMDGPU::S_CMP_GE_U32:
9833 case AMDGPU::S_CMPK_GE_U32:
9834 return optimizeCmpAnd(1, 32,
false,
false);
9835 case AMDGPU::S_CMP_GE_I32:
9836 case AMDGPU::S_CMPK_GE_I32:
9837 return optimizeCmpAnd(1, 32,
false,
true);
9838 case AMDGPU::S_CMP_EQ_U64:
9839 return optimizeCmpAnd(1, 64,
true,
false);
9840 case AMDGPU::S_CMP_LG_U32:
9841 case AMDGPU::S_CMP_LG_I32:
9842 case AMDGPU::S_CMPK_LG_U32:
9843 case AMDGPU::S_CMPK_LG_I32:
9844 return optimizeCmpAnd(0, 32,
true,
false);
9845 case AMDGPU::S_CMP_GT_U32:
9846 case AMDGPU::S_CMPK_GT_U32:
9847 return optimizeCmpAnd(0, 32,
false,
false);
9848 case AMDGPU::S_CMP_GT_I32:
9849 case AMDGPU::S_CMPK_GT_I32:
9850 return optimizeCmpAnd(0, 32,
false,
true);
9851 case AMDGPU::S_CMP_LG_U64:
9852 return optimizeCmpAnd(0, 64,
true,
false);
9877 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
9880 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
9881 : &AMDGPU::VReg_64_Align2RegClass);
9883 .
addReg(DataReg, 0,
Op.getSubReg())
9888 Op.setSubReg(AMDGPU::sub0);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
TargetInstrInfo::RegSubRegPair RegSubRegPair
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillRestoreOpcode(unsigned Size)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MachineRegisterInfo &MRI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillSaveOpcode(unsigned Size)
static bool resultDependsOnExec(const MachineInstr &MI)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static constexpr unsigned ModifierOpNames[]
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const SIRegisterInfo * getRegisterInfo() const override
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all of the successor blocks of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Retuns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
bool isNonUniformBranchInstr(MachineInstr &Instr) const
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
bool isSpill(uint16_t Opcode) const
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description.
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminate with divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
Whether we must prevent this instruction from executing with EXEC = 0.
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
void setHasSpilledVGPRs(bool Spill=true)
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination lets the target make necessary checks and insert the copy to the PHI destinati...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.