#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"

    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.memoperands_empty() &&
    return MMO->isLoad() && MMO->isInvariant();
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
  if (MI.isCompare()) {

      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC))
  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {

    assert(ExitBlocks[0]->getSinglePredecessor());
                                    int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    assert(NumOps == 4 || NumOps == 5);

        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

    if (!Load0Offset || !Load1Offset)

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      Offset = EltSize * Offset0;
    if (DataOpIdx == -1) {

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

  return Base1 == Base2;
                                   int64_t Offset1, bool OffsetIsScalable1,
                                   int64_t Offset2, bool OffsetIsScalable2,
                                   unsigned ClusterSize,
                                   unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWORDs <= 8;
                                      int64_t Offset0, int64_t Offset1,
                                      unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                              const char *Msg = "illegal VGPR to SGPR copy") {
  C.diagnose(IllegalCopy);
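
// GFX908 path: there is no direct AGPR-to-AGPR move, so the helper below
// either folds the copy into a preceding V_ACCVGPR_WRITE_B32 definition (when
// it is safe to propagate) or stages the value through a reserved
// intermediate VGPR.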
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

         "VGPR used for an intermediate copy should have been reserved.");
  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

    if (((Size == 16) != (SrcSize == 16))) {

      if (DestReg == SrcReg) {

      RC = RI.getPhysRegBaseClass(DestReg);
      Size = RI.getRegSizeInBits(*RC);
      SrcRC = RI.getPhysRegBaseClass(SrcReg);
      SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
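
// Multi-dword (tuple) copies are lowered one element at a time; the element
// move opcode chosen below depends on whether source and destination are
// VGPRs, AGPRs, or eligible for the 64-bit / packed move variants.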
  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
        Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
        Opcode = AMDGPU::INSTRUCTION_LIST_END;
      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
      Opcode = AMDGPU::V_MOV_B64_e32;
    Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                         *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
                                   int64_t Value) const {

  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      Opcode = AMDGPU::S_MOV_B32;

    int64_t IdxValue = Idx == 0 ? Value : 0;

        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
    Builder.addImm(IdxValue);
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                      : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                      : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                      : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                      : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                      : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                      : AMDGPU::S_CSELECT_B64), SReg)
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  } else if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;

  return AMDGPU::COPY;
                                                   bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                                   bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_A32_SAVE;
    return AMDGPU::SI_SPILL_A64_SAVE;
    return AMDGPU::SI_SPILL_A96_SAVE;
    return AMDGPU::SI_SPILL_A128_SAVE;
    return AMDGPU::SI_SPILL_A160_SAVE;
    return AMDGPU::SI_SPILL_A192_SAVE;
    return AMDGPU::SI_SPILL_A224_SAVE;
    return AMDGPU::SI_SPILL_A256_SAVE;
    return AMDGPU::SI_SPILL_A288_SAVE;
    return AMDGPU::SI_SPILL_A320_SAVE;
    return AMDGPU::SI_SPILL_A352_SAVE;
    return AMDGPU::SI_SPILL_A384_SAVE;
    return AMDGPU::SI_SPILL_A512_SAVE;
    return AMDGPU::SI_SPILL_A1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
                                    bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

                                   FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                           SpillSize, RI, *MFI);
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_A32_RESTORE;
    return AMDGPU::SI_SPILL_A64_RESTORE;
    return AMDGPU::SI_SPILL_A96_RESTORE;
    return AMDGPU::SI_SPILL_A128_RESTORE;
    return AMDGPU::SI_SPILL_A160_RESTORE;
    return AMDGPU::SI_SPILL_A192_RESTORE;
    return AMDGPU::SI_SPILL_A224_RESTORE;
    return AMDGPU::SI_SPILL_A256_RESTORE;
    return AMDGPU::SI_SPILL_A288_RESTORE;
    return AMDGPU::SI_SPILL_A320_RESTORE;
    return AMDGPU::SI_SPILL_A352_RESTORE;
    return AMDGPU::SI_SPILL_A384_RESTORE;
    return AMDGPU::SI_SPILL_A512_RESTORE;
    return AMDGPU::SI_SPILL_A1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                       bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

  unsigned SpillSize = TRI->getSpillSize(*RC);

                                   FrameInfo.getObjectAlign(FrameIndex));

    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                            SpillSize, RI, *MFI);
                             unsigned Quantity) const {

  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);

  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
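
// Pseudo expansion: the *_term terminator variants are rewritten to their
// underlying S_* opcodes once terminator semantics are no longer needed, and
// the remaining pseudos are expanded into real instruction sequences and
// erased.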
  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));

  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
        APInt Lo(32, Imm.getLoBits(32).getZExtValue());
        APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
        .add(MI.getOperand(1));
        .add(MI.getOperand(2));
    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B64: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
                   MI.getOperand(0).getReg())
        .add(MI.getOperand(1));
                   MI.getOperand(0).getReg())
        .add(MI.getOperand(2));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

          BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {
                               : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

  case AMDGPU::ENTER_PSEUDO_WM:
  case AMDGPU::EXIT_PSEUDO_WM: {
    MI.eraseFromParent();

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
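
// When a real 64-bit DPP move is unavailable, V_MOV_B64_DPP_PSEUDO is split
// into two 32-bit DPP moves, one per sub0/sub1 half, with the DPP control
// operands duplicated onto each half.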
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);

std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;
                                       unsigned Src0OpName,
                                       unsigned Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);

                                         unsigned &SrcOpIdx0,
                                         unsigned &SrcOpIdx1) const {

                                         unsigned &SrcOpIdx0,
                                         unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                     int64_t BrOffset) const {

  assert(BranchOp != AMDGPU::S_SETPC_B64);

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
      MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)

  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
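
// Long-branch expansion: the target address is formed in an SGPR pair via
// S_GETPC_B64 plus a computed offset, reusing the reserved long-branch
// register when available or spilling SGPR0_SGPR1 as an emergency fallback.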
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));
    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)
    CondBB = I->getOperand(0).getMBB();
    Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();
    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

  if (Cond.size() == 1 && Cond[0].isReg()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {
    if (MRI.getRegClass(FalseReg) != RC)
    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)
    if (NumInsts % 2 == 0)
    CondCycles = TrueCycles = FalseCycles = NumInsts;

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {
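
// Wide selects are expanded into a REG_SEQUENCE of per-element selects,
// using V_CNDMASK_B32 for vector results or S_CSELECT_B32/B64 when the
// condition is SCC and the result is scalar.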
  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
  unsigned Opc = MI.getOpcode();
      MI.removeOperand(Idx);

  if (!MRI->hasOneNonDBGUse(Reg))

  switch (DefMI.getOpcode()) {
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t {
    int64_t Imm = ImmOp->getImm();
    switch (UseOp.getSubReg()) {
    case AMDGPU::sub1_lo16:
    case AMDGPU::sub1_hi16:
  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    bool Is16Bit = OpSize == 2;
    bool Is64Bit = OpSize == 8;
    unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
                                           : AMDGPU::V_MOV_B32_e32
                                 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
                                           : AMDGPU::S_MOV_B32;
    APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)));

      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
      UseMI.getOperand(0).setSubReg(0);
      UseMI.getOperand(0).setReg(DstReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
    bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
                 Opc == AMDGPU::V_FMAC_F16_t16_e64;

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

      if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
                         : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16)

      const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      Src1->ChangeToImmediate(Imm);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

      bool Src0Inlined = false;
      if (Src0->isReg()) {

        if (Def && Def->isMoveImmediate() &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
                         : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

                               int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,

  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

  unsigned Width0 = MIa.memoperands().front()->getSize();
  unsigned Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();
  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())

  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));
      if (Def.isEarlyClobber() && Def.isReg() &&

    auto UpdateDefIndex = [&](LiveRange &LR) {
      auto S = LR.find(OldIndex);
      if (S != LR.end() && S->start == OldIndex) {
        assert(S->valno && S->valno->def == OldIndex);
        S->start = NewIndex;
        S->valno->def = NewIndex;

    for (auto &SR : LI.subranges())

    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         "V_FMAC_F16_t16_e32 is not supported and not expected to be present "

  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                             AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&

  const auto killDef = [&]() -> void {
    if (!MRI.hasOneNonDBGUse(DefReg))

                         : AMDGPU::V_FMAAK_F16)
                  : AMDGPU::V_FMAAK_F32)
          : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);

                         : AMDGPU::V_FMAMK_F16)
                  : AMDGPU::V_FMAMK_F32)
          : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                          : IsF64 ? AMDGPU::V_FMA_F64_e64
                          : IsLegacy ? AMDGPU::V_FMA_LEGACY_F32_e64
                                     : AMDGPU::V_FMA_F32_e64
                          : IsF16 ? AMDGPU::V_MAD_F16_e64
                          : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
                                     : AMDGPU::V_MAD_F32_e64;

    MIB.addImm(OpSel ? OpSel->getImm() : 0);
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
      Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {

  APInt IntImm = Imm.bitcastToAPInt();

                                        uint8_t OperandType) const {
  assert(!MO.isReg() && "isInlineConstant called on register operand!");

  int64_t Imm = MO.getImm();
  switch (OperandType) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

                                  AMDGPU::OpName::src2))

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default: return false;

  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
                                           unsigned Op32) const {

    Inst32.add(MI.getOperand(0));

    assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) ||
            (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&

  if (Op32Src2Idx != -1) {

  if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)

    return MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
           MO.getReg() == AMDGPU::VCC_LO;
  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
         AMDGPU::SReg_64RegClass.contains(MO.getReg());
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
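
// Machine-instruction verification: each structural check below fills ErrInfo
// with a diagnostic string and fails the verifier when violated.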
  if (Src0Idx == -1) {

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {
      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";

  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int RegClass = Desc.operands()[i].RegClass;

    switch (Desc.operands()[i].OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Expected inline constant for operand.";

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

            RI.getSubRegisterClass(RC, MO.getSubReg());

          ErrInfo = "Subtarget requires even aligned vector registers";

    if (RegClass != -1) {
      if (Reg.isVirtual())

        ErrInfo = "Operand has incorrect register class.";
      ErrInfo = "SDWA is not supported on this target";

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
                    "Only reg allowed as operands in SDWA instructions on GFX9+";

      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";

    if (isVOPC(BasicOpcode)) {
      if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
        ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";

      if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
        ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";

      if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
        ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
            "Dst register should be tied to implicit use of preserved register";
                 Dst.getReg() != TiedMO.getReg()) {
        ErrInfo = "Dst register should use same physical register as preserved";
      uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
      if (RegCount > DstSize) {
        ErrInfo = "Image instruction returns too many registers for dst "

  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
            ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

          if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
                return !RI.regsOverlap(SGPRUsed, SGPR);

        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

      ErrInfo = "VOP3 instruction uses literal";

  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;

    for (int OpIdx : {Src0Idx, Src1Idx}) {
        if (MO.getReg() != SGPRUsed)
          ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

      ErrInfo = "ABS not allowed in VOP3B instructions";

    ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

  if (Desc.isBranch()) {
      ErrInfo = "invalid branch target for SOPK instruction";

      if (!isUInt<16>(Imm)) {
        ErrInfo = "invalid immediate for SOPK instruction";
      if (!isInt<16>(Imm)) {
        ErrInfo = "invalid immediate for SOPK instruction";
  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";
  if (MI.mayStore() &&
    if (Soff && Soff->getReg() != AMDGPU::M0) {
      ErrInfo = "scalar stores must use m0 as offset register";

    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";

    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";
                                           AMDGPU::OpName::vaddr0);
        isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;

      ErrInfo = "dim is out of range";

      IsA16 = R128A16->getImm() != 0;
    } else if (ST.hasA16()) {
      IsA16 = A16->getImm() != 0;

    bool IsNSA = RsrcIdx - VAddr0Idx > 1;

    unsigned AddrWords =

    unsigned VAddrWords;
      VAddrWords = RsrcIdx - VAddr0Idx;
        unsigned LastVAddrIdx = RsrcIdx - 1;
        VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

    if (VAddrWords != AddrWords) {
                        << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";
    using namespace AMDGPU::DPP;

    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";

    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST &&
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
      } else if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
    uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
                                        : AMDGPU::OpName::vdata;

        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";

      if (Data && Data2 &&
        ErrInfo = "Invalid register class: "
                  "both data operands should be VGPR or AGPR";

      if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
        ErrInfo = "Invalid register class: "
                  "agpr loads and stores not supported on this GPU";

    const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool {
      if (Reg.isPhysical())

    if (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
        MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
        MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";

      if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for vaddr operand of image instructions";

    if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
      if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
        ErrInfo = "Invalid register class: "
                  "v_accvgpr_write with an SGPR is not supported on this GPU";

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM: return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() || /* ... */
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  }
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
  case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
  case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
  case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
  case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
  case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
  case AMDGPU::S_CEIL_F16:
    return /* ... */ : AMDGPU::V_CEIL_F16_fake16_e64;
  case AMDGPU::S_FLOOR_F16:
    return /* ... */ : AMDGPU::V_FLOOR_F16_fake16_e64;
  case AMDGPU::S_TRUNC_F16:
    return AMDGPU::V_TRUNC_F16_fake16_e64;
  case AMDGPU::S_RNDNE_F16:
    return AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
  case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
  case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
  case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
  case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
  case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64;
  case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64;
  case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
  case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64;
  case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
  case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64;
  case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64;
  case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64;
  case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64;
  case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64;
  case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64;
  case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64;
  case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64;
  case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64;
  case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64;
  case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64;
  case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64;
  case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
  case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
  case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
  }
  llvm_unreachable(
      "Unexpected scalar opcode without corresponding vector one!");
5451 bool IsWave32 = ST.isWave32();
5456 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5457 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5466 const unsigned OrSaveExec =
5467 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5480 unsigned ExecMov =
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5482 auto ExecRestoreMI =
5492 bool IsAllocatable) {
5493 if ((IsAllocatable || !ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
5498 case AMDGPU::AV_32RegClassID:
5499 RCID = AMDGPU::VGPR_32RegClassID;
5501 case AMDGPU::AV_64RegClassID:
5502 RCID = AMDGPU::VReg_64RegClassID;
5504 case AMDGPU::AV_96RegClassID:
5505 RCID = AMDGPU::VReg_96RegClassID;
5507 case AMDGPU::AV_128RegClassID:
5508 RCID = AMDGPU::VReg_128RegClassID;
5510 case AMDGPU::AV_160RegClassID:
5511 RCID = AMDGPU::VReg_160RegClassID;
5513 case AMDGPU::AV_512RegClassID:
5514 RCID = AMDGPU::VReg_512RegClassID;
5530 auto RegClass = TID.
operands()[OpNum].RegClass;
5531 bool IsAllocatable =
false;
5541 AMDGPU::OpName::vdst);
5544 : AMDGPU::OpName::vdata);
5545 if (DataIdx != -1) {
5547 TID.
Opcode, AMDGPU::OpName::data1);
5555 unsigned OpNo)
const {
5558 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
5559 Desc.operands()[OpNo].RegClass == -1) {
5562 if (Reg.isVirtual())
5563 return MRI.getRegClass(Reg);
5564 return RI.getPhysRegBaseClass(Reg);
5567 unsigned RCID =
Desc.operands()[OpNo].RegClass;
5576 unsigned RCID =
get(
MI.getOpcode()).operands()[OpIdx].RegClass;
5578 unsigned Size = RI.getRegSizeInBits(*RC);
5579 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
5581 Opcode = AMDGPU::COPY;
5583 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
5600 if (SuperReg.
getSubReg() == AMDGPU::NoSubRegister) {
5610 Register NewSuperReg =
MRI.createVirtualRegister(SuperRC);
5616 .
addReg(NewSuperReg, 0, SubIdx);
5626 if (SubIdx == AMDGPU::sub0)
5628 if (SubIdx == AMDGPU::sub1)
5640void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
5656 if (Reg.isPhysical())
5667 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
5694 MO = &
MI.getOperand(OpIdx);
5706 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
5712 if (!SGPRsUsed.
count(SGPR) &&
5715 if (--ConstantBusLimit <= 0)
5721 if (!LiteralLimit--)
5723 if (--ConstantBusLimit <= 0)
5737 unsigned Opc =
MI.getOpcode();
5745 isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5746 if ((
int)OpIdx == VDstIdx && DataIdx != -1 &&
5747 MI.getOperand(DataIdx).isReg() &&
5748 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
5750 if ((
int)OpIdx == DataIdx) {
5751 if (VDstIdx != -1 &&
5752 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5756 AMDGPU::OpName::data1);
5757 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
5758 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5761 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts() &&
5771 bool Is64BitOp = Is64BitFPOp ||
5784 if (!Is64BitFPOp && (int32_t)Imm < 0)
5802 unsigned Opc =
MI.getOpcode();
5821 if (Opc == AMDGPU::V_WRITELANE_B32) {
5824 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5830 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5847 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
5849 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
5861 if (Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
5863 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5875 if (HasImplicitSGPR || !
MI.isCommutable()) {
5892 if (CommutedOpc == -1) {
5897 MI.setDesc(
get(CommutedOpc));
5901 bool Src0Kill = Src0.
isKill();
5905 else if (Src1.
isReg()) {
5920 unsigned Opc =
MI.getOpcode();
5928 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
5929 Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
5935 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5941 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5952 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
5954 SGPRsUsed.
insert(SGPRReg);
5958 for (
int Idx : VOP3Idx) {
5967 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
5992 if (ConstantBusLimit > 0) {
6004 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6005 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6014 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6018 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6020 get(TargetOpcode::COPY), NewSrcReg)
6027 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6033 for (
unsigned i = 0; i < SubRegs; ++i) {
6034 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6036 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6043 get(AMDGPU::REG_SEQUENCE), DstReg);
6044 for (
unsigned i = 0; i < SubRegs; ++i) {
6059 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6061 SBase->setReg(SGPR);
6073 if (OldSAddrIdx < 0)
6090 if (NewVAddrIdx < 0)
6097 if (OldVAddrIdx >= 0) {
6099 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6100 if (!VAddrDef || VAddrDef->
getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6111 if (OldVAddrIdx == NewVAddrIdx) {
6114 MRI.removeRegOperandFromUseList(&NewVAddr);
6115 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6119 MRI.removeRegOperandFromUseList(&NewVAddr);
6120 MRI.addRegOperandToUseList(&NewVAddr);
6122 assert(OldSAddrIdx == NewVAddrIdx);
6124 if (OldVAddrIdx >= 0) {
6126 AMDGPU::OpName::vdst_in);
6130 if (NewVDstIn != -1) {
6137 if (NewVDstIn != -1) {
6176 unsigned OpSubReg =
Op.getSubReg();
6185 Register DstReg =
MRI.createVirtualRegister(DstRC);
6196 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6199 bool ImpDef = Def->isImplicitDef();
6200 while (!ImpDef && Def && Def->isCopy()) {
6201 if (Def->getOperand(1).getReg().isPhysical())
6203 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6204 ImpDef = Def && Def->isImplicitDef();
6206 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6221 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6222 unsigned SaveExecOpc =
6223 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6224 unsigned XorTermOpc =
6225 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6227 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6228 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6236 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6237 unsigned NumSubRegs =
RegSize / 32;
6238 Register VScalarOp = ScalarOp->getReg();
6240 if (NumSubRegs == 1) {
6241 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6243 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6246 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6248 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6254 CondReg = NewCondReg;
6256 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6264 ScalarOp->setReg(CurReg);
6265 ScalarOp->setIsKill();
6268 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6269 "Unhandled register size");
6271 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6272 Register CurRegLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6273 Register CurRegHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6276 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6277 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6280 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6281 .
addReg(VScalarOp, VScalarOpUndef,
6282 TRI->getSubRegFromChannel(
Idx + 1));
6288 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6289 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6295 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6296 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6299 if (NumSubRegs <= 2)
6300 Cmp.addReg(VScalarOp);
6302 Cmp.addReg(VScalarOp, VScalarOpUndef,
6303 TRI->getSubRegFromChannel(
Idx, 2));
6307 CondReg = NewCondReg;
6309 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6318 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6319 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6323 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6324 unsigned Channel = 0;
6325 for (
Register Piece : ReadlanePieces) {
6326 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6330 ScalarOp->setReg(SScalarOp);
6331 ScalarOp->setIsKill();
6335 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6336 MRI.setSimpleHint(SaveExec, CondReg);
6367 if (!Begin.isValid())
6369 if (!
End.isValid()) {
6374 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6375 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6376 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6383 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6389 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6398 for (
auto I = Begin;
I != AfterMI;
I++) {
6399 for (
auto &MO :
I->all_uses())
6400 MRI.clearKillFlags(MO.getReg());
6435 for (
auto &Succ : RemainderBB->
successors()) {
static std::tuple<unsigned, unsigned>
      TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
                             AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);

  Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
  uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();

      .addImm(RsrcDataFormat & 0xFFFFFFFF);
      .addImm(RsrcDataFormat >> 32);
      .addImm(AMDGPU::sub0_sub1)

  return std::tuple(RsrcPtr, NewSRsrc);
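// What the helper above hands back (a sketch of intent, inferred from the
// builder calls): RsrcPtr is the 64-bit base pointer pulled out of the VGPR
// resource descriptor via sub0_sub1, and NewSRsrc is a fresh SGPR_128
// descriptor whose base dwords come from Zero64 and whose upper dwords carry
// getDefaultRsrcDataFormat(). The caller then folds RsrcPtr into the address
// computation while the rewritten MUBUF instruction keeps the uniform NewSRsrc.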
6534 if (
MI.getOpcode() == AMDGPU::PHI) {
6536 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
6537 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
6540 MRI.getRegClass(
MI.getOperand(i).getReg());
6555 VRC = &AMDGPU::VReg_1RegClass;
6571 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
6573 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6589 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6596 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
6598 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6616 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6621 if (DstRC != Src0RC) {
6630 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6638 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6639 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6640 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6641 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6642 MI.getOpcode() == AMDGPU::S_WQM_B64) {
6657 : AMDGPU::OpName::srsrc;
6662 int SampOpName =
isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6671 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6677 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6678 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6683 while (Start->getOpcode() != FrameSetupOpcode)
6686 while (
End->getOpcode() != FrameDestroyOpcode)
6690 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
6691 MI.definesRegister(
End->getOperand(1).getReg()))
6699 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6701 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6712 bool isSoffsetLegal =
true;
6715 if (SoffsetIdx != -1) {
6719 isSoffsetLegal =
false;
6723 bool isRsrcLegal =
true;
6726 if (RsrcIdx != -1) {
6729 isRsrcLegal =
false;
6734 if (isRsrcLegal && isSoffsetLegal)
6758 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6759 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6760 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6762 const auto *BoolXExecRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6763 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
6764 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
6766 unsigned RsrcPtr, NewSRsrc;
6773 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6780 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6798 "FIXME: Need to emit flat atomics here");
6800 unsigned RsrcPtr, NewSRsrc;
6803 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6826 MIB.
addImm(CPol->getImm());
6831 MIB.
addImm(TFE->getImm());
6851 MI.removeFromParent();
6856 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6858 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6862 if (!isSoffsetLegal) {
6874 if (!isSoffsetLegal) {
6883 InstrList.insert(
MI);
6887 if (RsrcIdx != -1) {
6888 DeferredList.insert(
MI);
6893 return DeferredList.contains(
MI);
6899 while (!Worklist.
empty()) {
6913 "Deferred MachineInstr are not supposed to re-populate worklist");
6931 case AMDGPU::S_ADD_U64_PSEUDO:
6932 NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
6934 case AMDGPU::S_SUB_U64_PSEUDO:
6935 NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
6937 case AMDGPU::S_ADD_I32:
6938 case AMDGPU::S_SUB_I32: {
6942 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
6950 case AMDGPU::S_MUL_U64:
6952 splitScalarSMulU64(Worklist, Inst, MDT);
6956 case AMDGPU::S_MUL_U64_U32_PSEUDO:
6957 case AMDGPU::S_MUL_I64_I32_PSEUDO:
6960 splitScalarSMulPseudo(Worklist, Inst, MDT);
6964 case AMDGPU::S_AND_B64:
6965 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
6969 case AMDGPU::S_OR_B64:
6970 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
6974 case AMDGPU::S_XOR_B64:
6975 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
6979 case AMDGPU::S_NAND_B64:
6980 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
6984 case AMDGPU::S_NOR_B64:
6985 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
6989 case AMDGPU::S_XNOR_B64:
6991 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
6993 splitScalar64BitXnor(Worklist, Inst, MDT);
6997 case AMDGPU::S_ANDN2_B64:
6998 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7002 case AMDGPU::S_ORN2_B64:
7003 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7007 case AMDGPU::S_BREV_B64:
7008 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7012 case AMDGPU::S_NOT_B64:
7013 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7017 case AMDGPU::S_BCNT1_I32_B64:
7018 splitScalar64BitBCNT(Worklist, Inst);
7022 case AMDGPU::S_BFE_I64:
7023 splitScalar64BitBFE(Worklist, Inst);
7027 case AMDGPU::S_FLBIT_I32_B64:
7028 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7031 case AMDGPU::S_FF1_I32_B64:
7032 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7036 case AMDGPU::S_LSHL_B32:
7038 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7042 case AMDGPU::S_ASHR_I32:
7044 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7048 case AMDGPU::S_LSHR_B32:
7050 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7054 case AMDGPU::S_LSHL_B64:
7057 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7058 : AMDGPU::V_LSHLREV_B64_e64;
7062 case AMDGPU::S_ASHR_I64:
7064 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7068 case AMDGPU::S_LSHR_B64:
7070 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7075 case AMDGPU::S_ABS_I32:
7076 lowerScalarAbs(Worklist, Inst);
7080 case AMDGPU::S_CBRANCH_SCC0:
7081 case AMDGPU::S_CBRANCH_SCC1: {
7084 bool IsSCC = CondReg == AMDGPU::SCC;
7087 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7090 .
addReg(IsSCC ? VCC : CondReg);
7094 case AMDGPU::S_BFE_U64:
7095 case AMDGPU::S_BFM_B64:
7098 case AMDGPU::S_PACK_LL_B32_B16:
7099 case AMDGPU::S_PACK_LH_B32_B16:
7100 case AMDGPU::S_PACK_HL_B32_B16:
7101 case AMDGPU::S_PACK_HH_B32_B16:
7102 movePackToVALU(Worklist,
MRI, Inst);
7106 case AMDGPU::S_XNOR_B32:
7107 lowerScalarXnor(Worklist, Inst);
7111 case AMDGPU::S_NAND_B32:
7112 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7116 case AMDGPU::S_NOR_B32:
7117 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7121 case AMDGPU::S_ANDN2_B32:
7122 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7126 case AMDGPU::S_ORN2_B32:
7127 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7135 case AMDGPU::S_ADD_CO_PSEUDO:
7136 case AMDGPU::S_SUB_CO_PSEUDO: {
7137 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7138 ? AMDGPU::V_ADDC_U32_e64
7139 : AMDGPU::V_SUBB_U32_e64;
7140 const auto *CarryRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
7143 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7144 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7162 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7166 case AMDGPU::S_UADDO_PSEUDO:
7167 case AMDGPU::S_USUBO_PSEUDO: {
7174 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7175 ? AMDGPU::V_ADD_CO_U32_e64
7176 : AMDGPU::V_SUB_CO_U32_e64;
7179 Register DestReg =
MRI.createVirtualRegister(NewRC);
7187 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7194 case AMDGPU::S_CSELECT_B32:
7195 case AMDGPU::S_CSELECT_B64:
7196 lowerSelect(Worklist, Inst, MDT);
7199 case AMDGPU::S_CMP_EQ_I32:
7200 case AMDGPU::S_CMP_LG_I32:
7201 case AMDGPU::S_CMP_GT_I32:
7202 case AMDGPU::S_CMP_GE_I32:
7203 case AMDGPU::S_CMP_LT_I32:
7204 case AMDGPU::S_CMP_LE_I32:
7205 case AMDGPU::S_CMP_EQ_U32:
7206 case AMDGPU::S_CMP_LG_U32:
7207 case AMDGPU::S_CMP_GT_U32:
7208 case AMDGPU::S_CMP_GE_U32:
7209 case AMDGPU::S_CMP_LT_U32:
7210 case AMDGPU::S_CMP_LE_U32:
7211 case AMDGPU::S_CMP_EQ_U64:
7212 case AMDGPU::S_CMP_LG_U64:
7213 case AMDGPU::S_CMP_LT_F32:
7214 case AMDGPU::S_CMP_EQ_F32:
7215 case AMDGPU::S_CMP_LE_F32:
7216 case AMDGPU::S_CMP_GT_F32:
7217 case AMDGPU::S_CMP_LG_F32:
7218 case AMDGPU::S_CMP_GE_F32:
7219 case AMDGPU::S_CMP_O_F32:
7220 case AMDGPU::S_CMP_U_F32:
7221 case AMDGPU::S_CMP_NGE_F32:
7222 case AMDGPU::S_CMP_NLG_F32:
7223 case AMDGPU::S_CMP_NGT_F32:
7224 case AMDGPU::S_CMP_NLE_F32:
7225 case AMDGPU::S_CMP_NEQ_F32:
7226 case AMDGPU::S_CMP_NLT_F32:
7227 case AMDGPU::S_CMP_LT_F16:
7228 case AMDGPU::S_CMP_EQ_F16:
7229 case AMDGPU::S_CMP_LE_F16:
7230 case AMDGPU::S_CMP_GT_F16:
7231 case AMDGPU::S_CMP_LG_F16:
7232 case AMDGPU::S_CMP_GE_F16:
7233 case AMDGPU::S_CMP_O_F16:
7234 case AMDGPU::S_CMP_U_F16:
7235 case AMDGPU::S_CMP_NGE_F16:
7236 case AMDGPU::S_CMP_NLG_F16:
7237 case AMDGPU::S_CMP_NGT_F16:
7238 case AMDGPU::S_CMP_NLE_F16:
7239 case AMDGPU::S_CMP_NEQ_F16:
7240 case AMDGPU::S_CMP_NLT_F16: {
7246 AMDGPU::OpName::src0_modifiers) >= 0) {
7261 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7265 case AMDGPU::S_CVT_HI_F32_F16: {
7267 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7268 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7279 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7283 case AMDGPU::S_MINIMUM_F32:
7284 case AMDGPU::S_MAXIMUM_F32:
7285 case AMDGPU::S_MINIMUM_F16:
7286 case AMDGPU::S_MAXIMUM_F16: {
7288 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7299 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7305 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7313 if (NewOpcode == Opcode) {
7337 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7349 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7350 MRI.replaceRegWith(DstReg, NewDstReg);
7352 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7363 AMDGPU::OpName::src0_modifiers) >= 0)
7368 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7369 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7371 NewInstr->addOperand(Src);
7374 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7377 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7379 NewInstr.addImm(
Size);
7380 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7384 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7389 "Scalar BFE is only implemented for constant width and offset");
7398 AMDGPU::OpName::src1_modifiers) >= 0)
7403 AMDGPU::OpName::src2_modifiers) >= 0)
7417 NewInstr->addOperand(
Op);
7424 if (
Op.getReg() == AMDGPU::SCC) {
7426 if (
Op.isDef() && !
Op.isDead())
7427 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7429 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7434 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7435 Register DstReg = NewInstr->getOperand(0).getReg();
7440 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7441 MRI.replaceRegWith(DstReg, NewDstReg);
7447 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
std::pair<bool, MachineBasicBlock *>

  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);

  unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
                    AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;

  MRI.replaceRegWith(OldDstReg, ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  return std::pair(true, NewBB);

  return std::pair(false, nullptr);
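// Illustrative effect of the add/sub rewrite above (a sketch; exact MIR operand
// forms are assumed): when the SCC result of an S_ADD_I32/S_SUB_I32 is not
// needed, the instruction can be replaced by the carry-less VALU form, e.g.
//   %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def dead $scc
// becoming roughly
//   %2:vgpr_32 = V_ADD_U32_e64 %0, %1, 0, implicit $exec
// after which users of the result are queued via addUsersToMoveToVALUWorklist
// so they can be legalized as well; otherwise the pair (false, nullptr) is
// returned and the instruction is handled by the generic path.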
7501 bool IsSCC = (CondReg == AMDGPU::SCC);
7509 MRI.replaceRegWith(Dest.
getReg(), CondReg);
7517 NewCondReg =
MRI.createVirtualRegister(TC);
7521 bool CopyFound =
false;
7525 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC,
false,
false, &RI) !=
7527 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7529 .
addReg(CandI.getOperand(1).getReg());
7541 : AMDGPU::S_CSELECT_B32;
7551 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
7552 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7565 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
7567 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
7579 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7580 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7583 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7593 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7594 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7609 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7617 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7618 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7624 bool Src0IsSGPR = Src0.
isReg() &&
7626 bool Src1IsSGPR = Src1.
isReg() &&
7629 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7630 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7640 }
else if (Src1IsSGPR) {
7654 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7658 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7664 unsigned Opcode)
const {
7674 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7675 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7687 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7688 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7693 unsigned Opcode)
const {
7703 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7704 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7716 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7717 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7735 &AMDGPU::SGPR_32RegClass;
7738 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7741 AMDGPU::sub0, Src0SubRC);
7746 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7748 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
7752 AMDGPU::sub1, Src0SubRC);
7754 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
7760 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
7767 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7769 Worklist.
insert(&LoHalf);
7770 Worklist.
insert(&HiHalf);
7776 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7787 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7788 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7789 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7800 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7804 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7834 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7840 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7846 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7857 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7873 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7885 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7896 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7897 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7898 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7909 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7913 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7925 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
7926 ? AMDGPU::V_MUL_HI_U32_e64
7927 : AMDGPU::V_MUL_HI_I32_e64;
7942 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7950 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7969 &AMDGPU::SGPR_32RegClass;
7972 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7975 &AMDGPU::SGPR_32RegClass;
7978 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7981 AMDGPU::sub0, Src0SubRC);
7983 AMDGPU::sub0, Src1SubRC);
7985 AMDGPU::sub1, Src0SubRC);
7987 AMDGPU::sub1, Src1SubRC);
7992 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7994 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
7999 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8004 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8011 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8013 Worklist.
insert(&LoHalf);
8014 Worklist.
insert(&HiHalf);
8017 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8035 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8051 Register NewDest =
MRI.createVirtualRegister(DestRC);
8057 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8075 MRI.getRegClass(Src.getReg()) :
8076 &AMDGPU::SGPR_32RegClass;
8078 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8079 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8082 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8085 AMDGPU::sub0, SrcSubRC);
8087 AMDGPU::sub1, SrcSubRC);
8093 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8097 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8116 Offset == 0 &&
"Not implemented");
8119 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8120 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8121 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8138 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8139 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8144 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8145 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8149 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8152 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8157 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8158 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8179 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8180 unsigned OpcodeAdd =
8181 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8184 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8186 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8193 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8194 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8195 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8196 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8203 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8209 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8211 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8213 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
8216void SIInstrInfo::addUsersToMoveToVALUWorklist(
8220 E =
MRI.use_end();
I !=
E;) {
8225 switch (
UseMI.getOpcode()) {
8228 case AMDGPU::SOFT_WQM:
8229 case AMDGPU::STRICT_WWM:
8230 case AMDGPU::STRICT_WQM:
8231 case AMDGPU::REG_SEQUENCE:
8233 case AMDGPU::INSERT_SUBREG:
8236 OpNo =
I.getOperandNo();
8245 }
while (
I !=
E &&
I->getParent() == &
UseMI);
8255 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8262 case AMDGPU::S_PACK_LL_B32_B16: {
8263 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8264 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8281 case AMDGPU::S_PACK_LH_B32_B16: {
8282 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8291 case AMDGPU::S_PACK_HL_B32_B16: {
8292 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8302 case AMDGPU::S_PACK_HH_B32_B16: {
8303 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8304 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8321 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8322 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8331 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8332 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8340 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC,
false, &RI);
8344 Register DestReg =
MI.getOperand(0).getReg();
8346 MRI.replaceRegWith(DestReg, NewCond);
8351 MI.getOperand(SCCIdx).setReg(NewCond);
8357 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC,
false,
false, &RI) != -1)
8360 for (
auto &Copy : CopyToDelete)
8361 Copy->eraseFromParent();
8369void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8378 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8380 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8397 case AMDGPU::REG_SEQUENCE:
8398 case AMDGPU::INSERT_SUBREG:
8400 case AMDGPU::SOFT_WQM:
8401 case AMDGPU::STRICT_WWM:
8402 case AMDGPU::STRICT_WQM: {
8410 case AMDGPU::REG_SEQUENCE:
8411 case AMDGPU::INSERT_SUBREG:
8421 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8438 int OpIndices[3])
const {
8457 for (
unsigned i = 0; i < 3; ++i) {
8458 int Idx = OpIndices[i];
8495 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8496 SGPRReg = UsedSGPRs[0];
8499 if (!SGPRReg && UsedSGPRs[1]) {
8500 if (UsedSGPRs[1] == UsedSGPRs[2])
8501 SGPRReg = UsedSGPRs[1];
8508 unsigned OperandName)
const {
8513 return &
MI.getOperand(
Idx);
8530 RsrcDataFormat |= (1ULL << 56);
8535 RsrcDataFormat |= (2ULL << 59);
8538 return RsrcDataFormat;
8560 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8566 unsigned Opc =
MI.getOpcode();
8572 return get(Opc).mayLoad() &&
8577 int &FrameIndex)
const {
8585 FrameIndex =
Addr->getIndex();
8590 int &FrameIndex)
const {
8593 FrameIndex =
Addr->getIndex();
8598 int &FrameIndex)
const {
8612 int &FrameIndex)
const {
8629 while (++
I !=
E &&
I->isInsideBundle()) {
8630 assert(!
I->isBundle() &&
"No nested bundle!");
  unsigned Opc = MI.getOpcode();
  unsigned DescSize = Desc.getSize();

  unsigned Size = DescSize;

    bool HasLiteral = false;
    for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {

    return HasLiteral ? DescSize + 4 : DescSize;
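// Size sketch: an instruction that encodes an extra 32-bit literal reports its
// base encoding size plus 4 bytes here. For example, a 4-byte VOP2 such as
// V_ADD_F32_e32 with a non-inlinable immediate would come out as 8 bytes,
// while the same instruction using an inline constant stays at DescSize.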
8679 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8683 case TargetOpcode::BUNDLE:
8685 case TargetOpcode::INLINEASM:
8686 case TargetOpcode::INLINEASM_BR: {
8688 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8692 if (
MI.isMetaInstruction())
8702 if (
MI.memoperands_empty())
8713 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
8725 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8728 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_IF), DstReg)
8729 .
add(Branch->getOperand(0))
8730 .
add(Branch->getOperand(1));
8732 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_END_CF))
8751 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8756 BuildMI(*(MF), Branch->getDebugLoc(),
get(TargetOpcode::PHI), DstReg);
8758 if (PMBB == LoopEnd) {
8759 HeaderPHIBuilder.
addReg(BackEdgeReg);
8764 HeaderPHIBuilder.
addReg(ZeroReg);
8766 HeaderPHIBuilder.
addMBB(PMBB);
8770 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
8772 .
add(Branch->getOperand(0));
8774 BuildMI(*(MF), Branch->getDebugLoc(),
get(AMDGPU::SI_LOOP))
8780 LoopEnd->
insert(LoopEnd->
end(), SIIFBREAK);
8781 LoopEnd->
insert(LoopEnd->
end(), SILOOP);
8787 static const std::pair<int, const char *> TargetIndices[] = {
8825std::pair<unsigned, unsigned>
8832 static const std::pair<unsigned, const char *> TargetFlags[] = {
8847 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8861 return AMDGPU::WWM_COPY;
8863 return AMDGPU::COPY;
8874 bool IsNullOrVectorRegister =
true;
8883 return IsNullOrVectorRegister &&
8884 (
isSpill(Opcode) || (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
8885 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
8898 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
8929 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
8930 case AMDGPU::SI_KILL_I1_TERMINATOR:
8939 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
8940 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
8941 case AMDGPU::SI_KILL_I1_PSEUDO:
8942 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
8954 const unsigned OffsetBits =
8956 return (1 << OffsetBits) - 1;
8963 if (
MI.isInlineAsm())
8966 for (
auto &
Op :
MI.implicit_operands()) {
8967 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
8968 Op.setReg(AMDGPU::VCC_LO);
8981 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
8999 if (Imm <= MaxImm + 64) {
9001 Overflow = Imm - MaxImm;
std::pair<int64_t, int64_t>

  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;

  if (AllowNegative) {
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;

        (ImmField % 4) != 0) {
      RemainderOffset += ImmField % 4;
      ImmField -= ImmField % 4;
  } else if (COffsetVal >= 0) {
    ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
    RemainderOffset = COffsetVal - ImmField;
  }

  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
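// Worked example for the split above (illustrative, assuming a 12-bit immediate
// field with AllowNegative): COffsetVal = 5000 gives D = 4096, so
// RemainderOffset = (5000 / 4096) * 4096 = 4096 and ImmField = 904; the 904
// stays in the instruction's offset field and the 4096 must be added to the
// base address separately. The assert checks that the two pieces re-sum to the
// original constant offset.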
9132 switch (ST.getGeneration()) {
9157 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9158 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9159 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9160 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9161 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9162 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9163 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9164 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9200 if (
isMAI(Opcode)) {
9245 for (
unsigned I = 0,
E = (
MI.getNumOperands() - 1)/ 2;
I <
E; ++
I)
9246 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9247 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9259 switch (
MI.getOpcode()) {
9261 case AMDGPU::REG_SEQUENCE:
9265 case AMDGPU::INSERT_SUBREG:
9266 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9283 if (!
P.Reg.isVirtual())
9287 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9288 while (
auto *
MI = DefInst) {
9290 switch (
MI->getOpcode()) {
9292 case AMDGPU::V_MOV_B32_e32: {
9293 auto &Op1 =
MI->getOperand(1);
9298 DefInst =
MRI.getVRegDef(RSR.Reg);
9306 DefInst =
MRI.getVRegDef(RSR.Reg);
9319 assert(
MRI.isSSA() &&
"Must be run on SSA");
9321 auto *
TRI =
MRI.getTargetRegisterInfo();
9322 auto *DefBB =
DefMI.getParent();
9326 if (
UseMI.getParent() != DefBB)
9329 const int MaxInstScan = 20;
9333 auto E =
UseMI.getIterator();
9334 for (
auto I = std::next(
DefMI.getIterator());
I !=
E; ++
I) {
9335 if (
I->isDebugInstr())
9338 if (++NumInst > MaxInstScan)
9341 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9351 assert(
MRI.isSSA() &&
"Must be run on SSA");
9353 auto *
TRI =
MRI.getTargetRegisterInfo();
9354 auto *DefBB =
DefMI.getParent();
9356 const int MaxUseScan = 10;
9359 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9360 auto &UseInst = *
Use.getParent();
9363 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9366 if (++NumUse > MaxUseScan)
9373 const int MaxInstScan = 20;
9377 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9380 if (
I->isDebugInstr())
9383 if (++NumInst > MaxInstScan)
9396 if (Reg == VReg && --NumUse == 0)
9398 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9410 if (!Cur->isPHI() && Cur->readsRegister(Dst))
9413 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9422 if (InsPt !=
MBB.
end() &&
9423 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9424 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9425 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9426 InsPt->definesRegister(Src)) {
9430 : AMDGPU::S_MOV_B64_term),
9432 .
addReg(Src, 0, SrcSubReg)
9457 if (isFullCopyInstr(
MI)) {
9466 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9469 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9480 unsigned *PredCost)
const {
9481 if (
MI.isBundle()) {
9484 unsigned Lat = 0, Count = 0;
9485 for (++
I;
I !=
E &&
I->isBundledWithPred(); ++
I) {
9487 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
9489 return Lat + Count - 1;
9492 return SchedModel.computeInstrLatency(&
MI);
9497 unsigned opcode =
MI.getOpcode();
9498 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9499 auto IID = GI->getIntrinsicID();
9506 case Intrinsic::amdgcn_if:
9507 case Intrinsic::amdgcn_else:
9521 if (opcode == AMDGPU::G_LOAD) {
9522 if (
MI.memoperands_empty())
9526 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9527 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9535 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9536 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9537 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9550 unsigned opcode =
MI.getOpcode();
9551 if (opcode == AMDGPU::V_READLANE_B32 ||
9552 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9553 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9556 if (isCopyInstr(
MI)) {
9560 RI.getPhysRegBaseClass(srcOp.
getReg());
9568 if (
MI.isPreISelOpcode())
9583 if (
MI.memoperands_empty())
9587 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9588 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9603 for (
unsigned I = 0,
E =
MI.getNumOperands();
I !=
E; ++
I) {
9609 if (!Reg || !
SrcOp.readsReg())
9615 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
9652 Register &SrcReg2, int64_t &CmpMask,
9653 int64_t &CmpValue)
const {
9654 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
9657 switch (
MI.getOpcode()) {
9660 case AMDGPU::S_CMP_EQ_U32:
9661 case AMDGPU::S_CMP_EQ_I32:
9662 case AMDGPU::S_CMP_LG_U32:
9663 case AMDGPU::S_CMP_LG_I32:
9664 case AMDGPU::S_CMP_LT_U32:
9665 case AMDGPU::S_CMP_LT_I32:
9666 case AMDGPU::S_CMP_GT_U32:
9667 case AMDGPU::S_CMP_GT_I32:
9668 case AMDGPU::S_CMP_LE_U32:
9669 case AMDGPU::S_CMP_LE_I32:
9670 case AMDGPU::S_CMP_GE_U32:
9671 case AMDGPU::S_CMP_GE_I32:
9672 case AMDGPU::S_CMP_EQ_U64:
9673 case AMDGPU::S_CMP_LG_U64:
9674 SrcReg =
MI.getOperand(0).getReg();
9675 if (
MI.getOperand(1).isReg()) {
9676 if (
MI.getOperand(1).getSubReg())
9678 SrcReg2 =
MI.getOperand(1).getReg();
9680 }
else if (
MI.getOperand(1).isImm()) {
9682 CmpValue =
MI.getOperand(1).getImm();
9688 case AMDGPU::S_CMPK_EQ_U32:
9689 case AMDGPU::S_CMPK_EQ_I32:
9690 case AMDGPU::S_CMPK_LG_U32:
9691 case AMDGPU::S_CMPK_LG_I32:
9692 case AMDGPU::S_CMPK_LT_U32:
9693 case AMDGPU::S_CMPK_LT_I32:
9694 case AMDGPU::S_CMPK_GT_U32:
9695 case AMDGPU::S_CMPK_GT_I32:
9696 case AMDGPU::S_CMPK_LE_U32:
9697 case AMDGPU::S_CMPK_LE_I32:
9698 case AMDGPU::S_CMPK_GE_U32:
9699 case AMDGPU::S_CMPK_GE_I32:
9700 SrcReg =
MI.getOperand(0).getReg();
9702 CmpValue =
MI.getOperand(1).getImm();
9720 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue,
MRI,
9721 this](int64_t ExpectedValue,
unsigned SrcSize,
9722 bool IsReversible,
bool IsSigned) ->
bool {
9747 if (!Def || Def->getParent() != CmpInstr.
getParent())
9750 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
9751 Def->getOpcode() != AMDGPU::S_AND_B64)
9755 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
9766 SrcOp = &Def->getOperand(2);
9767 else if (isMask(&Def->getOperand(2)))
9768 SrcOp = &Def->getOperand(1);
9773 if (IsSigned && BitNo == SrcSize - 1)
9776 ExpectedValue <<= BitNo;
9778 bool IsReversedCC =
false;
9779 if (CmpValue != ExpectedValue) {
9782 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
9787 Register DefReg = Def->getOperand(0).getReg();
9788 if (IsReversedCC && !
MRI->hasOneNonDBGUse(DefReg))
9791 for (
auto I = std::next(Def->getIterator()),
E = CmpInstr.
getIterator();
9793 if (
I->modifiesRegister(AMDGPU::SCC, &RI) ||
9794 I->killsRegister(AMDGPU::SCC, &RI))
9798 MachineOperand *SccDef = Def->findRegisterDefOperand(AMDGPU::SCC);
9802 if (!
MRI->use_nodbg_empty(DefReg)) {
9810 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
9811 : AMDGPU::S_BITCMP1_B32
9812 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
9813 : AMDGPU::S_BITCMP1_B64;
9818 Def->eraseFromParent();
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
    return optimizeCmpAnd(1, 32, true, false);
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMPK_GE_U32:
    return optimizeCmpAnd(1, 32, false, false);
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMPK_GE_I32:
    return optimizeCmpAnd(1, 32, false, true);
  case AMDGPU::S_CMP_EQ_U64:
    return optimizeCmpAnd(1, 64, true, false);
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
    return optimizeCmpAnd(0, 32, true, false);
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMPK_GT_U32:
    return optimizeCmpAnd(0, 32, false, false);
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMPK_GT_I32:
    return optimizeCmpAnd(0, 32, false, true);
  case AMDGPU::S_CMP_LG_U64:
    return optimizeCmpAnd(0, 64, true, false);
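// Shape of the rewrite optimizeCmpAnd enables (a sketch; exact MIR operand
// forms assumed): a single-bit test such as
//   %d = S_AND_B32 %x, 4, implicit-def $scc
//   S_CMP_LG_U32 %d, 0
// can be turned into
//   S_BITCMP1_B32 %x, 2
// with the compare replaced and the S_AND deleted once it has no other users;
// the reversed-condition variants select S_BITCMP0_B32 / S_BITCMP0_B64 instead.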
9876 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
9879 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
9880 : &AMDGPU::VReg_64_Align2RegClass);
9882 .
addReg(DataReg, 0,
Op.getSubReg())
9887 Op.setSubReg(AMDGPU::sub0);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
TargetInstrInfo::RegSubRegPair RegSubRegPair
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static bool offsetsDoNotOverlap(int WidthA, int OffsetA, int WidthB, int OffsetB)
static unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillRestoreOpcode(unsigned Size)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MachineRegisterInfo &MRI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillSaveOpcode(unsigned Size)
static bool resultDependsOnExec(const MachineInstr &MI)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static constexpr unsigned ModifierOpNames[]
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const SIRegisterInfo * getRegisterInfo() const override
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all of the successor blocks of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for register operands, etc.
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created and uniqued by the MCContext class.
void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor blocks which refer to FromMBB to refer to this.
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
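A minimal sketch of how such a liveness query is typically used before clobbering SCC when expanding a pseudo; TRI, MBB and the insertion iterator MI are assumed to be in scope and are illustrative:

  // Only reuse SCC if it is not live at the insertion point.
  bool SCCDead = MBB.computeRegisterLiveness(&TRI, AMDGPU::SCC, MI) ==
                 MachineBasicBlock::LQR_Dead;
  if (!SCCDead) {
    // Fall back to a sequence that does not define SCC.
  }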
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has no parent, prev, or next.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
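These builder methods are normally chained onto BuildMI; a minimal sketch of the pattern, assuming TII, MBB, the insertion point MI, DL and the registers (TmpReg, DestReg, Base) are in scope and illustrative:

  // Materialize a constant into TmpReg, then add it to Base.
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), TmpReg)
      .addImm(Imm);
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), DestReg)
      .addReg(Base)
      .addReg(TmpReg, RegState::Kill);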
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore, etc.
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags (isKill, isUndef and isDead).
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
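A hedged sketch of the in-place mutation these setters enable, e.g. when folding a known constant into a use; MO, FoldedReg and Imm are illustrative names assumed to be in scope:

  if (MO.isReg() && MO.getReg() == FoldedReg) {
    // Rewrite the register use into an immediate operand in place.
    MO.ChangeToImmediate(Imm);
  } else if (MO.isReg()) {
    // Conservatively clear the kill flag after moving code around.
    MO.setIsKill(false);
  }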
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register within the MachineFunction that corresponds to this MachineRegisterInfo object.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the place before To.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
bool isNonUniformBranchInstr(MachineInstr &Instr) const
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
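A small usage sketch, assuming TII, MBB, the insertion point I, DL, and DestReg/BaseReg/Offset are in scope; the trailing clamp immediate mirrors how existing call sites complete the returned builder, but treat the exact operand list as an assumption:

  // DestReg = BaseReg + Offset, without clobbering a live carry.
  TII->getAddNoCarry(MBB, I, DL, DestReg)
      .addImm(Offset)
      .addReg(BaseReg)
      .addImm(0); // clamp bit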
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
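A hedged sketch of splitting an out-of-range FLAT offset with this helper; the address space and flat variant arguments are illustrative choices:

  // Keep only the part of COffsetVal that fits the instruction's immediate
  // field; the remainder must be added to the address register instead.
  auto [ImmOffset, RemainderOffset] =
      TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                           SIInstrFlags::FlatGlobal);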
bool isSpill(uint16_t Opcode) const
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
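A short sketch of the named-operand accessors in use; the operand names chosen here (offset, soffset) are illustrative and only exist for opcodes that define them:

  // Read the immediate offset and the optional soffset of a buffer op.
  int64_t Offset = TII->getNamedImmOperand(MI, AMDGPU::OpName::offset);
  if (const MachineOperand *SOff =
          TII->getNamedOperand(MI, AMDGPU::OpName::soffset)) {
    Register SOffsetReg = SOff->getReg();
    (void)SOffsetReg;
  }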
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to another, return the destination and source registers as machine operands.
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description.
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminate with divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
Whether we must prevent this instruction from executing with EXEC = 0.
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
void setHasSpilledVGPRs(bool Spill=true)
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class SubRC with subregister index SubIdx.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle, and whether or not a noop needs to be inserted to handle the hazard.
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructions before register allocation.
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination register in a target-specific manner.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination register.
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination register in a target-specific manner.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target specify whether the instruction is actually trivially rematerializable, taking into consideration its operands.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants, literals, and mandatory literals (KImm).
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
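A hedged sketch of the usual pattern for deciding whether an immediate can be encoded inline rather than as an extra literal dword; ST is assumed to be the current GCNSubtarget, and Imm/Imm64 are illustrative values:

  // 32-bit operands: inline constants are free, anything else needs a literal.
  bool IsInline32 = AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Imm),
                                                 ST.hasInv2PiInlineImm());
  // 64-bit operands use the 64-bit variant of the same check.
  bool IsInline64 = AMDGPU::isInlinableLiteral64(Imm64,
                                                 ST.hasInv2PiInlineImm());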
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if one is needed).
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions starting from FirstMI to LastMI (exclusive).
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value, returning the original object being addressed.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the start of the kernel to the load instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair, skipping copy-like instructions and subreg-manipulation pseudos.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
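These helpers are the usual way a 64-bit immediate is split into two 32-bit moves; a minimal sketch, assuming Dst is a physical 64-bit SGPR pair and TII, TRI, MBB, MI, DL and Imm are in scope (all names illustrative):

  // Materialize a 64-bit constant as two 32-bit sub-register writes.
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
          TRI.getSubReg(Dst, AMDGPU::sub0))
      .addImm(Lo_32(Imm));
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
          TRI.getSubReg(Dst, AMDGPU::sub1))
      .addImm(Hi_32(Imm));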
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
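A small sketch of the kind of offset splitting this enables; Offset and Alignment are illustrative values assumed to be in scope:

  // Round Offset down to the required alignment; the difference must be
  // folded into the immediate field instead.
  uint64_t AlignedOffset = alignDown(Offset, Alignment.value());
  uint64_t Remainder = Offset - AlignedOffset;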
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
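A hedged sketch combining this with getMachineMemOperand when building a spill or reload, as the stack-slot helpers above do; MF, FrameIndex and Size are illustrative names assumed to be in scope:

  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(MF, FrameIndex);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, Size,
      MF.getFrameInfo().getObjectAlign(FrameIndex));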
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.