#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

static cl::opt<bool> Fix16BitCopies(
    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc(
        "Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.memoperands_empty() &&
         llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
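// For a compare, look at how its result is consumed: users that only AND the
// result into EXEC (S_AND / S_AND_SAVEEXEC) are handled specially below.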
  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC, nullptr))
  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:
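  // Sinking is only considered safe if the value does not escape the cycle
  // that defines it; walk the cycle nest from the defining block toward the
  // sink target and check the exit blocks along the way.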
  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
  if (FromCycle == nullptr)
  while (FromCycle && !FromCycle->contains(ToCycle)) {
    assert(ExitBlocks[0]->getSinglePredecessor());
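// areLoadsFromSameBasePtr: both opcodes must be loads with at least one def
// and an immediate offset operand. The named-operand indices are adjusted by
// NumDefs because SDNode operand lists do not include the definitions.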
                                          int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())
  if (Offset0Idx == -1 || Offset1Idx == -1)
  Offset0Idx -= get(Opc0).NumDefs;
  Offset1Idx -= get(Opc1).NumDefs;
    assert(NumOps == 4 || NumOps == 5);
        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));
    if (!Load0Offset || !Load1Offset)
    if (OffIdx0 == -1 || OffIdx1 == -1)
    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
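  // DS read2/write2 forms encode two 8-bit element offsets; treat the pair as
  // one contiguous access only when the second offset immediately follows the
  // first (Offset0 + 1 == Offset1).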
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
  OffsetIsScalable = false;
    unsigned Offset0 = Offset0Op->getImm() & 0xff;
    unsigned Offset1 = Offset1Op->getImm() & 0xff;
    if (Offset0 + 1 != Offset1)
    Offset = EltSize * Offset0;
    if (DataOpIdx == -1) {
    if (BaseOp && !BaseOp->isFI())
    if (SOffset->isReg())
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
  if (MO1->getAddrSpace() != MO2->getAddrSpace())
  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)
  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
  return Base1 == Base2;
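// shouldClusterMemOps: the scheduler may cluster these memory operations when
// either they share identical base operands or their combined size stays
// small; the cap below is eight DWORDs per cluster.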
                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWORDs <= 8;
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
                              const char *Msg = "illegal VGPR to SGPR copy") {
  C.diagnose(IllegalCopy);

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");
          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");
         "Destination register of the copy should be an AGPR.");
  for (auto Def = MI, E = MBB.begin(); Def != E; ) {
    if (!Def->modifiesRegister(SrcReg, &RI))
    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)
    bool SafeToPropagate = true;
    for (auto I = Def; I != MI && SafeToPropagate; ++I)
      if (I->modifiesRegister(DefOp.getReg(), &RI))
        SafeToPropagate = false;
    if (!SafeToPropagate)
    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
         "VGPR used for an intermediate copy should have been reserved.");
  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
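  // Copy SGPRs one subregister at a time, merging two adjacent, even-aligned
  // subregisters into a single S_MOV_B64 whenever possible.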
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;
  assert(FirstMI && LastMI);
  LastMI->addRegisterKilled(SrcReg, &RI);
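// copyPhysReg: choose the cheapest legal move for each pair of physical
// register classes. 16-bit copies may first be widened to their 32-bit
// super-registers (see the amdgpu-fix-16-bit-physreg-copies option above).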
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
  if ((Size == 16) != (SrcSize == 16)) {
    if (DestReg == SrcReg) {
    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
        Opcode = AMDGPU::INSTRUCTION_LIST_END;
      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
      Opcode = AMDGPU::V_MOV_B64_e32;
      Opcode = AMDGPU::V_PK_MOV_B32;
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                        *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
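// materializeImmediate: 32/64-bit scalar and VGPR destinations take a single
// mov; wider register classes are written per subregister, with the value in
// the first element and zero in the rest.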
                                     int64_t Value) const {
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      Opcode = AMDGPU::S_MOV_B32;

    int64_t IdxValue = Idx == 0 ? Value : 0;
        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
    Builder.addImm(IdxValue);
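// Select lowering: Cond is either a single lane-mask register or a
// (branch-predicate, register) pair. Each predicate case below materializes
// the corresponding lane mask into SReg, which then feeds a V_CNDMASK.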
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                              : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                              : AMDGPU::S_CSELECT_B64), SReg)
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  } else if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }
  return AMDGPU::COPY;
                                                    bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
1402 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1404 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1406 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1408 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1410 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1412 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1414 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1416 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1418 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1420 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1422 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1423 if (VecSize <= 1024)
1424 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1430 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1432 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1434 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1436 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1438 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1440 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1442 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1444 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1446 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1448 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1450 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1451 if (VecSize <= 1024)
1452 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1459 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1461 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1463 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1465 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1467 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1469 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1471 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1473 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1475 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1477 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1479 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1480 if (VecSize <= 1024)
1481 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1488 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1490 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1492 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1494 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1496 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1498 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1500 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1502 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1504 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1506 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1508 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1509 if (VecSize <= 1024)
1510 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1517 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1519 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1521 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1523 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1524 if (VecSize <= 1024)
1525 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                         bool IsSGPR) const {
  assert(EltSize == 32 && "invalid reg indexing elt size");
1551 return AMDGPU::SI_SPILL_S32_SAVE;
1553 return AMDGPU::SI_SPILL_S64_SAVE;
1555 return AMDGPU::SI_SPILL_S96_SAVE;
1557 return AMDGPU::SI_SPILL_S128_SAVE;
1559 return AMDGPU::SI_SPILL_S160_SAVE;
1561 return AMDGPU::SI_SPILL_S192_SAVE;
1563 return AMDGPU::SI_SPILL_S224_SAVE;
1565 return AMDGPU::SI_SPILL_S256_SAVE;
1567 return AMDGPU::SI_SPILL_S288_SAVE;
1569 return AMDGPU::SI_SPILL_S320_SAVE;
1571 return AMDGPU::SI_SPILL_S352_SAVE;
1573 return AMDGPU::SI_SPILL_S384_SAVE;
1575 return AMDGPU::SI_SPILL_S512_SAVE;
1577 return AMDGPU::SI_SPILL_S1024_SAVE;
1586 return AMDGPU::SI_SPILL_V32_SAVE;
1588 return AMDGPU::SI_SPILL_V64_SAVE;
1590 return AMDGPU::SI_SPILL_V96_SAVE;
1592 return AMDGPU::SI_SPILL_V128_SAVE;
1594 return AMDGPU::SI_SPILL_V160_SAVE;
1596 return AMDGPU::SI_SPILL_V192_SAVE;
1598 return AMDGPU::SI_SPILL_V224_SAVE;
1600 return AMDGPU::SI_SPILL_V256_SAVE;
1602 return AMDGPU::SI_SPILL_V288_SAVE;
1604 return AMDGPU::SI_SPILL_V320_SAVE;
1606 return AMDGPU::SI_SPILL_V352_SAVE;
1608 return AMDGPU::SI_SPILL_V384_SAVE;
1610 return AMDGPU::SI_SPILL_V512_SAVE;
1612 return AMDGPU::SI_SPILL_V1024_SAVE;
1621 return AMDGPU::SI_SPILL_A32_SAVE;
1623 return AMDGPU::SI_SPILL_A64_SAVE;
1625 return AMDGPU::SI_SPILL_A96_SAVE;
1627 return AMDGPU::SI_SPILL_A128_SAVE;
1629 return AMDGPU::SI_SPILL_A160_SAVE;
1631 return AMDGPU::SI_SPILL_A192_SAVE;
1633 return AMDGPU::SI_SPILL_A224_SAVE;
1635 return AMDGPU::SI_SPILL_A256_SAVE;
1637 return AMDGPU::SI_SPILL_A288_SAVE;
1639 return AMDGPU::SI_SPILL_A320_SAVE;
1641 return AMDGPU::SI_SPILL_A352_SAVE;
1643 return AMDGPU::SI_SPILL_A384_SAVE;
1645 return AMDGPU::SI_SPILL_A512_SAVE;
1647 return AMDGPU::SI_SPILL_A1024_SAVE;
1656 return AMDGPU::SI_SPILL_AV32_SAVE;
1658 return AMDGPU::SI_SPILL_AV64_SAVE;
1660 return AMDGPU::SI_SPILL_AV96_SAVE;
1662 return AMDGPU::SI_SPILL_AV128_SAVE;
1664 return AMDGPU::SI_SPILL_AV160_SAVE;
1666 return AMDGPU::SI_SPILL_AV192_SAVE;
1668 return AMDGPU::SI_SPILL_AV224_SAVE;
1670 return AMDGPU::SI_SPILL_AV256_SAVE;
1672 return AMDGPU::SI_SPILL_AV288_SAVE;
1674 return AMDGPU::SI_SPILL_AV320_SAVE;
1676 return AMDGPU::SI_SPILL_AV352_SAVE;
1678 return AMDGPU::SI_SPILL_AV384_SAVE;
1680 return AMDGPU::SI_SPILL_AV512_SAVE;
1682 return AMDGPU::SI_SPILL_AV1024_SAVE;
                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
  if (IsVectorSuperClass)

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

  assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
  assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
         SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
                          SpillSize, RI, *MFI);
1777 return AMDGPU::SI_SPILL_S32_RESTORE;
1779 return AMDGPU::SI_SPILL_S64_RESTORE;
1781 return AMDGPU::SI_SPILL_S96_RESTORE;
1783 return AMDGPU::SI_SPILL_S128_RESTORE;
1785 return AMDGPU::SI_SPILL_S160_RESTORE;
1787 return AMDGPU::SI_SPILL_S192_RESTORE;
1789 return AMDGPU::SI_SPILL_S224_RESTORE;
1791 return AMDGPU::SI_SPILL_S256_RESTORE;
1793 return AMDGPU::SI_SPILL_S288_RESTORE;
1795 return AMDGPU::SI_SPILL_S320_RESTORE;
1797 return AMDGPU::SI_SPILL_S352_RESTORE;
1799 return AMDGPU::SI_SPILL_S384_RESTORE;
1801 return AMDGPU::SI_SPILL_S512_RESTORE;
1803 return AMDGPU::SI_SPILL_S1024_RESTORE;
1812 return AMDGPU::SI_SPILL_V32_RESTORE;
1814 return AMDGPU::SI_SPILL_V64_RESTORE;
1816 return AMDGPU::SI_SPILL_V96_RESTORE;
1818 return AMDGPU::SI_SPILL_V128_RESTORE;
1820 return AMDGPU::SI_SPILL_V160_RESTORE;
1822 return AMDGPU::SI_SPILL_V192_RESTORE;
1824 return AMDGPU::SI_SPILL_V224_RESTORE;
1826 return AMDGPU::SI_SPILL_V256_RESTORE;
1828 return AMDGPU::SI_SPILL_V288_RESTORE;
1830 return AMDGPU::SI_SPILL_V320_RESTORE;
1832 return AMDGPU::SI_SPILL_V352_RESTORE;
1834 return AMDGPU::SI_SPILL_V384_RESTORE;
1836 return AMDGPU::SI_SPILL_V512_RESTORE;
1838 return AMDGPU::SI_SPILL_V1024_RESTORE;
1847 return AMDGPU::SI_SPILL_A32_RESTORE;
1849 return AMDGPU::SI_SPILL_A64_RESTORE;
1851 return AMDGPU::SI_SPILL_A96_RESTORE;
1853 return AMDGPU::SI_SPILL_A128_RESTORE;
1855 return AMDGPU::SI_SPILL_A160_RESTORE;
1857 return AMDGPU::SI_SPILL_A192_RESTORE;
1859 return AMDGPU::SI_SPILL_A224_RESTORE;
1861 return AMDGPU::SI_SPILL_A256_RESTORE;
1863 return AMDGPU::SI_SPILL_A288_RESTORE;
1865 return AMDGPU::SI_SPILL_A320_RESTORE;
1867 return AMDGPU::SI_SPILL_A352_RESTORE;
1869 return AMDGPU::SI_SPILL_A384_RESTORE;
1871 return AMDGPU::SI_SPILL_A512_RESTORE;
1873 return AMDGPU::SI_SPILL_A1024_RESTORE;
1882 return AMDGPU::SI_SPILL_AV32_RESTORE;
1884 return AMDGPU::SI_SPILL_AV64_RESTORE;
1886 return AMDGPU::SI_SPILL_AV96_RESTORE;
1888 return AMDGPU::SI_SPILL_AV128_RESTORE;
1890 return AMDGPU::SI_SPILL_AV160_RESTORE;
1892 return AMDGPU::SI_SPILL_AV192_RESTORE;
1894 return AMDGPU::SI_SPILL_AV224_RESTORE;
1896 return AMDGPU::SI_SPILL_AV256_RESTORE;
1898 return AMDGPU::SI_SPILL_AV288_RESTORE;
1900 return AMDGPU::SI_SPILL_AV320_RESTORE;
1902 return AMDGPU::SI_SPILL_AV352_RESTORE;
1904 return AMDGPU::SI_SPILL_AV384_RESTORE;
1906 return AMDGPU::SI_SPILL_AV512_RESTORE;
1908 return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
  if (IsVectorSuperClass)

  unsigned SpillSize = TRI->getSpillSize(*RC);
      FrameInfo.getObjectAlign(FrameIndex));

  assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
  assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
         DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
                           SpillSize, RI, *MFI);
                              unsigned Quantity) const {
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);

  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
          .addUse(DoorbellRegMasked)
          .addImm(ECQueueWaveAbort);
          .addUse(SetWaveAbortBit);

  if (SplitBB != &MBB)
  switch (MI.getOpcode()) {
  if (MI.isMetaInstruction())
  return MI.getOperand(0).getImm() + 1;
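// expandPostRAPseudo: the *_term pseudos only exist so EXEC-mask updates can
// be treated as terminators during control-flow lowering; here they are
// rewritten in place to the corresponding real scalar opcodes.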
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
        .add(MI.getOperand(1));
        .add(MI.getOperand(2));
    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B64: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
                       MI.getOperand(0).getReg())
        .add(MI.getOperand(1));
                       MI.getOperand(0).getReg())
        .add(MI.getOperand(2));
    MI.eraseFromParent();
2303 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2304 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2305 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2306 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2307 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2308 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2309 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2310 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2311 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2312 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2313 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2314 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2315 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2316 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2317 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2318 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2319 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2320 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2321 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2322 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2323 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2324 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2325 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2326 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2327 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2328 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2329 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2330 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2331 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;
    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());
        .add(MI.getOperand(2))
    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
2362 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2363 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2364 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2365 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2366 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2367 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2368 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2369 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2370 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2371 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2372 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2373 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();
    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
        .add(MI.getOperand(2))
    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
2406 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2407 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2408 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2409 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2410 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2411 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2412 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2413 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2414 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2415 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2416 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2417 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();
    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
          BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
                             : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

  case AMDGPU::ENTER_PSEUDO_WM:
  case AMDGPU::EXIT_PSEUDO_WM: {
    MI.eraseFromParent();

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();
  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
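    // If only a 256- or 128-bit subregister of a wide scalar load is actually
    // used, shrink the load to S_LOAD_DWORDX8/X4 and fold the subregister
    // byte offset into the load's immediate offset.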
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)
    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
    MRI.setRegClass(DestReg, NewRC);
    UseMO->setSubReg(AMDGPU::NoSubRegister);
    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
    OffsetMO->setImm(FinalOffset);
    MI->setMemRefs(*MF, NewMMOs);
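// expandMovDPP64: when the subtarget has a real 64-bit DPP mov, the pseudo is
// rewritten in place; otherwise it is split into two 32-bit V_MOV_dpp halves,
// one per 32-bit subregister.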
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));
      MovDPP.addImm(MO.getImm());
    Split[Part] = MovDPP;

  if (Dst.isVirtual())
  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)
  return std::nullopt;

                                        unsigned Src0OpName,
                                        unsigned Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");
  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)
  if (Src0Idx > Src1Idx)
             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");
                      Src1, AMDGPU::OpName::src1_modifiers);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())
  unsigned Opc = Desc.getOpcode();
  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
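// Long-branch expansion: when a branch target is out of range, the destination
// address is formed in an SGPR pair from S_GETPC_B64 plus the label-difference
// constants (post_getpc/offset_lo/offset_hi) and reached via an indirect
// S_SETPC_B64; the pair is scavenged or taken from the reserved long-branch
// register.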
                                     int64_t BrOffset) const {
  assert(BranchOp != AMDGPU::S_SETPC_B64);
  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
      MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)

  if (LongBranchReservedReg) {
      Scav = LongBranchReservedReg;
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();
    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
2996unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
2998 case SIInstrInfo::SCC_TRUE:
2999 return AMDGPU::S_CBRANCH_SCC1;
3000 case SIInstrInfo::SCC_FALSE:
3001 return AMDGPU::S_CBRANCH_SCC0;
3002 case SIInstrInfo::VCCNZ:
3003 return AMDGPU::S_CBRANCH_VCCNZ;
3004 case SIInstrInfo::VCCZ:
3005 return AMDGPU::S_CBRANCH_VCCZ;
3006 case SIInstrInfo::EXECNZ:
3007 return AMDGPU::S_CBRANCH_EXECNZ;
3008 case SIInstrInfo::EXECZ:
3009 return AMDGPU::S_CBRANCH_EXECZ;
3015SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3017 case AMDGPU::S_CBRANCH_SCC0:
3019 case AMDGPU::S_CBRANCH_SCC1:
3021 case AMDGPU::S_CBRANCH_VCCNZ:
3023 case AMDGPU::S_CBRANCH_VCCZ:
3025 case AMDGPU::S_CBRANCH_EXECNZ:
3027 case AMDGPU::S_CBRANCH_EXECZ:
                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));
    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)
    CondBB = I->getOperand(0).getMBB();
    Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();
3080 bool AllowModify)
const {
3088 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3089 switch (
I->getOpcode()) {
3090 case AMDGPU::S_MOV_B64_term:
3091 case AMDGPU::S_XOR_B64_term:
3092 case AMDGPU::S_OR_B64_term:
3093 case AMDGPU::S_ANDN2_B64_term:
3094 case AMDGPU::S_AND_B64_term:
3095 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3096 case AMDGPU::S_MOV_B32_term:
3097 case AMDGPU::S_XOR_B32_term:
3098 case AMDGPU::S_OR_B32_term:
3099 case AMDGPU::S_ANDN2_B32_term:
3100 case AMDGPU::S_AND_B32_term:
3101 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3104 case AMDGPU::SI_ELSE:
3105 case AMDGPU::SI_KILL_I1_TERMINATOR:
3106 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();
    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

  if (Cond.size() == 1 && Cond[0].isReg()) {
      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {
    if (MRI.getRegClass(FalseReg) != RC)
    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;
    if (MRI.getRegClass(FalseReg) != RC)
    if (NumInsts % 2 == 0)
    CondCycles = TrueCycles = FalseCycles = NumInsts;

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {
  if (DstSize == 64 && Pred == SCC_TRUE) {
  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };
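  // Wide selects become a REG_SEQUENCE of per-element selects. Note the
  // operand order: V_CNDMASK takes (false, true) while S_CSELECT takes
  // (true, false), hence the swapped addReg calls below.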
  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);
    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
        .addReg(FalseReg, 0, SubIdx)
        .addReg(TrueReg, 0, SubIdx);
        .addReg(TrueReg, 0, SubIdx)
        .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
3372 case AMDGPU::V_MOV_B32_e32:
3373 case AMDGPU::V_MOV_B32_e64:
3374 case AMDGPU::V_MOV_B64_PSEUDO:
3375 case AMDGPU::V_MOV_B64_e32:
3376 case AMDGPU::V_MOV_B64_e64:
3377 case AMDGPU::S_MOV_B32:
3378 case AMDGPU::S_MOV_B64:
3379 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3381 case AMDGPU::WWM_COPY:
3382 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3383 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3384 case AMDGPU::V_ACCVGPR_MOV_B32:
3392 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3393 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3394 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
  unsigned Opc = MI.getOpcode();
      MI.removeOperand(Idx);
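// foldImmediate: fold a materialized immediate into its single non-debug use.
// A plain COPY becomes a direct mov of the immediate; MAD/FMA-style users can
// be rewritten into the madmk/madak (fmamk/fmaak) forms handled further down.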
3407 if (!
MRI->hasOneNonDBGUse(Reg))
3410 switch (
DefMI.getOpcode()) {
3413 case AMDGPU::V_MOV_B64_e32:
3414 case AMDGPU::S_MOV_B64:
3415 case AMDGPU::V_MOV_B64_PSEUDO:
3416 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3417 case AMDGPU::V_MOV_B32_e32:
3418 case AMDGPU::S_MOV_B32:
3419 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3426 if (!ImmOp->
isImm())
3429 auto getImmFor = [ImmOp](
const MachineOperand &UseOp) -> int64_t {
3430 int64_t Imm = ImmOp->
getImm();
3431 switch (UseOp.getSubReg()) {
3442 case AMDGPU::sub1_lo16:
3444 case AMDGPU::sub1_hi16:
3449 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3451 unsigned Opc =
UseMI.getOpcode();
3452 if (Opc == AMDGPU::COPY) {
3453 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3457 bool Is16Bit = OpSize == 2;
3458 bool Is64Bit = OpSize == 8;
3460 unsigned NewOpc =
isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
3461 : AMDGPU::V_MOV_B32_e32
3462 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
3463 : AMDGPU::S_MOV_B32;
3464 APInt Imm(Is64Bit ? 64 : 32, getImmFor(
UseMI.getOperand(1)));
3469 NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
3476 if (DstReg.
isVirtual() &&
UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
3479 UseMI.getOperand(0).setSubReg(0);
3482 UseMI.getOperand(0).setReg(DstReg);
3492 UseMI.setDesc(NewMCID);
3493 UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
3498 if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
3499 Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3500 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3501 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3502 Opc == AMDGPU::V_FMAC_F16_t16_e64) {
3517 bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
3518 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
3520 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3521 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3522 Opc == AMDGPU::V_FMAC_F16_t16_e64;
3530 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3531 if (!RegSrc->
isReg())
3549 if (Def && Def->isMoveImmediate() &&
3554 IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3556 : AMDGPU::V_FMAMK_F16)
3557 : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
3564 if (NewOpc == AMDGPU::V_FMAMK_F16_t16)
3567 const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
3573 unsigned SrcSubReg = RegSrc->
getSubReg();
3578 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3579 Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3580 Opc == AMDGPU::V_FMAC_F16_e64)
3581 UseMI.untieRegOperand(
3584 Src1->ChangeToImmediate(Imm);
3589 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3591 DefMI.eraseFromParent();
3601 bool Src0Inlined =
false;
3602 if (Src0->
isReg()) {
3607 if (Def && Def->isMoveImmediate() &&
3619 if (Src1->
isReg() && !Src0Inlined) {
3622 if (Def && Def->isMoveImmediate() &&
3633 IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3635 : AMDGPU::V_FMAAK_F16)
3636 : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
3643 if (NewOpc == AMDGPU::V_FMAAK_F16_t16)
3649 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3650 Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3651 Opc == AMDGPU::V_FMAC_F16_e64)
3652 UseMI.untieRegOperand(
3666 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3668 DefMI.eraseFromParent();
3680 if (BaseOps1.
size() != BaseOps2.
size())
3682 for (
size_t I = 0, E = BaseOps1.
size();
I < E; ++
I) {
3683 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
3691 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
3692 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
3693 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
3695 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
3698bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
3701 int64_t Offset0, Offset1;
3703 bool Offset0IsScalable, Offset1IsScalable;
3725 "MIa must load from or modify a memory location");
3727 "MIb must load from or modify a memory location");
3746 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3753 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3763 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3777 return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())
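// convertToThreeAddress: rewrite two-address MAC/FMAC (and MFMA) instructions
// into their three-address MAD/FMA forms so the destination no longer has to
// be tied to src2; live intervals are patched in place when LiveIntervals is
// available.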
  unsigned Opc = MI.getOpcode();
  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));
      if (Def.isEarlyClobber() && Def.isReg() &&
        auto UpdateDefIndex = [&](LiveRange &LR) {
          auto S = LR.find(OldIndex);
          if (S != LR.end() && S->start == OldIndex) {
            assert(S->valno && S->valno->def == OldIndex);
            S->start = NewIndex;
            S->valno->def = NewIndex;
        for (auto &SR : LI.subranges())

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         "V_FMAC_F16_t16_e32 is not supported and not expected to be present ");

  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
3898 case AMDGPU::V_MAC_F16_e64:
3899 case AMDGPU::V_FMAC_F16_e64:
3900 case AMDGPU::V_FMAC_F16_t16_e64:
3901 case AMDGPU::V_MAC_F32_e64:
3902 case AMDGPU::V_MAC_LEGACY_F32_e64:
3903 case AMDGPU::V_FMAC_F32_e64:
3904 case AMDGPU::V_FMAC_LEGACY_F32_e64:
3905 case AMDGPU::V_FMAC_F64_e64:
3907 case AMDGPU::V_MAC_F16_e32:
3908 case AMDGPU::V_FMAC_F16_e32:
3909 case AMDGPU::V_MAC_F32_e32:
3910 case AMDGPU::V_MAC_LEGACY_F32_e32:
3911 case AMDGPU::V_FMAC_F32_e32:
3912 case AMDGPU::V_FMAC_LEGACY_F32_e32:
3913 case AMDGPU::V_FMAC_F64_e32: {
3915 AMDGPU::OpName::src0);
3942 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
3948 const auto killDef = [&]() ->
void {
3952 if (!
MRI.hasOneNonDBGUse(DefReg))
3966 : AMDGPU::V_FMAAK_F16)
3967 : AMDGPU::V_FMAAK_F32)
3968 : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
3984 : AMDGPU::V_FMAMK_F16)
3985 : AMDGPU::V_FMAMK_F32)
3986 : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
  unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                          : IsF64 ? AMDGPU::V_FMA_F64_e64
                          : IsLegacy ? AMDGPU::V_FMA_LEGACY_F32_e64
                                     : AMDGPU::V_FMA_F32_e64
                          : IsF16 ? AMDGPU::V_MAD_F16_e64
                          : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
                                     : AMDGPU::V_MAD_F32_e64;
    MIB.addImm(OpSel ? OpSel->getImm() : 0);
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())
  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
      Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
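// Conservatively treat anything that communicates outside the wave (messages,
// GWS, traps, readlane/writelane spills, calls, inline asm) as still having
// effects when EXEC is zero; plain SALU code that never reads EXEC is exempt.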
  switch (Imm.getBitWidth()) {
  APInt IntImm = Imm.bitcastToAPInt();

                                   uint8_t OperandType) const {
  assert(!MO.isReg() && "isInlineConstant called on register operand!");

  int64_t Imm = MO.getImm();
  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);
    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);
                           AMDGPU::OpName::src2))
  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
  return Mods && Mods->getImm();
4413 switch (
MI.getOpcode()) {
4414 default:
return false;
4416 case AMDGPU::V_ADDC_U32_e64:
4417 case AMDGPU::V_SUBB_U32_e64:
4418 case AMDGPU::V_SUBBREV_U32_e64: {
4426 case AMDGPU::V_MAC_F16_e64:
4427 case AMDGPU::V_MAC_F32_e64:
4428 case AMDGPU::V_MAC_LEGACY_F32_e64:
4429 case AMDGPU::V_FMAC_F16_e64:
4430 case AMDGPU::V_FMAC_F16_t16_e64:
4431 case AMDGPU::V_FMAC_F32_e64:
4432 case AMDGPU::V_FMAC_F64_e64:
4433 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4439 case AMDGPU::V_CNDMASK_B32_e64:
4470 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4479 unsigned Op32)
const {
4489 Inst32.
add(
MI.getOperand(0));
4493 assert(((
MI.getOperand(0).getReg() == AMDGPU::VCC) ||
4494 (
MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&
4508 if (Op32Src2Idx != -1) {
4538 if (MO.
getReg() == AMDGPU::SGPR_NULL || MO.
getReg() == AMDGPU::SGPR_NULL64)
4543 return MO.
getReg() == AMDGPU::M0 ||
4544 MO.
getReg() == AMDGPU::VCC ||
4545 MO.
getReg() == AMDGPU::VCC_LO;
4547 return AMDGPU::SReg_32RegClass.contains(MO.
getReg()) ||
4548 AMDGPU::SReg_64RegClass.contains(MO.
getReg());
4558 switch (MO.getReg()) {
4560 case AMDGPU::VCC_LO:
4561 case AMDGPU::VCC_HI:
4563 case AMDGPU::FLAT_SCR:
4576 switch (
MI.getOpcode()) {
4577 case AMDGPU::V_READLANE_B32:
4578 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4579 case AMDGPU::V_WRITELANE_B32:
4580 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4587 if (
MI.isPreISelOpcode() ||
4588 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
4599 if (
SubReg.getReg().isPhysical())
4602 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
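// verifyInstruction: machine-verifier hook. Each block below checks one class
// of target constraint (operand counts, register classes, SDWA rules,
// constant-bus usage, image address sizes, DPP control values, ...) and
// reports the first violation through ErrInfo.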
4609 if (SIInstrInfo::isGenericOpcode(
MI.getOpcode()))
4619 if (Src0Idx == -1) {
4629 if (!
Desc.isVariadic() &&
4630 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
4631 ErrInfo =
"Instruction has wrong number of operands.";
4635 if (
MI.isInlineAsm()) {
4648 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
4649 ErrInfo =
"inlineasm operand has incorrect register class.";
4657 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
4658 ErrInfo =
"missing memory operand from image instruction.";
4663 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
4666 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
4667 "all fp values to integers.";
4671 int RegClass =
Desc.operands()[i].RegClass;
4673 switch (
Desc.operands()[i].OperandType) {
4675 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
4676 ErrInfo =
"Illegal immediate value for operand.";
4697 ErrInfo =
"Illegal immediate value for operand.";
4704 ErrInfo =
"Expected inline constant for operand.";
4713 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
4714 ErrInfo =
"Expected immediate, but got non-immediate";
4736 RI.getSubRegisterClass(RC, MO.
getSubReg());
4744 ErrInfo =
"Subtarget requires even aligned vector registers";
4749 if (RegClass != -1) {
4750 if (Reg.isVirtual())
4755 ErrInfo =
"Operand has incorrect register class.";
4764 ErrInfo =
"SDWA is not supported on this target";
4770 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
4778 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
4785 "Only reg allowed as operands in SDWA instructions on GFX9+";
4794 if (OMod !=
nullptr &&
4796 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
4801 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
4802 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
4803 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
4804 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
4807 unsigned Mods = Src0ModsMO->
getImm();
4810 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
4816 if (
isVOPC(BasicOpcode)) {
4820 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
4821 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
4827 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
4828 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
4834 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
4835 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
4842 if (DstUnused && DstUnused->isImm() &&
4845 if (!Dst.isReg() || !Dst.isTied()) {
4846 ErrInfo =
"Dst register should have tied register";
4851 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
4854 "Dst register should be tied to implicit use of preserved register";
4857 Dst.getReg() != TiedMO.
getReg()) {
4858 ErrInfo =
"Dst register should use same physical register as preserved";
4890 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
4891 if (RegCount > DstSize) {
4892 ErrInfo =
"Image instruction returns too many registers for dst "
4901 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
4902 unsigned ConstantBusCount = 0;
4903 bool UsesLiteral =
false;
4910 LiteralVal = &
MI.getOperand(ImmIdx);
4919 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
4937 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
4947 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
4948 return !RI.regsOverlap(SGPRUsed, SGPR);
4958 Opcode != AMDGPU::V_WRITELANE_B32) {
4959 ErrInfo =
"VOP* instruction violates constant bus restriction";
4964 ErrInfo =
"VOP3 instruction uses literal";
4971 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
4972 unsigned SGPRCount = 0;
4975 for (
int OpIdx : {Src0Idx, Src1Idx}) {
4983 if (MO.
getReg() != SGPRUsed)
4989 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
4996 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
4997 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5004 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5014 ErrInfo =
"ABS not allowed in VOP3B instructions";
5027 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5034 if (
Desc.isBranch()) {
5036 ErrInfo =
"invalid branch target for SOPK instruction";
5042 if (!isUInt<16>(Imm)) {
5043 ErrInfo =
"invalid immediate for SOPK instruction";
5047 if (!isInt<16>(Imm)) {
5048 ErrInfo =
"invalid immediate for SOPK instruction";
5055 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5056 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5057 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5058 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5059 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5060 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5062 const unsigned StaticNumOps =
5063 Desc.getNumOperands() +
Desc.implicit_uses().size();
5064 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5069 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5070 ErrInfo =
"missing implicit register operands";
5076 if (!Dst->isUse()) {
5077 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5082 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5083 UseOpIdx != StaticNumOps + 1) {
5084 ErrInfo =
"movrel implicit operands should be tied";
5091 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5093 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5094 ErrInfo =
"src0 should be subreg of implicit vector use";
5102 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5103 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5109 if (
MI.mayStore() &&
5114 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5115 ErrInfo =
"scalar stores must use m0 as offset register";
5123 if (
Offset->getImm() != 0) {
5124 ErrInfo =
"subtarget does not support offsets in flat instructions";
5131 if (GDSOp && GDSOp->
getImm() != 0) {
5132 ErrInfo =
"GDS is not supported on this subtarget";
5141 AMDGPU::OpName::vaddr0);
5143 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5152 ErrInfo =
"dim is out of range";
5159 IsA16 = R128A16->
getImm() != 0;
5160 }
else if (ST.
hasA16()) {
5162 IsA16 = A16->
getImm() != 0;
5165 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5167 unsigned AddrWords =
5170 unsigned VAddrWords;
5172 VAddrWords = RsrcIdx - VAddr0Idx;
5175 unsigned LastVAddrIdx = RsrcIdx - 1;
5176 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5184 if (VAddrWords != AddrWords) {
5186 <<
" but got " << VAddrWords <<
"\n");
5187 ErrInfo =
"bad vaddr size";
5195 using namespace AMDGPU::DPP;
5197 unsigned DC = DppCt->
getImm();
5198 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5199 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5200 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5201 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5202 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5203 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5204 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5205 ErrInfo =
"Invalid dpp_ctrl value";
5208 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5210 ErrInfo =
"Invalid dpp_ctrl value: "
5211 "wavefront shifts are not supported on GFX10+";
5214 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5216 ErrInfo =
"Invalid dpp_ctrl value: "
5217 "broadcasts are not supported on GFX10+";
5220 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5222 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5223 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5225 ErrInfo =
"Invalid dpp_ctrl value: "
5226 "row_newbroadcast/row_share is not supported before "
5229 }
else if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.
hasGFX90AInsts()) {
5230 ErrInfo =
"Invalid dpp_ctrl value: "
5231 "row_share and row_xmask are not supported before GFX10";
5236 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5238 ErrInfo =
"Invalid dpp_ctrl value: "
5239 "DP ALU dpp only support row_newbcast";
    uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
                                        : AMDGPU::OpName::vdata;
      ErrInfo = "Invalid register class: "
                "vdata and vdst should be both VGPR or AGPR";
    if (Data && Data2 &&
      ErrInfo = "Invalid register class: "
                "both data operands should be VGPR or AGPR";
    if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
      ErrInfo = "Invalid register class: "
                "agpr loads and stores not supported on this GPU";

    const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool {
      if (Reg.isPhysical())

    if (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
        MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
        MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";
    if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
      ErrInfo = "Subtarget requires even aligned vector registers "
                "for vaddr operand of image instructions";

    if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";

    if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM: return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
           RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  }
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
  case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
  case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
  case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
  case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
  case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
  case AMDGPU::S_CEIL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
                                   : AMDGPU::V_CEIL_F16_fake16_e64;
  case AMDGPU::S_FLOOR_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
                                   : AMDGPU::V_FLOOR_F16_fake16_e64;
  case AMDGPU::S_TRUNC_F16:
    return AMDGPU::V_TRUNC_F16_fake16_e64;
  case AMDGPU::S_RNDNE_F16:
    return AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
  case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
  case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
  case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
  case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
  case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64;
  case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64;
  case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
  case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64;
  case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
  case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64;
  case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64;
  case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64;
  case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64;
  case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64;
  case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64;
  case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64;
  case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64;
  case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64;
  case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64;
  case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64;
  case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64;
  case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
  case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
  case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
  }
  llvm_unreachable(
      "Unexpected scalar opcode without corresponding vector one!");
  bool IsWave32 = ST.isWave32();

  unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

  const unsigned OrSaveExec =
      IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;

  unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5534 auto ExecRestoreMI =
5544 bool IsAllocatable) {
5545 if ((IsAllocatable || !ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
    case AMDGPU::AV_32RegClassID:
      RCID = AMDGPU::VGPR_32RegClassID;
      break;
    case AMDGPU::AV_64RegClassID:
      RCID = AMDGPU::VReg_64RegClassID;
      break;
    case AMDGPU::AV_96RegClassID:
      RCID = AMDGPU::VReg_96RegClassID;
      break;
    case AMDGPU::AV_128RegClassID:
      RCID = AMDGPU::VReg_128RegClassID;
      break;
    case AMDGPU::AV_160RegClassID:
      RCID = AMDGPU::VReg_160RegClassID;
      break;
    case AMDGPU::AV_512RegClassID:
      RCID = AMDGPU::VReg_512RegClassID;
      break;
5582 auto RegClass = TID.
operands()[OpNum].RegClass;
5583 bool IsAllocatable =
false;
5593 AMDGPU::OpName::vdst);
5596 : AMDGPU::OpName::vdata);
5597 if (DataIdx != -1) {
5599 TID.
Opcode, AMDGPU::OpName::data1);
5607 unsigned OpNo)
const {
5610 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
5611 Desc.operands()[OpNo].RegClass == -1) {
5614 if (Reg.isVirtual())
5615 return MRI.getRegClass(Reg);
5616 return RI.getPhysRegBaseClass(Reg);
5619 unsigned RCID =
Desc.operands()[OpNo].RegClass;
5628 unsigned RCID =
get(
MI.getOpcode()).operands()[OpIdx].RegClass;
5630 unsigned Size = RI.getRegSizeInBits(*RC);
5631 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
5633 Opcode = AMDGPU::COPY;
5635 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
5652 if (SuperReg.
getSubReg() == AMDGPU::NoSubRegister) {
5662 Register NewSuperReg =
MRI.createVirtualRegister(SuperRC);
5668 .
addReg(NewSuperReg, 0, SubIdx);
5678 if (SubIdx == AMDGPU::sub0)
5680 if (SubIdx == AMDGPU::sub1)
5692void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
5708 if (Reg.isPhysical())
5719 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
5746 MO = &
MI.getOperand(OpIdx);
5758 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
5764 if (!SGPRsUsed.
count(SGPR) &&
5767 if (--ConstantBusLimit <= 0)
5773 if (!LiteralLimit--)
5775 if (--ConstantBusLimit <= 0)
5789 unsigned Opc =
MI.getOpcode();
5797 isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5798 if ((
int)OpIdx == VDstIdx && DataIdx != -1 &&
5799 MI.getOperand(DataIdx).isReg() &&
5800 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
5802 if ((
int)OpIdx == DataIdx) {
5803 if (VDstIdx != -1 &&
5804 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5808 AMDGPU::OpName::data1);
5809 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
5810 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5813 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts() &&
5823 bool Is64BitOp = Is64BitFPOp ||
5836 if (!Is64BitFPOp && (int32_t)Imm < 0)
5854 unsigned Opc =
MI.getOpcode();
5873 if (Opc == AMDGPU::V_WRITELANE_B32) {
5876 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5882 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5899 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
5901 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
5913 if (Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
5915 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5927 if (HasImplicitSGPR || !
MI.isCommutable()) {
5944 if (CommutedOpc == -1) {
5949 MI.setDesc(
get(CommutedOpc));
5953 bool Src0Kill = Src0.
isKill();
5957 else if (Src1.
isReg()) {
5972 unsigned Opc =
MI.getOpcode();
5980 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
5981 Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
5987 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5993 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6004 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6006 SGPRsUsed.
insert(SGPRReg);
6010 for (
int Idx : VOP3Idx) {
6019 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6044 if (ConstantBusLimit > 0) {
6056 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6057 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6066 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6070 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6072 get(TargetOpcode::COPY), NewSrcReg)
6079 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6085 for (
unsigned i = 0; i < SubRegs; ++i) {
6086 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6088 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6095 get(AMDGPU::REG_SEQUENCE), DstReg);
6096 for (
unsigned i = 0; i < SubRegs; ++i) {
6111 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6113 SBase->setReg(SGPR);
6125 if (OldSAddrIdx < 0)
6142 if (NewVAddrIdx < 0)
6149 if (OldVAddrIdx >= 0) {
6151 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6152 if (!VAddrDef || VAddrDef->
getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6163 if (OldVAddrIdx == NewVAddrIdx) {
6166 MRI.removeRegOperandFromUseList(&NewVAddr);
6167 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6171 MRI.removeRegOperandFromUseList(&NewVAddr);
6172 MRI.addRegOperandToUseList(&NewVAddr);
6174 assert(OldSAddrIdx == NewVAddrIdx);
6176 if (OldVAddrIdx >= 0) {
6178 AMDGPU::OpName::vdst_in);
6182 if (NewVDstIn != -1) {
6189 if (NewVDstIn != -1) {
6228 unsigned OpSubReg =
Op.getSubReg();
6237 Register DstReg =
MRI.createVirtualRegister(DstRC);
6248 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6251 bool ImpDef = Def->isImplicitDef();
6252 while (!ImpDef && Def && Def->isCopy()) {
6253 if (Def->getOperand(1).getReg().isPhysical())
6255 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6256 ImpDef = Def && Def->isImplicitDef();
6258 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
  unsigned SaveExecOpc =
      ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
  unsigned XorTermOpc =
      ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
      ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
  const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6288 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6289 unsigned NumSubRegs =
RegSize / 32;
6290 Register VScalarOp = ScalarOp->getReg();
6292 if (NumSubRegs == 1) {
6293 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6295 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6298 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6300 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6306 CondReg = NewCondReg;
6308 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6316 ScalarOp->setReg(CurReg);
6317 ScalarOp->setIsKill();
6320 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6321 "Unhandled register size");
6323 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6324 Register CurRegLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6325 Register CurRegHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6328 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6329 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6332 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6333 .
addReg(VScalarOp, VScalarOpUndef,
6334 TRI->getSubRegFromChannel(
Idx + 1));
6340 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6341 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6347 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6348 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6351 if (NumSubRegs <= 2)
6352 Cmp.addReg(VScalarOp);
6354 Cmp.addReg(VScalarOp, VScalarOpUndef,
6355 TRI->getSubRegFromChannel(
Idx, 2));
6359 CondReg = NewCondReg;
6361 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6370 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6371 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6375 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6376 unsigned Channel = 0;
6377 for (
Register Piece : ReadlanePieces) {
6378 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6382 ScalarOp->setReg(SScalarOp);
6383 ScalarOp->setIsKill();
6387 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6388 MRI.setSimpleHint(SaveExec, CondReg);
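// Added note (my paraphrase, not the emitted MIR): the block above builds the
// usual "waterfall" loop used when a buffer or image instruction needs a
// scalar (SGPR) operand but the value currently lives in a possibly divergent
// VGPR.  Roughly:
//
//   loop:
//     cur  = V_READFIRSTLANE_B32(vgpr_op)    ; value of one active lane
//     cond = V_CMP_EQ(vgpr_op, cur)          ; lanes that share that value
//     save = S_AND_SAVEEXEC(cond)            ; restrict exec to those lanes
//     ...  run the instruction with 'cur' as the now-uniform operand ...
//     exec = exec ^ save                     ; retire the handled lanes
//     S_CBRANCH_EXECNZ loop                  ; repeat until every lane is done
//
// Wider scalar operands are read 64 bits at a time and compared with
// V_CMP_EQ_U64, which is what the NumSubRegs/Idx loop above implements.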
6419 if (!Begin.isValid())
6421 if (!
End.isValid()) {
6426 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6427 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6428 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6435 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6441 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6450 for (
auto I = Begin;
I != AfterMI;
I++) {
6451 for (
auto &MO :
I->all_uses())
6452 MRI.clearKillFlags(MO.getReg());
6487 for (
auto &Succ : RemainderBB->
successors()) {
6510static std::tuple<unsigned, unsigned>
6518 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
6519 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
6522 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6523 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6524 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6525 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6526 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
6534 .
addImm(RsrcDataFormat & 0xFFFFFFFF);
6538 .
addImm(RsrcDataFormat >> 32);
6543 .
addImm(AMDGPU::sub0_sub1)
6549 return std::tuple(RsrcPtr, NewSRsrc);
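// Added note (an assumption based on the addImm calls above): the default
// resource data format is split into two 32-bit halves before being glued
// onto the extracted 64-bit pointer,
//
//   SRsrcFormatLo = RsrcDataFormat & 0xFFFFFFFF;   // low dword
//   SRsrcFormatHi = RsrcDataFormat >> 32;          // high dword
//
// so NewSRsrc becomes { RsrcPtr (64-bit base), format lo, format hi }, a legal
// 128-bit SGPR resource that keeps the original pointer but uses the
// subtarget's default format bits.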
6586 if (
MI.getOpcode() == AMDGPU::PHI) {
6588 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
6589 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
6592 MRI.getRegClass(
MI.getOperand(i).getReg());
6607 VRC = &AMDGPU::VReg_1RegClass;
6623 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6625 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6641 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6648 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6650 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6668 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6673 if (DstRC != Src0RC) {
6682 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6690 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6691 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6692 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6693 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6694 MI.getOpcode() == AMDGPU::S_WQM_B64) {
6709 : AMDGPU::OpName::srsrc;
6714 int SampOpName =
isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6723 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6729 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6730 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6735 while (Start->getOpcode() != FrameSetupOpcode)
6738 while (
End->getOpcode() != FrameDestroyOpcode)
6742 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
6743 MI.definesRegister(
End->getOperand(1).getReg(),
nullptr))
6751 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6753 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6764 bool isSoffsetLegal =
true;
6767 if (SoffsetIdx != -1) {
6771 isSoffsetLegal =
false;
6775 bool isRsrcLegal =
true;
6778 if (RsrcIdx != -1) {
6781 isRsrcLegal =
false;
6786 if (isRsrcLegal && isSoffsetLegal)
6810 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6811 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6812 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6814 const auto *BoolXExecRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6815 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
6816 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
6818 unsigned RsrcPtr, NewSRsrc;
6825 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6832 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6850 "FIXME: Need to emit flat atomics here");
6852 unsigned RsrcPtr, NewSRsrc;
6855 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6878 MIB.
addImm(CPol->getImm());
6883 MIB.
addImm(TFE->getImm());
6903 MI.removeFromParent();
6908 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6910 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6914 if (!isSoffsetLegal) {
6926 if (!isSoffsetLegal) {
6935 InstrList.insert(
MI);
6939 if (RsrcIdx != -1) {
6940 DeferredList.insert(
MI);
6945 return DeferredList.contains(
MI);
6951 while (!Worklist.
empty()) {
6965 "Deferred MachineInstr are not supposed to re-populate worklist");
6983 case AMDGPU::S_ADD_U64_PSEUDO:
6984 NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
6986 case AMDGPU::S_SUB_U64_PSEUDO:
6987 NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
6989 case AMDGPU::S_ADD_I32:
6990 case AMDGPU::S_SUB_I32: {
6994 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7002 case AMDGPU::S_MUL_U64:
7004 splitScalarSMulU64(Worklist, Inst, MDT);
7008 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7009 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7012 splitScalarSMulPseudo(Worklist, Inst, MDT);
7016 case AMDGPU::S_AND_B64:
7017 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7021 case AMDGPU::S_OR_B64:
7022 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7026 case AMDGPU::S_XOR_B64:
7027 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7031 case AMDGPU::S_NAND_B64:
7032 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7036 case AMDGPU::S_NOR_B64:
7037 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7041 case AMDGPU::S_XNOR_B64:
7043 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7045 splitScalar64BitXnor(Worklist, Inst, MDT);
7049 case AMDGPU::S_ANDN2_B64:
7050 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7054 case AMDGPU::S_ORN2_B64:
7055 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7059 case AMDGPU::S_BREV_B64:
7060 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7064 case AMDGPU::S_NOT_B64:
7065 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7069 case AMDGPU::S_BCNT1_I32_B64:
7070 splitScalar64BitBCNT(Worklist, Inst);
7074 case AMDGPU::S_BFE_I64:
7075 splitScalar64BitBFE(Worklist, Inst);
7079 case AMDGPU::S_FLBIT_I32_B64:
7080 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7083 case AMDGPU::S_FF1_I32_B64:
7084 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7088 case AMDGPU::S_LSHL_B32:
7090 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7094 case AMDGPU::S_ASHR_I32:
7096 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7100 case AMDGPU::S_LSHR_B32:
7102 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7106 case AMDGPU::S_LSHL_B64:
7109 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7110 : AMDGPU::V_LSHLREV_B64_e64;
7114 case AMDGPU::S_ASHR_I64:
7116 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7120 case AMDGPU::S_LSHR_B64:
7122 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7127 case AMDGPU::S_ABS_I32:
7128 lowerScalarAbs(Worklist, Inst);
7132 case AMDGPU::S_CBRANCH_SCC0:
7133 case AMDGPU::S_CBRANCH_SCC1: {
7136 bool IsSCC = CondReg == AMDGPU::SCC;
7139 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7142 .
addReg(IsSCC ? VCC : CondReg);
7146 case AMDGPU::S_BFE_U64:
7147 case AMDGPU::S_BFM_B64:
7150 case AMDGPU::S_PACK_LL_B32_B16:
7151 case AMDGPU::S_PACK_LH_B32_B16:
7152 case AMDGPU::S_PACK_HL_B32_B16:
7153 case AMDGPU::S_PACK_HH_B32_B16:
7154 movePackToVALU(Worklist,
MRI, Inst);
7158 case AMDGPU::S_XNOR_B32:
7159 lowerScalarXnor(Worklist, Inst);
7163 case AMDGPU::S_NAND_B32:
7164 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7168 case AMDGPU::S_NOR_B32:
7169 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7173 case AMDGPU::S_ANDN2_B32:
7174 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7178 case AMDGPU::S_ORN2_B32:
7179 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7187 case AMDGPU::S_ADD_CO_PSEUDO:
7188 case AMDGPU::S_SUB_CO_PSEUDO: {
7189 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7190 ? AMDGPU::V_ADDC_U32_e64
7191 : AMDGPU::V_SUBB_U32_e64;
7192 const auto *CarryRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
7195 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7196 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7214 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7218 case AMDGPU::S_UADDO_PSEUDO:
7219 case AMDGPU::S_USUBO_PSEUDO: {
7226 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7227 ? AMDGPU::V_ADD_CO_U32_e64
7228 : AMDGPU::V_SUB_CO_U32_e64;
7231 Register DestReg =
MRI.createVirtualRegister(NewRC);
7239 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7246 case AMDGPU::S_CSELECT_B32:
7247 case AMDGPU::S_CSELECT_B64:
7248 lowerSelect(Worklist, Inst, MDT);
7251 case AMDGPU::S_CMP_EQ_I32:
7252 case AMDGPU::S_CMP_LG_I32:
7253 case AMDGPU::S_CMP_GT_I32:
7254 case AMDGPU::S_CMP_GE_I32:
7255 case AMDGPU::S_CMP_LT_I32:
7256 case AMDGPU::S_CMP_LE_I32:
7257 case AMDGPU::S_CMP_EQ_U32:
7258 case AMDGPU::S_CMP_LG_U32:
7259 case AMDGPU::S_CMP_GT_U32:
7260 case AMDGPU::S_CMP_GE_U32:
7261 case AMDGPU::S_CMP_LT_U32:
7262 case AMDGPU::S_CMP_LE_U32:
7263 case AMDGPU::S_CMP_EQ_U64:
7264 case AMDGPU::S_CMP_LG_U64:
7265 case AMDGPU::S_CMP_LT_F32:
7266 case AMDGPU::S_CMP_EQ_F32:
7267 case AMDGPU::S_CMP_LE_F32:
7268 case AMDGPU::S_CMP_GT_F32:
7269 case AMDGPU::S_CMP_LG_F32:
7270 case AMDGPU::S_CMP_GE_F32:
7271 case AMDGPU::S_CMP_O_F32:
7272 case AMDGPU::S_CMP_U_F32:
7273 case AMDGPU::S_CMP_NGE_F32:
7274 case AMDGPU::S_CMP_NLG_F32:
7275 case AMDGPU::S_CMP_NGT_F32:
7276 case AMDGPU::S_CMP_NLE_F32:
7277 case AMDGPU::S_CMP_NEQ_F32:
7278 case AMDGPU::S_CMP_NLT_F32:
7279 case AMDGPU::S_CMP_LT_F16:
7280 case AMDGPU::S_CMP_EQ_F16:
7281 case AMDGPU::S_CMP_LE_F16:
7282 case AMDGPU::S_CMP_GT_F16:
7283 case AMDGPU::S_CMP_LG_F16:
7284 case AMDGPU::S_CMP_GE_F16:
7285 case AMDGPU::S_CMP_O_F16:
7286 case AMDGPU::S_CMP_U_F16:
7287 case AMDGPU::S_CMP_NGE_F16:
7288 case AMDGPU::S_CMP_NLG_F16:
7289 case AMDGPU::S_CMP_NGT_F16:
7290 case AMDGPU::S_CMP_NLE_F16:
7291 case AMDGPU::S_CMP_NEQ_F16:
7292 case AMDGPU::S_CMP_NLT_F16: {
7298 AMDGPU::OpName::src0_modifiers) >= 0) {
7313 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7317 case AMDGPU::S_CVT_HI_F32_F16: {
7319 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7320 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7331 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7335 case AMDGPU::S_MINIMUM_F32:
7336 case AMDGPU::S_MAXIMUM_F32:
7337 case AMDGPU::S_MINIMUM_F16:
7338 case AMDGPU::S_MAXIMUM_F16: {
7340 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7351 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7357 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7365 if (NewOpcode == Opcode) {
7389 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7401 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7402 MRI.replaceRegWith(DstReg, NewDstReg);
7404 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7415 AMDGPU::OpName::src0_modifiers) >= 0)
7420 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7421 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7423 NewInstr->addOperand(Src);
7426 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7429 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7431 NewInstr.addImm(
Size);
7432 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7436 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7441 "Scalar BFE is only implemented for constant width and offset");
7450 AMDGPU::OpName::src1_modifiers) >= 0)
7455 AMDGPU::OpName::src2_modifiers) >= 0)
7469 NewInstr->addOperand(
Op);
7476 if (
Op.getReg() == AMDGPU::SCC) {
7478 if (
Op.isDef() && !
Op.isDead())
7479 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7481 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7486 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7487 Register DstReg = NewInstr->getOperand(0).getReg();
7492 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7493 MRI.replaceRegWith(DstReg, NewDstReg);
7499 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7503std::pair<bool, MachineBasicBlock *>
7515 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7518 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
7520 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
7521 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
7529 MRI.replaceRegWith(OldDstReg, ResultReg);
7532 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7533 return std::pair(
true, NewBB);
7536 return std::pair(
false,
nullptr);
7553 bool IsSCC = (CondReg == AMDGPU::SCC);
7561 MRI.replaceRegWith(Dest.
getReg(), CondReg);
7569 NewCondReg =
MRI.createVirtualRegister(TC);
7573 bool CopyFound =
false;
7577 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
7579 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7581 .
addReg(CandI.getOperand(1).getReg());
7593 : AMDGPU::S_CSELECT_B32;
7603 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
7604 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7617 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
7619 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
7631 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7632 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7635 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7645 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7646 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7661 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7669 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7670 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7676 bool Src0IsSGPR = Src0.
isReg() &&
7678 bool Src1IsSGPR = Src1.
isReg() &&
7681 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7682 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7692 }
else if (Src1IsSGPR) {
7706 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7710 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7716 unsigned Opcode)
const {
7726 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7727 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7739 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7740 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7745 unsigned Opcode)
const {
7755 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7756 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7768 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7769 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7787 &AMDGPU::SGPR_32RegClass;
7790 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7793 AMDGPU::sub0, Src0SubRC);
7798 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7800 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
7804 AMDGPU::sub1, Src0SubRC);
7806 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
7812 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
7819 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7821 Worklist.
insert(&LoHalf);
7822 Worklist.
insert(&HiHalf);
7828 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7839 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7840 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7841 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7852 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7856 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7886 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7892 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7898 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7909 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7925 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7937 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7948 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7949 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7950 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7961 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7965 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7977 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
7978 ? AMDGPU::V_MUL_HI_U32_e64
7979 : AMDGPU::V_MUL_HI_I32_e64;
7994 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8002 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8021 &AMDGPU::SGPR_32RegClass;
8024 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8027 &AMDGPU::SGPR_32RegClass;
8030 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8033 AMDGPU::sub0, Src0SubRC);
8035 AMDGPU::sub0, Src1SubRC);
8037 AMDGPU::sub1, Src0SubRC);
8039 AMDGPU::sub1, Src1SubRC);
8044 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8046 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8051 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8056 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8063 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8065 Worklist.
insert(&LoHalf);
8066 Worklist.
insert(&HiHalf);
8069 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8087 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8103 Register NewDest =
MRI.createVirtualRegister(DestRC);
8109 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8127 MRI.getRegClass(Src.getReg()) :
8128 &AMDGPU::SGPR_32RegClass;
8130 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8131 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8134 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8137 AMDGPU::sub0, SrcSubRC);
8139 AMDGPU::sub1, SrcSubRC);
8145 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8149 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8168 Offset == 0 &&
"Not implemented");
8171 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8172 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8173 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8190 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8191 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8196 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8197 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8201 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8204 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8209 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8210 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8231 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8232 unsigned OpcodeAdd =
8233 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8236 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8238 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8245 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8246 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8247 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8248 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8255 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8261 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8263 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8265 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
8268void SIInstrInfo::addUsersToMoveToVALUWorklist(
8272 E =
MRI.use_end();
I != E;) {
8277 switch (
UseMI.getOpcode()) {
8280 case AMDGPU::SOFT_WQM:
8281 case AMDGPU::STRICT_WWM:
8282 case AMDGPU::STRICT_WQM:
8283 case AMDGPU::REG_SEQUENCE:
8285 case AMDGPU::INSERT_SUBREG:
8288 OpNo =
I.getOperandNo();
8297 }
while (
I != E &&
I->getParent() == &
UseMI);
8307 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8314 case AMDGPU::S_PACK_LL_B32_B16: {
8315 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8316 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8333 case AMDGPU::S_PACK_LH_B32_B16: {
8334 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8343 case AMDGPU::S_PACK_HL_B32_B16: {
8344 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8354 case AMDGPU::S_PACK_HH_B32_B16: {
8355 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8356 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8373 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8374 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8383 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8384 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8392 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
8396 Register DestReg =
MI.getOperand(0).getReg();
8398 MRI.replaceRegWith(DestReg, NewCond);
8403 MI.getOperand(SCCIdx).setReg(NewCond);
8409 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
8412 for (
auto &Copy : CopyToDelete)
8413 Copy->eraseFromParent();
8421void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8430 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8432 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8449 case AMDGPU::REG_SEQUENCE:
8450 case AMDGPU::INSERT_SUBREG:
8452 case AMDGPU::SOFT_WQM:
8453 case AMDGPU::STRICT_WWM:
8454 case AMDGPU::STRICT_WQM: {
8462 case AMDGPU::REG_SEQUENCE:
8463 case AMDGPU::INSERT_SUBREG:
8473 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8490 int OpIndices[3])
const {
8509 for (
unsigned i = 0; i < 3; ++i) {
8510 int Idx = OpIndices[i];
8547 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8548 SGPRReg = UsedSGPRs[0];
8551 if (!SGPRReg && UsedSGPRs[1]) {
8552 if (UsedSGPRs[1] == UsedSGPRs[2])
8553 SGPRReg = UsedSGPRs[1];
8560 unsigned OperandName)
const {
8565 return &
MI.getOperand(
Idx);
8582 RsrcDataFormat |= (1ULL << 56);
8587 RsrcDataFormat |= (2ULL << 59);
8590 return RsrcDataFormat;
8612 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8618 unsigned Opc =
MI.getOpcode();
8624 return get(Opc).mayLoad() &&
8629 int &FrameIndex)
const {
8637 FrameIndex =
Addr->getIndex();
8642 int &FrameIndex)
const {
8645 FrameIndex =
Addr->getIndex();
8650 int &FrameIndex)
const {
8664 int &FrameIndex)
const {
8681 while (++
I != E &&
I->isInsideBundle()) {
8682 assert(!
I->isBundle() &&
"No nested bundle!");
8690 unsigned Opc =
MI.getOpcode();
8692 unsigned DescSize =
Desc.getSize();
8697 unsigned Size = DescSize;
8712 bool HasLiteral =
false;
8713 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
8721 return HasLiteral ? DescSize + 4 : DescSize;
8731 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8735 case TargetOpcode::BUNDLE:
8737 case TargetOpcode::INLINEASM:
8738 case TargetOpcode::INLINEASM_BR: {
8740 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8744 if (
MI.isMetaInstruction())
8754 if (
MI.memoperands_empty())
8765 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
8777 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8780 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_IF), DstReg)
8781 .
add(Branch->getOperand(0))
8782 .
add(Branch->getOperand(1));
8784 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_END_CF))
8803 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8808 BuildMI(*(MF), Branch->getDebugLoc(),
get(TargetOpcode::PHI), DstReg);
8810 if (PMBB == LoopEnd) {
8811 HeaderPHIBuilder.
addReg(BackEdgeReg);
8816 HeaderPHIBuilder.
addReg(ZeroReg);
8818 HeaderPHIBuilder.
addMBB(PMBB);
8822 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
8824 .
add(Branch->getOperand(0));
8826 BuildMI(*(MF), Branch->getDebugLoc(),
get(AMDGPU::SI_LOOP))
8832 LoopEnd->
insert(LoopEnd->
end(), SIIFBREAK);
8833 LoopEnd->
insert(LoopEnd->
end(), SILOOP);
8839 static const std::pair<int, const char *> TargetIndices[] = {
8877std::pair<unsigned, unsigned>
8884 static const std::pair<unsigned, const char *> TargetFlags[] = {
8899 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8913 return AMDGPU::WWM_COPY;
8915 return AMDGPU::COPY;
8926 bool IsNullOrVectorRegister =
true;
8935 return IsNullOrVectorRegister &&
8936 (
isSpill(Opcode) || (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
8937 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
8950 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
8981 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
8982 case AMDGPU::SI_KILL_I1_TERMINATOR:
8991 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
8992 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
8993 case AMDGPU::SI_KILL_I1_PSEUDO:
8994 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9006 const unsigned OffsetBits =
9008 return (1 << OffsetBits) - 1;
9015 if (
MI.isInlineAsm())
9018 for (
auto &
Op :
MI.implicit_operands()) {
9019 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
9020 Op.setReg(AMDGPU::VCC_LO);
9033 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9051 if (Imm <= MaxImm + 64) {
9053 Overflow = Imm - MaxImm;
std::pair<int64_t, int64_t>
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;

  if (AllowNegative) {
    // Signed division by a power of two rounds toward zero, so the remainder
    // keeps the sign of the original offset.
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;

        (ImmField % 4) != 0) {
      // Keep the immediate dword-aligned; push the misalignment back into the
      // remainder.
      RemainderOffset += ImmField % 4;
      ImmField -= ImmField % 4;
    }
  } else if (COffsetVal >= 0) {
    ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
    RemainderOffset = COffsetVal - ImmField;
  }

  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
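// Added worked example (purely illustrative field width: assume NumBits == 12
// and no alignment fix-up):
#include <cassert>
#include <cstdint>
static void split_offset_example() {
  // Non-negative path: keep the low NumBits in the immediate field.
  int64_t Off = 5000;
  int64_t Imm = Off & ((1LL << 12) - 1);   // 904
  int64_t Rem = Off - Imm;                 // 4096
  assert(Imm + Rem == Off);

  // Signed path: round toward zero so the remainder stays D-aligned.
  int64_t NOff = -5000, D = 1LL << 12;
  int64_t NRem = (NOff / D) * D;           // -4096
  int64_t NImm = NOff - NRem;              // -904
  assert(NImm + NRem == NOff);
}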
9184 switch (ST.getGeneration()) {
9209 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9210 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9211 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9212 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9213 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9214 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9215 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9216 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9252 if (
isMAI(Opcode)) {
9297 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
9298 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9299 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9311 switch (
MI.getOpcode()) {
9313 case AMDGPU::REG_SEQUENCE:
9317 case AMDGPU::INSERT_SUBREG:
9318 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9335 if (!
P.Reg.isVirtual())
9339 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9340 while (
auto *
MI = DefInst) {
9342 switch (
MI->getOpcode()) {
9344 case AMDGPU::V_MOV_B32_e32: {
9345 auto &Op1 =
MI->getOperand(1);
9350 DefInst =
MRI.getVRegDef(RSR.Reg);
9358 DefInst =
MRI.getVRegDef(RSR.Reg);
9371 assert(
MRI.isSSA() &&
"Must be run on SSA");
9373 auto *
TRI =
MRI.getTargetRegisterInfo();
9374 auto *DefBB =
DefMI.getParent();
9378 if (
UseMI.getParent() != DefBB)
9381 const int MaxInstScan = 20;
9385 auto E =
UseMI.getIterator();
9386 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
9387 if (
I->isDebugInstr())
9390 if (++NumInst > MaxInstScan)
9393 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9403 assert(
MRI.isSSA() &&
"Must be run on SSA");
9405 auto *
TRI =
MRI.getTargetRegisterInfo();
9406 auto *DefBB =
DefMI.getParent();
9408 const int MaxUseScan = 10;
9411 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9412 auto &UseInst = *
Use.getParent();
9415 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9418 if (++NumUse > MaxUseScan)
9425 const int MaxInstScan = 20;
9429 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9432 if (
I->isDebugInstr())
9435 if (++NumInst > MaxInstScan)
9448 if (Reg == VReg && --NumUse == 0)
9450 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9462 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
9465 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9474 if (InsPt !=
MBB.
end() &&
9475 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9476 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9477 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9478 InsPt->definesRegister(Src,
nullptr)) {
9482 : AMDGPU::S_MOV_B64_term),
9484 .
addReg(Src, 0, SrcSubReg)
9509 if (isFullCopyInstr(
MI)) {
9518 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9521 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9532 unsigned *PredCost)
const {
9533 if (
MI.isBundle()) {
9536 unsigned Lat = 0, Count = 0;
9537 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
9539 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
9541 return Lat + Count - 1;
9544 return SchedModel.computeInstrLatency(&
MI);
9549 unsigned opcode =
MI.getOpcode();
9550 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9551 auto IID = GI->getIntrinsicID();
9558 case Intrinsic::amdgcn_if:
9559 case Intrinsic::amdgcn_else:
9573 if (opcode == AMDGPU::G_LOAD) {
9574 if (
MI.memoperands_empty())
9578 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9579 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9587 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9588 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9589 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9602 unsigned opcode =
MI.getOpcode();
9603 if (opcode == AMDGPU::V_READLANE_B32 ||
9604 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9605 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9608 if (isCopyInstr(
MI)) {
9612 RI.getPhysRegBaseClass(srcOp.
getReg());
9620 if (
MI.isPreISelOpcode())
9635 if (
MI.memoperands_empty())
9639 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9640 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9655 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
9661 if (!Reg || !
SrcOp.readsReg())
9667 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
9704 Register &SrcReg2, int64_t &CmpMask,
9705 int64_t &CmpValue)
const {
9706 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
9709 switch (
MI.getOpcode()) {
9712 case AMDGPU::S_CMP_EQ_U32:
9713 case AMDGPU::S_CMP_EQ_I32:
9714 case AMDGPU::S_CMP_LG_U32:
9715 case AMDGPU::S_CMP_LG_I32:
9716 case AMDGPU::S_CMP_LT_U32:
9717 case AMDGPU::S_CMP_LT_I32:
9718 case AMDGPU::S_CMP_GT_U32:
9719 case AMDGPU::S_CMP_GT_I32:
9720 case AMDGPU::S_CMP_LE_U32:
9721 case AMDGPU::S_CMP_LE_I32:
9722 case AMDGPU::S_CMP_GE_U32:
9723 case AMDGPU::S_CMP_GE_I32:
9724 case AMDGPU::S_CMP_EQ_U64:
9725 case AMDGPU::S_CMP_LG_U64:
9726 SrcReg =
MI.getOperand(0).getReg();
9727 if (
MI.getOperand(1).isReg()) {
9728 if (
MI.getOperand(1).getSubReg())
9730 SrcReg2 =
MI.getOperand(1).getReg();
9732 }
else if (
MI.getOperand(1).isImm()) {
9734 CmpValue =
MI.getOperand(1).getImm();
9740 case AMDGPU::S_CMPK_EQ_U32:
9741 case AMDGPU::S_CMPK_EQ_I32:
9742 case AMDGPU::S_CMPK_LG_U32:
9743 case AMDGPU::S_CMPK_LG_I32:
9744 case AMDGPU::S_CMPK_LT_U32:
9745 case AMDGPU::S_CMPK_LT_I32:
9746 case AMDGPU::S_CMPK_GT_U32:
9747 case AMDGPU::S_CMPK_GT_I32:
9748 case AMDGPU::S_CMPK_LE_U32:
9749 case AMDGPU::S_CMPK_LE_I32:
9750 case AMDGPU::S_CMPK_GE_U32:
9751 case AMDGPU::S_CMPK_GE_I32:
9752 SrcReg =
MI.getOperand(0).getReg();
9754 CmpValue =
MI.getOperand(1).getImm();
9772 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue,
MRI,
9773 this](int64_t ExpectedValue,
unsigned SrcSize,
9774 bool IsReversible,
bool IsSigned) ->
bool {
9799 if (!Def || Def->getParent() != CmpInstr.
getParent())
9802 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
9803 Def->getOpcode() != AMDGPU::S_AND_B64)
9807 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
9818 SrcOp = &Def->getOperand(2);
9819 else if (isMask(&Def->getOperand(2)))
9820 SrcOp = &Def->getOperand(1);
9825 if (IsSigned && BitNo == SrcSize - 1)
9828 ExpectedValue <<= BitNo;
9830 bool IsReversedCC =
false;
9831 if (CmpValue != ExpectedValue) {
9834 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
9839 Register DefReg = Def->getOperand(0).getReg();
9840 if (IsReversedCC && !
MRI->hasOneNonDBGUse(DefReg))
9843 for (
auto I = std::next(Def->getIterator()), E = CmpInstr.
getIterator();
9845 if (
I->modifiesRegister(AMDGPU::SCC, &RI) ||
9846 I->killsRegister(AMDGPU::SCC, &RI))
9851 Def->findRegisterDefOperand(AMDGPU::SCC,
nullptr);
9855 if (!
MRI->use_nodbg_empty(DefReg)) {
9863 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
9864 : AMDGPU::S_BITCMP1_B32
9865 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
9866 : AMDGPU::S_BITCMP1_B64;
9871 Def->eraseFromParent();
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
    return optimizeCmpAnd(/*ExpectedValue=*/1, /*SrcSize=*/32,
                          /*IsReversible=*/true, /*IsSigned=*/false);
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMPK_GE_U32:
    return optimizeCmpAnd(/*ExpectedValue=*/1, /*SrcSize=*/32,
                          /*IsReversible=*/false, /*IsSigned=*/false);
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMPK_GE_I32:
    return optimizeCmpAnd(/*ExpectedValue=*/1, /*SrcSize=*/32,
                          /*IsReversible=*/false, /*IsSigned=*/true);
  case AMDGPU::S_CMP_EQ_U64:
    return optimizeCmpAnd(/*ExpectedValue=*/1, /*SrcSize=*/64,
                          /*IsReversible=*/true, /*IsSigned=*/false);
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
    return optimizeCmpAnd(/*ExpectedValue=*/0, /*SrcSize=*/32,
                          /*IsReversible=*/true, /*IsSigned=*/false);
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMPK_GT_U32:
    return optimizeCmpAnd(/*ExpectedValue=*/0, /*SrcSize=*/32,
                          /*IsReversible=*/false, /*IsSigned=*/false);
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMPK_GT_I32:
    return optimizeCmpAnd(/*ExpectedValue=*/0, /*SrcSize=*/32,
                          /*IsReversible=*/false, /*IsSigned=*/true);
  case AMDGPU::S_CMP_LG_U64:
    return optimizeCmpAnd(/*ExpectedValue=*/0, /*SrcSize=*/64,
                          /*IsReversible=*/true, /*IsSigned=*/false);
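// Added illustration (my paraphrase of the pattern optimizeCmpAnd rewrites;
// register numbers are only examples):
//
//   before:  %1 = S_AND_B32 %0, 0x400        ; mask selects bit 10
//            S_CMP_LG_U32 %1, 0              ; SCC = ((%0 & 0x400) != 0)
//   after:   S_BITCMP1_B32 %0, 10            ; SCC = bit 10 of %0
//
// When the compare tests the opposite polarity (or the reversed form per
// IsReversible), S_BITCMP0_* is selected instead, and the S_AND itself is
// erased once its result is no longer needed.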
9929 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
9932 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
9933 : &AMDGPU::VReg_64_Align2RegClass);
9935 .
addReg(DataReg, 0,
Op.getSubReg())
9940 Op.setSubReg(AMDGPU::sub0);
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
TargetInstrInfo::RegSubRegPair RegSubRegPair
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillRestoreOpcode(unsigned Size)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MachineRegisterInfo &MRI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillSaveOpcode(unsigned Size)
static bool resultDependsOnExec(const MachineInstr &MI)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static constexpr unsigned ModifierOpNames[]
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
int64_t getSExtValue() const
Get sign extended value.
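A short usage sketch for the two APInt calls above, assuming the LLVM ADT headers are on the include path; an arithmetic right shift preserves the sign bit.

// Usage sketch (assumes LLVM's ADT headers are available): -8 >> 2 yields -2
// under an arithmetic shift because the sign bit is replicated.
#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdint>

int main() {
  llvm::APInt V(/*numBits=*/32, /*val=*/static_cast<uint64_t>(-8),
                /*isSigned=*/true);
  llvm::APInt Shifted = V.ashr(/*ShiftAmt=*/2);
  assert(Shifted.getSExtValue() == -2);
  return 0;
}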
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
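A short usage sketch for the ArrayRef accessors above, again assuming the LLVM ADT headers are available; ArrayRef is a non-owning view, so the referenced storage must outlive it.

// Usage sketch: ArrayRef is a cheap, non-owning view over contiguous elements
// and is normally passed by value.
#include "llvm/ADT/ArrayRef.h"
#include <cassert>

int main() {
  int Storage[] = {1, 2, 3};
  llvm::ArrayRef<int> View(Storage);
  assert(!View.empty());
  assert(View.size() == 3);
  assert(View.front() == 1);
  return 0;
}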
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const SIRegisterInfo * getRegisterInfo() const override
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all of the successor blocks of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
bool isNonUniformBranchInstr(MachineInstr &Instr) const
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
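The real split depends on the subtarget's FLAT offset width, signedness, and FlatVariant. The sketch below only illustrates the general shape of such a split, under the assumption of a non-negative total offset and an unsigned immediate field holding values in [0, MaxImm]; it is not the backend's algorithm.

// Hedged sketch: keep as much of the offset as fits a hypothetical unsigned
// immediate field, and return the rest as a remainder that would have to be
// folded into the address register.
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<int64_t, int64_t> splitOffsetSketch(int64_t COffsetVal,
                                                     int64_t MaxImm) {
  assert(COffsetVal >= 0 && MaxImm > 0 && "sketch only handles this case");
  int64_t ImmField = COffsetVal % (MaxImm + 1); // encodable in the instruction
  int64_t Remainder = COffsetVal - ImmField;    // added to the address instead
  return {ImmField, Remainder};
}

int main() {
  auto [Imm, Rem] = splitOffsetSketch(/*COffsetVal=*/5000, /*MaxImm=*/4095);
  assert(Imm == 904 && Rem == 4096);
  return 0;
}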
bool isSpill(uint16_t Opcode) const
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to another...
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description.
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
Whether we must prevent this instruction from executing with EXEC = 0.
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified pseudo instruction.
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
void setHasSpilledVGPRs(bool Spill=true)
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, this lets the target make necessary checks and insert the copy to the PHI destination register.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination register.
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, this lets the target make necessary checks and insert the copy to the PHI destination register.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
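For illustration, a sketch of the integer inline-literal test, assuming the [-16, 64] window that AMDGPU's inline-constant encoding provides for integers; treat the exact bounds as an assumption, not a guarantee.

// Sketch: small integers in [-16, 64] have dedicated inline encodings and
// therefore do not consume a literal constant slot.
#include <cassert>
#include <cstdint>

static bool isInlinableIntLiteralSketch(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}

int main() {
  assert(isInlinableIntLiteralSketch(64));   // encodable inline
  assert(!isInlinableIntLiteralSketch(65));  // needs a literal constant
  assert(isInlinableIntLiteralSketch(-16));
  assert(!isInlinableIntLiteralSketch(-17));
  return 0;
}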
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
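A small sketch of that rounding behavior with worked values for both the plain and the skewed grid; the formula is the usual "round down onto a grid offset by Skew" computation.

// Sketch: round Value down onto the grid {Skew, Skew + Align, Skew + 2*Align, ...}.
#include <cassert>
#include <cstdint>

static uint64_t alignDownSketch(uint64_t Value, uint64_t Align,
                                uint64_t Skew = 0) {
  Skew %= Align;
  return (Value - Skew) / Align * Align + Skew;
}

int main() {
  assert(alignDownSketch(17, 8) == 16);    // plain grid: ..., 8, 16, 24, ...
  assert(alignDownSketch(18, 8, 3) == 11); // skewed grid: ..., 3, 11, 19, ...
  return 0;
}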
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.