#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
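// A load may be treated as invariant only when it actually has memory
// operands and every one of them is an invariant load; an empty memoperand
// list is conservatively rejected.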
  return !MI.memoperands_empty() &&
         llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
         });
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
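// Pattern-match a compare whose only purpose is to feed an exec-mask update:
// only S_AND_SAVEEXEC_* / S_AND_* users that read EXEC qualify.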
  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC, nullptr))
  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
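// areLoadsFromSameBasePtr: both opcodes must be loads that define a result,
// and the offset operand index from the named-operand table has to be
// rebased past the defs before it can be used on the MachineSDNode.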
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

  if (Offset0Idx == -1 || Offset1Idx == -1)

  Offset0Idx -= get(Opc0).NumDefs;
  Offset1Idx -= get(Opc1).NumDefs;
  assert(NumOps == 4 || NumOps == 5);

      dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
      dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

  if (!Load0Offset || !Load1Offset)

  if (OffIdx0 == -1 || OffIdx1 == -1)

  OffIdx0 -= get(Opc0).NumDefs;
  OffIdx1 -= get(Opc1).NumDefs;

  if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
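// DS read2/write2 (and their ST64 forms) encode two independent 8-bit
// element offsets; the pair is only reported as one contiguous access when
// the offsets are consecutive, i.e. Offset0 + 1 == Offset1.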
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

  unsigned Offset0 = Offset0Op->getImm() & 0xff;
  unsigned Offset1 = Offset1Op->getImm() & 0xff;
  if (Offset0 + 1 != Offset1)

  Offset = EltSize * Offset0;
  if (DataOpIdx == -1) {

  if (BaseOp && !BaseOp->isFI())

  if (SOffset->isReg())

      isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;

  if (VAddr0Idx >= 0) {
    for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

  return Base1 == Base2;
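// Clustering heuristic: scale the per-load size up to whole dwords and allow
// clustering only while the cluster stays within 8 dwords in total. For
// example, clustering 4 loads of 8 bytes each gives LoadSize = 32 / 4 = 8 and
// NumDWORDs = ((8 + 3) / 4) * 4 = 8, which is still accepted.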
                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWORDs <= 8;
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                          const char *Msg = "illegal VGPR to SGPR copy") {
  C.diagnose(IllegalCopy);
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");
  for (auto Def = MI, E = MBB.begin(); Def != E; ) {

    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

    bool SafeToPropagate = true;

    for (auto I = Def; I != MI && SafeToPropagate; ++I)
      if (I->modifiesRegister(DefOp.getReg(), &RI))
        SafeToPropagate = false;

    if (!SafeToPropagate)
    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

         "VGPR used for an intermediate copy should have been reserved.");

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
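// SGPR tuple copy: sub-registers are moved one at a time with S_MOV_B32, but
// when both 32-bit halves are even-aligned and another index remains, two
// lanes are merged into a single S_MOV_B64.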
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if ((Size == 16) != (SrcSize == 16)) {

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg)
                       ? AMDGPU::V_ACCVGPR_READ_B32_e64
                       : AMDGPU::V_MOV_B32_e32;
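// copyPhysReg special cases: SCC cannot be moved directly, so copies from it
// go through S_CSELECT and copies into it through an S_CMP-style compare;
// VCC/VCC_LO destinations take a plain scalar move only when the source is a
// scalar register of matching width.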
  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

         AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
         AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

  bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
  bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
  bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
  bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
  if (IsAGPRDst || IsAGPRSrc) {
    if (!DstLow || !SrcLow) {
                        "Cannot use hi16 subreg with an AGPR!");

  if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
      (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (!DstLow || !SrcLow) {
                        "Cannot use hi16 subreg on VI!");
  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
    Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
      Opcode = AMDGPU::V_MOV_B64_e32;
      Opcode = AMDGPU::V_PK_MOV_B32;
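// INSTRUCTION_LIST_END serves as a sentinel meaning "no single mov exists for
// this class pairing"; the per-subregister loop below then falls back to the
// indirect AGPR copy helper, scavenging a temporary VGPR when needed.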
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                             *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
                                      int64_t Value) const {
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
    Opcode = AMDGPU::S_MOV_B64;
    Opcode = AMDGPU::S_MOV_B32;

    int64_t IdxValue = Idx == 0 ? Value : 0;

        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
    Builder.addImm(IdxValue);
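// insertVectorSelect: a one-operand condition is a boolean register consumed
// directly by V_CNDMASK; a two-operand condition carries a BranchPredicate
// immediate (SCC/VCC/EXEC, zero or non-zero) that is first normalized into a
// wave-wide mask via S_CSELECT / S_OR_SAVEEXEC before the cndmask.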
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                  : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                  : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                  : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                  : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                  : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                  : AMDGPU::S_CSELECT_B64), SReg)
    return AMDGPU::COPY;

  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  } else if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }
  return AMDGPU::COPY;
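// Indirect-indexing pseudo selection: VecSize is the vector register width in
// bits, and each bucket maps to the pseudo that reads or writes one 32-bit
// lane of a V1..V32 tuple through the GPR index mode.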
                                                       bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
    if (VecSize <= 32)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
    if (VecSize <= 64)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
    if (VecSize <= 96)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
    if (VecSize <= 128)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
    if (VecSize <= 160)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
    if (VecSize <= 256)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
    if (VecSize <= 288)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
    if (VecSize <= 320)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
    if (VecSize <= 352)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
    if (VecSize <= 384)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
    if (VecSize <= 512)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
  }

  if (VecSize <= 32)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
  if (VecSize <= 64)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
  if (VecSize <= 96)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
  if (VecSize <= 128)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
  if (VecSize <= 160)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
  if (VecSize <= 256)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
  if (VecSize <= 288)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
  if (VecSize <= 320)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
  if (VecSize <= 352)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
  if (VecSize <= 384)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
  if (VecSize <= 512)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
  if (VecSize <= 32)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
  if (VecSize <= 64)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
  if (VecSize <= 96)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
  if (VecSize <= 128)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
  if (VecSize <= 160)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
  if (VecSize <= 256)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
  if (VecSize <= 288)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
  if (VecSize <= 320)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
  if (VecSize <= 352)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
  if (VecSize <= 384)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
  if (VecSize <= 512)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

  if (VecSize <= 32)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
  if (VecSize <= 64)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
  if (VecSize <= 96)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
  if (VecSize <= 128)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
  if (VecSize <= 160)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
  if (VecSize <= 256)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
  if (VecSize <= 288)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
  if (VecSize <= 320)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
  if (VecSize <= 352)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
  if (VecSize <= 384)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
  if (VecSize <= 512)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

  if (VecSize <= 64)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
  if (VecSize <= 128)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
  if (VecSize <= 256)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
  if (VecSize <= 512)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                       bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
  case 4:
    return AMDGPU::SI_SPILL_S32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_S64_SAVE;
  case 12:
    return AMDGPU::SI_SPILL_S96_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_S128_SAVE;
  case 20:
    return AMDGPU::SI_SPILL_S160_SAVE;
  case 24:
    return AMDGPU::SI_SPILL_S192_SAVE;
  case 28:
    return AMDGPU::SI_SPILL_S224_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_S256_SAVE;
  case 36:
    return AMDGPU::SI_SPILL_S288_SAVE;
  case 40:
    return AMDGPU::SI_SPILL_S320_SAVE;
  case 44:
    return AMDGPU::SI_SPILL_S352_SAVE;
  case 48:
    return AMDGPU::SI_SPILL_S384_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_S512_SAVE;
  case 128:
    return AMDGPU::SI_SPILL_S1024_SAVE;
  case 4:
    return AMDGPU::SI_SPILL_V32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_V64_SAVE;
  case 12:
    return AMDGPU::SI_SPILL_V96_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_V128_SAVE;
  case 20:
    return AMDGPU::SI_SPILL_V160_SAVE;
  case 24:
    return AMDGPU::SI_SPILL_V192_SAVE;
  case 28:
    return AMDGPU::SI_SPILL_V224_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_V256_SAVE;
  case 36:
    return AMDGPU::SI_SPILL_V288_SAVE;
  case 40:
    return AMDGPU::SI_SPILL_V320_SAVE;
  case 44:
    return AMDGPU::SI_SPILL_V352_SAVE;
  case 48:
    return AMDGPU::SI_SPILL_V384_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_V512_SAVE;
  case 128:
    return AMDGPU::SI_SPILL_V1024_SAVE;
  case 4:
    return AMDGPU::SI_SPILL_A32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_A64_SAVE;
  case 12:
    return AMDGPU::SI_SPILL_A96_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_A128_SAVE;
  case 20:
    return AMDGPU::SI_SPILL_A160_SAVE;
  case 24:
    return AMDGPU::SI_SPILL_A192_SAVE;
  case 28:
    return AMDGPU::SI_SPILL_A224_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_A256_SAVE;
  case 36:
    return AMDGPU::SI_SPILL_A288_SAVE;
  case 40:
    return AMDGPU::SI_SPILL_A320_SAVE;
  case 44:
    return AMDGPU::SI_SPILL_A352_SAVE;
  case 48:
    return AMDGPU::SI_SPILL_A384_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_A512_SAVE;
  case 128:
    return AMDGPU::SI_SPILL_A1024_SAVE;
  case 4:
    return AMDGPU::SI_SPILL_AV32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_AV64_SAVE;
  case 12:
    return AMDGPU::SI_SPILL_AV96_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_AV128_SAVE;
  case 20:
    return AMDGPU::SI_SPILL_AV160_SAVE;
  case 24:
    return AMDGPU::SI_SPILL_AV192_SAVE;
  case 28:
    return AMDGPU::SI_SPILL_AV224_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_AV256_SAVE;
  case 36:
    return AMDGPU::SI_SPILL_AV288_SAVE;
  case 40:
    return AMDGPU::SI_SPILL_AV320_SAVE;
  case 44:
    return AMDGPU::SI_SPILL_AV352_SAVE;
  case 48:
    return AMDGPU::SI_SPILL_AV384_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_AV512_SAVE;
  case 128:
    return AMDGPU::SI_SPILL_AV1024_SAVE;
                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

                                     FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

  assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
  assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
         SrcReg != AMDGPU::EXEC && "exec should not be spilled");

  if (SrcReg.isVirtual() && SpillSize == 4) {
    MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                         SpillSize, RI, *MFI);
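// The restore path mirrors the save path: the SI_SPILL_*_RESTORE opcode is
// chosen from the same byte-size buckets as the corresponding *_SAVE pseudo.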
  case 4:
    return AMDGPU::SI_SPILL_S32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_S64_RESTORE;
  case 12:
    return AMDGPU::SI_SPILL_S96_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_S128_RESTORE;
  case 20:
    return AMDGPU::SI_SPILL_S160_RESTORE;
  case 24:
    return AMDGPU::SI_SPILL_S192_RESTORE;
  case 28:
    return AMDGPU::SI_SPILL_S224_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_S256_RESTORE;
  case 36:
    return AMDGPU::SI_SPILL_S288_RESTORE;
  case 40:
    return AMDGPU::SI_SPILL_S320_RESTORE;
  case 44:
    return AMDGPU::SI_SPILL_S352_RESTORE;
  case 48:
    return AMDGPU::SI_SPILL_S384_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_S512_RESTORE;
  case 128:
    return AMDGPU::SI_SPILL_S1024_RESTORE;
  case 4:
    return AMDGPU::SI_SPILL_V32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_V64_RESTORE;
  case 12:
    return AMDGPU::SI_SPILL_V96_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_V128_RESTORE;
  case 20:
    return AMDGPU::SI_SPILL_V160_RESTORE;
  case 24:
    return AMDGPU::SI_SPILL_V192_RESTORE;
  case 28:
    return AMDGPU::SI_SPILL_V224_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_V256_RESTORE;
  case 36:
    return AMDGPU::SI_SPILL_V288_RESTORE;
  case 40:
    return AMDGPU::SI_SPILL_V320_RESTORE;
  case 44:
    return AMDGPU::SI_SPILL_V352_RESTORE;
  case 48:
    return AMDGPU::SI_SPILL_V384_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_V512_RESTORE;
  case 128:
    return AMDGPU::SI_SPILL_V1024_RESTORE;
  case 4:
    return AMDGPU::SI_SPILL_A32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_A64_RESTORE;
  case 12:
    return AMDGPU::SI_SPILL_A96_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_A128_RESTORE;
  case 20:
    return AMDGPU::SI_SPILL_A160_RESTORE;
  case 24:
    return AMDGPU::SI_SPILL_A192_RESTORE;
  case 28:
    return AMDGPU::SI_SPILL_A224_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_A256_RESTORE;
  case 36:
    return AMDGPU::SI_SPILL_A288_RESTORE;
  case 40:
    return AMDGPU::SI_SPILL_A320_RESTORE;
  case 44:
    return AMDGPU::SI_SPILL_A352_RESTORE;
  case 48:
    return AMDGPU::SI_SPILL_A384_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_A512_RESTORE;
  case 128:
    return AMDGPU::SI_SPILL_A1024_RESTORE;
  case 4:
    return AMDGPU::SI_SPILL_AV32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_AV64_RESTORE;
  case 12:
    return AMDGPU::SI_SPILL_AV96_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_AV128_RESTORE;
  case 20:
    return AMDGPU::SI_SPILL_AV160_RESTORE;
  case 24:
    return AMDGPU::SI_SPILL_AV192_RESTORE;
  case 28:
    return AMDGPU::SI_SPILL_AV224_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_AV256_RESTORE;
  case 36:
    return AMDGPU::SI_SPILL_AV288_RESTORE;
  case 40:
    return AMDGPU::SI_SPILL_AV320_RESTORE;
  case 44:
    return AMDGPU::SI_SPILL_AV352_RESTORE;
  case 48:
    return AMDGPU::SI_SPILL_AV384_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_AV512_RESTORE;
  case 128:
    return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

  unsigned SpillSize = TRI->getSpillSize(*RC);

                                     FrameInfo.getObjectAlign(FrameIndex));

  assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
  assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
         DestReg != AMDGPU::EXEC && "exec should not be spilled");

  if (DestReg.isVirtual() && SpillSize == 4) {
    MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                          SpillSize, RI, *MFI);
                              unsigned Quantity) const {

  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);

  if (HasNoTerminator) {
    if (Info->returnsVoid()) {
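// Simulated trap for targets without a trap handler: the sequence below masks
// the wave's doorbell ID with DoorbellIDMask, sets the ECQueueWaveAbort bit,
// and publishes the result through M0 so the queue can observe the aborting
// wave before the program ends.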
  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)

  Register DoorbellRegMasked =
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)

  Register SetWaveAbortBit =
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
  switch (MI.getOpcode()) {
  default:
    if (MI.isMetaInstruction())
      return 0;
    return 1;
  case AMDGPU::S_NOP:
    return MI.getOperand(0).getImm() + 1;
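// The *_term variants below are terminator-flagged twins of ordinary scalar
// ops; they exist only so the register allocator places spill code correctly
// around block exits, and expandPostRAPseudo rewrites each one back to its
// plain opcode.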
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
    break;
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
    break;
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
    break;
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
    break;
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
    break;
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
    break;
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
    break;
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
    break;
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
    break;
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
    break;
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
    break;
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
    break;
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
    break;
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
    break;
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
        .add(MI.getOperand(1));
        .add(MI.getOperand(2));
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B64: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
                   MI.getOperand(0).getReg())
        .add(MI.getOperand(1));
                   MI.getOperand(0).getReg())
        .add(MI.getOperand(2));
    MI.eraseFromParent();
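// V_SET_INACTIVE_*: write the "active" value normally, then flip EXEC with
// S_NOT so only the previously inactive lanes execute the second move, and
// flip it back. The indirect-write pseudos that follow are expanded into a
// movrel (M0-indexed) or GPR-IDX-mode V_MOV over the vector tuple.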
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

        .add(MI.getOperand(2))

    const int ImpDefIdx =
        OpDesc.getNumOperands() + OpDesc.implicit_uses().size();
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =
        OpDesc.getNumOperands() + OpDesc.implicit_uses().size();
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

        BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
                           : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
  case AMDGPU::SI_RETURN: {

    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));
    break;

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
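// Wide scalar loads whose only user reads a 256- or 128-bit subregister are
// shrunk in place below: the load opcode, destination register class, and
// immediate offset are all rewritten to cover just the used slice.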
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
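// expandMovDPP64: when the target lacks a native 64-bit DPP mov, the pseudo
// is split into two 32-bit DPP movs over sub0/sub1, with immediate sources
// shifted per 32-bit part and virtual results recombined via REG_SEQUENCE.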
std::pair<MachineInstr *, MachineInstr *>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {

    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));

      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {

      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());

        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                        unsigned Src0OpName,
                                        unsigned Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);
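// Commuting with a non-register operand: the register operand's kill/undef/
// debug flags must be captured before the slots are swapped, because
// ChangeToImmediate/ChangeToFrameIndex destroys the original operand state.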
  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");
                       Src1, AMDGPU::OpName::src1_modifiers);

                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {

                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                        int64_t BrOffset) const {

  assert(BranchOp != AMDGPU::S_SETPC_B64);

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
      MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");
  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

      MCCtx.createTempSymbol("post_getpc", true);

      MCCtx.createTempSymbol("offset_lo", true);

      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)

      .addReg(PCReg, 0, AMDGPU::sub1)

  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  switch (Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
    return SCC_FALSE;
  case AMDGPU::S_CBRANCH_SCC1:
    return SCC_TRUE;
  case AMDGPU::S_CBRANCH_VCCNZ:
    return VCCNZ;
  case AMDGPU::S_CBRANCH_VCCZ:
    return VCCZ;
  case AMDGPU::S_CBRANCH_EXECNZ:
    return EXECNZ;
  case AMDGPU::S_CBRANCH_EXECZ:
    return EXECZ;
                                    bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));

    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)

    CondBB = I->getOperand(0).getMBB();

    Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();
                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
                                   int *BytesRemoved) const {

  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

  if (BytesRemoved)
    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

  if (Cond.size() == 1 && Cond[0].isReg()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
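// insertSelect canonicalizes the predicate first: VCCZ and SCC_FALSE are
// encoded as the negation of their *NZ/*TRUE twins, so flipping the sign of
// the enum value swaps the true and false operands instead of needing extra
// code.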
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {
  static const int16_t Sub0_15[] = {
      AMDGPU::sub0,  AMDGPU::sub1,  AMDGPU::sub2,  AMDGPU::sub3,
      AMDGPU::sub4,  AMDGPU::sub5,  AMDGPU::sub6,  AMDGPU::sub7,
      AMDGPU::sub8,  AMDGPU::sub9,  AMDGPU::sub10, AMDGPU::sub11,
      AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
      AMDGPU::sub0_sub1,   AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5,   AMDGPU::sub6_sub7,
      AMDGPU::sub8_sub9,   AMDGPU::sub10_sub11,
      AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;

      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      BuildMI(MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod,           AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();

      MI.removeOperand(Idx);
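// foldImmediate: the immediate producer must have exactly one non-debug use,
// and the getImmFor helper below selects the 16-bit half (lo16/hi16 subregs)
// or 32-bit half (sub0/sub1) of the materialized constant that the use
// actually reads.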
  if (!MRI->hasOneNonDBGUse(Reg))

  switch (DefMI.getOpcode()) {
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t {
    int64_t Imm = ImmOp->getImm();
    switch (UseOp.getSubReg()) {
    case AMDGPU::sub1_lo16:
    case AMDGPU::sub1_hi16:

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    bool Is16Bit = OpSize == 2;
    bool Is64Bit = OpSize == 8;

    unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
                                           : AMDGPU::V_MOV_B32_e32
                                 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
                                           : AMDGPU::S_MOV_B32;
    APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)));

      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

    if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)

      UseMI.getOperand(0).setSubReg(0);

      UseMI.getOperand(0).setReg(DstReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
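// MAD/FMA folding: when one multiplicand is a materialized constant, the
// three-address op collapses to V_MADMK/V_FMAMK (register times an inline
// "K" immediate, plus a register); when the addend is the constant it
// collapses to V_MADAK/V_FMAAK instead.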
  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
    bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
                 Opc == AMDGPU::V_FMAC_F16_t16_e64;

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

    if (Def && Def->isMoveImmediate() &&

      unsigned NewOpc =
          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
                         : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16)

      const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      Src1->ChangeToImmediate(Imm);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
    bool Src0Inlined = false;
    if (Src0->isReg()) {

      if (Def && Def->isMoveImmediate() &&

    if (Src1->isReg() && !Src0Inlined) {

      if (Def && Def->isMoveImmediate() &&

      unsigned NewOpc =
          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
                         : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,

  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())

  auto *Def = MRI.getUniqueVRegDef(Reg);

    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())
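// convertToThreeAddress: when a MAC/FMAC is upgraded to its MFMA/MAD/FMA
// form, the live-interval start of any early-clobber def must be moved from
// the old instruction's slot index to the new one; the UpdateDefIndex lambda
// below does this for the main range and every subrange.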
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    if (Def.isEarlyClobber() && Def.isReg() &&

    auto UpdateDefIndex = [&](LiveRange &LR) {
      auto S = LR.find(OldIndex);
      if (S != LR.end() && S->start == OldIndex) {
        assert(S->valno && S->valno->def == OldIndex);
        S->start = NewIndex;
        S->valno->def = NewIndex;

    for (auto &SR : LI.subranges())

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         "V_FMAC_F16_t16_e32 is not supported and not expected to be present "

  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                             AMDGPU::OpName::src0);
  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&

    const auto killDef = [&]() -> void {

      if (!MRI.hasOneNonDBGUse(DefReg))

                             : AMDGPU::V_FMAAK_F16)
                      : AMDGPU::V_FMAAK_F32)
            : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);

                             : AMDGPU::V_FMAMK_F16)
                      : AMDGPU::V_FMAMK_F32)
            : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                          : IsF64 ? AMDGPU::V_FMA_F64_e64
                          : IsLegacy ? AMDGPU::V_FMA_LEGACY_F32_e64
                                     : AMDGPU::V_FMA_F32_e64
                          : IsF16 ? AMDGPU::V_MAD_F16_e64
                          : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
                                     : AMDGPU::V_MAD_F32_e64;

  MIB.addImm(OpSel ? OpSel->getImm() : 0);
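// Scheduling-boundary and hazard queries follow: S_SET_GPR_IDX_* must not be
// separated from the instructions they wrap, and a SCHED_BARRIER with a zero
// mask pins scheduling entirely.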
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);
  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
      Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
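// isInlineConstant: a value is an inline constant when the hardware can
// encode it directly in the instruction word (small integers and a fixed set
// of floats such as 0.5, 1.0, 2.0, 4.0 and their negations), so the checks
// below dispatch on both bit width and operand type.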
  switch (Imm.getBitWidth()) {

    APInt IntImm = Imm.bitcastToAPInt();

                                        uint8_t OperandType) const {
  assert(!MO.isReg() && "isInlineConstant called on register operand!");

  int64_t Imm = MO.getImm();
  switch (OperandType) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {

      int16_t Trunc = static_cast<int16_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

                              AMDGPU::OpName::src2))
  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

  return Mods && Mods->getImm();

  switch (MI.getOpcode()) {
  default:
    return false;

  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

      (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
                                           unsigned Op32) const {

      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();

    int OpTy = MI.getDesc().operands()[Idx++].OperandType;

  if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)

  return MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
         MO.getReg() == AMDGPU::VCC_LO;

  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
         AMDGPU::SReg_64RegClass.contains(MO.getReg());
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
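// verifyInstruction: the machine-verifier hooks below check operand counts,
// register classes, immediate legality, constant-bus usage, SDWA and DPP
// encoding restrictions, and per-subtarget alignment rules; each failure
// fills ErrInfo and returns false.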
  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))

  if (Src0Idx == -1) {

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
  if (MI.isInlineAsm()) {

    if (!Reg.isVirtual() && !RC->contains(Reg)) {
      ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";

  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {

      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int RegClass = Desc.operands()[i].RegClass;

    switch (Desc.operands()[i].OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Expected inline constant for operand.";

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

          RI.getSubRegisterClass(RC, MO.getSubReg());

        ErrInfo = "Subtarget requires even aligned vector registers";

    if (RegClass != -1) {
      if (Reg.isVirtual())

        ErrInfo = "Operand has incorrect register class.";
      ErrInfo = "SDWA is not supported on this target";

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {

        ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";

        ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9+";

    if (OMod != nullptr &&
      ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {

      unsigned Mods = Src0ModsMO->getImm();

        ErrInfo = "sext, abs and neg are not allowed on this instruction";

    if (isVOPC(BasicOpcode)) {

      if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
        ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";

      if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
        ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";

      if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
        ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&

      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));

          "Dst register should be tied to implicit use of preserved register";

                 Dst.getReg() != TiedMO.getReg()) {
        ErrInfo = "Dst register should use same physical register as preserved";
    uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
    if (RegCount > DstSize) {
      ErrInfo = "Image instruction returns too many registers for dst "

  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {

          ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

        if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
              return !RI.regsOverlap(SGPRUsed, SGPR);

        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

      ErrInfo = "VOP3 instruction uses literal";

  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;

    for (int OpIdx : {Src0Idx, Src1Idx}) {

        if (MO.getReg() != SGPRUsed)

      ErrInfo = "WRITELANE instruction violates constant bus restriction";
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5003 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5010 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5020 ErrInfo =
"ABS not allowed in VOP3B instructions";
5033 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5040 if (
Desc.isBranch()) {
5042 ErrInfo =
"invalid branch target for SOPK instruction";
5048 if (!isUInt<16>(Imm)) {
5049 ErrInfo =
"invalid immediate for SOPK instruction";
5053 if (!isInt<16>(Imm)) {
5054 ErrInfo =
"invalid immediate for SOPK instruction";
5061 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5062 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5063 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5064 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5065 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5066 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5068 const unsigned StaticNumOps =
5069 Desc.getNumOperands() +
Desc.implicit_uses().size();
5070 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5075 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5076 ErrInfo =
"missing implicit register operands";
5082 if (!Dst->isUse()) {
5083 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5088 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5089 UseOpIdx != StaticNumOps + 1) {
5090 ErrInfo =
"movrel implicit operands should be tied";
5097 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5099 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5100 ErrInfo =
"src0 should be subreg of implicit vector use";
5108 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5109 ErrInfo =
"VALU instruction does not implicitly read exec mask";
  if (MI.mayStore() &&

    if (Soff && Soff->getReg() != AMDGPU::M0) {
      ErrInfo = "scalar stores must use m0 as offset register";

    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";

    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";

                                           AMDGPU::OpName::vaddr0);
        isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;

      ErrInfo = "dim is out of range";

      IsA16 = R128A16->getImm() != 0;
    } else if (ST.hasA16()) {
      IsA16 = A16->getImm() != 0;

    bool IsNSA = RsrcIdx - VAddr0Idx > 1;

    unsigned AddrWords =

    unsigned VAddrWords;
      VAddrWords = RsrcIdx - VAddr0Idx;
        unsigned LastVAddrIdx = RsrcIdx - 1;
        VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

    if (VAddrWords != AddrWords) {
                        << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";
  using namespace AMDGPU::DPP;

    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";

    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST &&
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
      } else if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";

    uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
                                        : AMDGPU::OpName::vdata;

      ErrInfo = "Invalid register class: "
                "vdata and vdst should be both VGPR or AGPR";

    if (Data && Data2 &&
      ErrInfo = "Invalid register class: "
                "both data operands should be VGPR or AGPR";

    if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
      ErrInfo = "Invalid register class: "
                "agpr loads and stores not supported on this GPU";
5284 const auto isAlignedReg = [&
MI, &
MRI,
this](
unsigned OpName) ->
bool {
5289 if (Reg.isPhysical())
5296 if (
MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
5297 MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
5298 MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
5300 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5301 ErrInfo =
"Subtarget requires even aligned vector registers "
5302 "for DS_GWS instructions";
5308 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5309 ErrInfo =
"Subtarget requires even aligned vector registers "
5310 "for vaddr operand of image instructions";
5316 if (
MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
5319 if (Src->isReg() && RI.
isSGPRReg(
MRI, Src->getReg())) {
5320 ErrInfo =
"Invalid register class: "
5321 "v_accvgpr_write with an SGPR is not supported on this GPU";
5326 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5329 ErrInfo =
"pseudo expects only physical SGPRs";
5341 switch (
MI.getOpcode()) {
5342 default:
return AMDGPU::INSTRUCTION_LIST_END;
5343 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5344 case AMDGPU::COPY:
return AMDGPU::COPY;
5345 case AMDGPU::PHI:
return AMDGPU::PHI;
5346 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5347 case AMDGPU::WQM:
return AMDGPU::WQM;
5348 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5349 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5350 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5351 case AMDGPU::S_MOV_B32: {
5353 return MI.getOperand(1).isReg() ||
5355 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5357 case AMDGPU::S_ADD_I32:
5358 return ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5359 case AMDGPU::S_ADDC_U32:
5360 return AMDGPU::V_ADDC_U32_e32;
5361 case AMDGPU::S_SUB_I32:
5362 return ST.
hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5365 case AMDGPU::S_ADD_U32:
5366 return AMDGPU::V_ADD_CO_U32_e32;
5367 case AMDGPU::S_SUB_U32:
5368 return AMDGPU::V_SUB_CO_U32_e32;
5369 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5370 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5371 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5372 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5373 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5374 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5375 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5376 case AMDGPU::S_XNOR_B32:
5377 return ST.
hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5378 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5379 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5380 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5381 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5382 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5383 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5384 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5385 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5386 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5387 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5388 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5389 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5390 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5391 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5392 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5393 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5394 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5395 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5396 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5397 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5398 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5399 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5400 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5401 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5402 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5403 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5404 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5405 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5406 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5407 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5408 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5409 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5410 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5411 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5412 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5413 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5414 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5415 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5416 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5417 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5418 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5419 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5420 case AMDGPU::S_CVT_F32_F16:
return AMDGPU::V_CVT_F32_F16_t16_e64;
5421 case AMDGPU::S_CVT_HI_F32_F16:
return AMDGPU::V_CVT_F32_F16_t16_e64;
5422 case AMDGPU::S_CVT_F16_F32:
return AMDGPU::V_CVT_F16_F32_t16_e64;
5423 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5424 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5425 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5426 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5427 case AMDGPU::S_CEIL_F16:
5429 : AMDGPU::V_CEIL_F16_fake16_e64;
5430 case AMDGPU::S_FLOOR_F16:
5432 : AMDGPU::V_FLOOR_F16_fake16_e64;
5433 case AMDGPU::S_TRUNC_F16:
5434 return AMDGPU::V_TRUNC_F16_fake16_e64;
5435 case AMDGPU::S_RNDNE_F16:
5436 return AMDGPU::V_RNDNE_F16_fake16_e64;
5437 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5438 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5439 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5440 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5441 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5442 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5443 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5444 case AMDGPU::S_ADD_F16:
return AMDGPU::V_ADD_F16_fake16_e64;
5445 case AMDGPU::S_SUB_F16:
return AMDGPU::V_SUB_F16_fake16_e64;
5446 case AMDGPU::S_MIN_F16:
return AMDGPU::V_MIN_F16_fake16_e64;
5447 case AMDGPU::S_MAX_F16:
return AMDGPU::V_MAX_F16_fake16_e64;
5448 case AMDGPU::S_MINIMUM_F16:
return AMDGPU::V_MINIMUM_F16_e64;
5449 case AMDGPU::S_MAXIMUM_F16:
return AMDGPU::V_MAXIMUM_F16_e64;
5450 case AMDGPU::S_MUL_F16:
return AMDGPU::V_MUL_F16_fake16_e64;
5451 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5452 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5453 case AMDGPU::S_FMAC_F16:
return AMDGPU::V_FMAC_F16_t16_e64;
5454 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5455 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5456 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5457 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5458 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5459 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5460 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5461 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5462 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5463 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5464 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5465 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5466 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5467 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5468 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5469 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5470 case AMDGPU::S_CMP_LT_F16:
return AMDGPU::V_CMP_LT_F16_t16_e64;
5471 case AMDGPU::S_CMP_EQ_F16:
return AMDGPU::V_CMP_EQ_F16_t16_e64;
5472 case AMDGPU::S_CMP_LE_F16:
return AMDGPU::V_CMP_LE_F16_t16_e64;
5473 case AMDGPU::S_CMP_GT_F16:
return AMDGPU::V_CMP_GT_F16_t16_e64;
5474 case AMDGPU::S_CMP_LG_F16:
return AMDGPU::V_CMP_LG_F16_t16_e64;
5475 case AMDGPU::S_CMP_GE_F16:
return AMDGPU::V_CMP_GE_F16_t16_e64;
5476 case AMDGPU::S_CMP_O_F16:
return AMDGPU::V_CMP_O_F16_t16_e64;
5477 case AMDGPU::S_CMP_U_F16:
return AMDGPU::V_CMP_U_F16_t16_e64;
5478 case AMDGPU::S_CMP_NGE_F16:
return AMDGPU::V_CMP_NGE_F16_t16_e64;
5479 case AMDGPU::S_CMP_NLG_F16:
return AMDGPU::V_CMP_NLG_F16_t16_e64;
5480 case AMDGPU::S_CMP_NGT_F16:
return AMDGPU::V_CMP_NGT_F16_t16_e64;
5481 case AMDGPU::S_CMP_NLE_F16:
return AMDGPU::V_CMP_NLE_F16_t16_e64;
5482 case AMDGPU::S_CMP_NEQ_F16:
return AMDGPU::V_CMP_NEQ_F16_t16_e64;
5483 case AMDGPU::S_CMP_NLT_F16:
return AMDGPU::V_CMP_NLT_F16_t16_e64;
5484 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5485 case AMDGPU::V_S_EXP_F16_e64:
return AMDGPU::V_EXP_F16_fake16_e64;
5486 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5487 case AMDGPU::V_S_LOG_F16_e64:
return AMDGPU::V_LOG_F16_fake16_e64;
5488 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5489 case AMDGPU::V_S_RCP_F16_e64:
return AMDGPU::V_RCP_F16_fake16_e64;
5490 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5491 case AMDGPU::V_S_RSQ_F16_e64:
return AMDGPU::V_RSQ_F16_fake16_e64;
5492 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5493 case AMDGPU::V_S_SQRT_F16_e64:
return AMDGPU::V_SQRT_F16_fake16_e64;
5496 "Unexpected scalar opcode without corresponding vector one!");
5509 bool IsWave32 = ST.isWave32();
5514 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5515 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5524 const unsigned OrSaveExec =
5525 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5538 unsigned ExecMov =
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5540 auto ExecRestoreMI =
5550 bool IsAllocatable) {
5551 if ((IsAllocatable || !ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
5556 case AMDGPU::AV_32RegClassID:
5557 RCID = AMDGPU::VGPR_32RegClassID;
5559 case AMDGPU::AV_64RegClassID:
5560 RCID = AMDGPU::VReg_64RegClassID;
5562 case AMDGPU::AV_96RegClassID:
5563 RCID = AMDGPU::VReg_96RegClassID;
5565 case AMDGPU::AV_128RegClassID:
5566 RCID = AMDGPU::VReg_128RegClassID;
5568 case AMDGPU::AV_160RegClassID:
5569 RCID = AMDGPU::VReg_160RegClassID;
5571 case AMDGPU::AV_512RegClassID:
5572 RCID = AMDGPU::VReg_512RegClassID;
5588 auto RegClass = TID.
operands()[OpNum].RegClass;
5589 bool IsAllocatable =
false;
5599 AMDGPU::OpName::vdst);
5602 : AMDGPU::OpName::vdata);
5603 if (DataIdx != -1) {
5605 TID.
Opcode, AMDGPU::OpName::data1);
5613 unsigned OpNo)
const {
5616 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
5617 Desc.operands()[OpNo].RegClass == -1) {
5620 if (Reg.isVirtual())
5621 return MRI.getRegClass(Reg);
5622 return RI.getPhysRegBaseClass(Reg);
5625 unsigned RCID =
Desc.operands()[OpNo].RegClass;
5634 unsigned RCID =
get(
MI.getOpcode()).operands()[OpIdx].RegClass;
5636 unsigned Size = RI.getRegSizeInBits(*RC);
5637 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
5639 Opcode = AMDGPU::COPY;
5641 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
5658 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
5669 if (SubIdx == AMDGPU::sub0)
5671 if (SubIdx == AMDGPU::sub1)
5683void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
5699 if (Reg.isPhysical())
5710 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
5737 MO = &
MI.getOperand(OpIdx);
5749 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
5755 if (!SGPRsUsed.
count(SGPR) &&
5758 if (--ConstantBusLimit <= 0)
5764 if (!LiteralLimit--)
5766 if (--ConstantBusLimit <= 0)
5780 unsigned Opc =
MI.getOpcode();
5788 isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5789 if ((
int)OpIdx == VDstIdx && DataIdx != -1 &&
5790 MI.getOperand(DataIdx).isReg() &&
5791 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
5793 if ((
int)OpIdx == DataIdx) {
5794 if (VDstIdx != -1 &&
5795 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5799 AMDGPU::OpName::data1);
5800 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
5801 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5804 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts() &&
5814 bool Is64BitOp = Is64BitFPOp ||
5827 if (!Is64BitFPOp && (int32_t)Imm < 0)
5845 unsigned Opc =
MI.getOpcode();
5864 if (Opc == AMDGPU::V_WRITELANE_B32) {
5867 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5873 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5890 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
5892 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
5904 if (Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
5906 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5918 if (HasImplicitSGPR || !
MI.isCommutable()) {
5935 if (CommutedOpc == -1) {
5940 MI.setDesc(
get(CommutedOpc));
5944 bool Src0Kill = Src0.
isKill();
5948 else if (Src1.
isReg()) {
5963 unsigned Opc =
MI.getOpcode();
5971 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
5972 Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
5978 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5984 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5995 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
5997 SGPRsUsed.
insert(SGPRReg);
6001 for (
int Idx : VOP3Idx) {
6010 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6035 if (ConstantBusLimit > 0) {
6047 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6048 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6057 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6061 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6063 get(TargetOpcode::COPY), NewSrcReg)
6070 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6076 for (
unsigned i = 0; i < SubRegs; ++i) {
6077 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6079 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6086 get(AMDGPU::REG_SEQUENCE), DstReg);
6087 for (
unsigned i = 0; i < SubRegs; ++i) {
6102 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6104 SBase->setReg(SGPR);
6116 if (OldSAddrIdx < 0)
6133 if (NewVAddrIdx < 0)
6140 if (OldVAddrIdx >= 0) {
6142 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6143 if (!VAddrDef || VAddrDef->
getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6154 if (OldVAddrIdx == NewVAddrIdx) {
6157 MRI.removeRegOperandFromUseList(&NewVAddr);
6158 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6162 MRI.removeRegOperandFromUseList(&NewVAddr);
6163 MRI.addRegOperandToUseList(&NewVAddr);
6165 assert(OldSAddrIdx == NewVAddrIdx);
6167 if (OldVAddrIdx >= 0) {
6169 AMDGPU::OpName::vdst_in);
6173 if (NewVDstIn != -1) {
6180 if (NewVDstIn != -1) {
6219 unsigned OpSubReg =
Op.getSubReg();
6228 Register DstReg =
MRI.createVirtualRegister(DstRC);
6239 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6242 bool ImpDef = Def->isImplicitDef();
6243 while (!ImpDef && Def && Def->isCopy()) {
6244 if (Def->getOperand(1).getReg().isPhysical())
6246 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6247 ImpDef = Def && Def->isImplicitDef();
6249 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6264 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6265 unsigned SaveExecOpc =
6266 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6267 unsigned XorTermOpc =
6268 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6270 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6271 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6279 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6280 unsigned NumSubRegs =
RegSize / 32;
6281 Register VScalarOp = ScalarOp->getReg();
6283 if (NumSubRegs == 1) {
6284 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6286 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6289 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6291 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6297 CondReg = NewCondReg;
6299 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6307 ScalarOp->setReg(CurReg);
6308 ScalarOp->setIsKill();
6311 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6312 "Unhandled register size");
6314 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6315 Register CurRegLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6316 Register CurRegHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6319 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6320 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6323 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6324 .
addReg(VScalarOp, VScalarOpUndef,
6325 TRI->getSubRegFromChannel(
Idx + 1));
6331 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6332 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6338 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6339 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6342 if (NumSubRegs <= 2)
6343 Cmp.addReg(VScalarOp);
6345 Cmp.addReg(VScalarOp, VScalarOpUndef,
6346 TRI->getSubRegFromChannel(
Idx, 2));
6350 CondReg = NewCondReg;
6352 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6361 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6362 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6366 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6367 unsigned Channel = 0;
6368 for (
Register Piece : ReadlanePieces) {
6369 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6373 ScalarOp->setReg(SScalarOp);
6374 ScalarOp->setIsKill();
6378 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6379 MRI.setSimpleHint(SaveExec, CondReg);
6410 if (!Begin.isValid())
6412 if (!
End.isValid()) {
6417 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6418 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6419 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6428 std::numeric_limits<unsigned>::max()) !=
6431 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6437 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6446 for (
auto I = Begin;
I != AfterMI;
I++) {
6447 for (
auto &MO :
I->all_uses())
6448 MRI.clearKillFlags(MO.getReg());
6483 for (
auto &Succ : RemainderBB->
successors()) {
6506static std::tuple<unsigned, unsigned>
6514 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
6515 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
6518 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6519 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6520 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6521 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6522 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
6530 .
addImm(RsrcDataFormat & 0xFFFFFFFF);
6534 .
addImm(RsrcDataFormat >> 32);
6539 .
addImm(AMDGPU::sub0_sub1)
6545 return std::tuple(RsrcPtr, NewSRsrc);
6582 if (
MI.getOpcode() == AMDGPU::PHI) {
6584 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
6585 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
6588 MRI.getRegClass(
MI.getOperand(i).getReg());
6603 VRC = &AMDGPU::VReg_1RegClass;
6619 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6621 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6637 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6644 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6646 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6664 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6669 if (DstRC != Src0RC) {
6678 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6686 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6687 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6688 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6689 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6690 MI.getOpcode() == AMDGPU::S_WQM_B64) {
6705 : AMDGPU::OpName::srsrc;
6710 int SampOpName =
isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6719 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6725 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6726 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6731 while (Start->getOpcode() != FrameSetupOpcode)
6734 while (
End->getOpcode() != FrameDestroyOpcode)
6738 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
6739 MI.definesRegister(
End->getOperand(1).getReg(),
nullptr))
6747 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6749 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6760 bool isSoffsetLegal =
true;
6763 if (SoffsetIdx != -1) {
6767 isSoffsetLegal =
false;
6771 bool isRsrcLegal =
true;
6774 if (RsrcIdx != -1) {
6777 isRsrcLegal =
false;
6782 if (isRsrcLegal && isSoffsetLegal)
6806 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6807 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6808 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6810 const auto *BoolXExecRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6811 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
6812 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
6814 unsigned RsrcPtr, NewSRsrc;
6821 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6828 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6846 "FIXME: Need to emit flat atomics here");
6848 unsigned RsrcPtr, NewSRsrc;
6851 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6874 MIB.
addImm(CPol->getImm());
6879 MIB.
addImm(TFE->getImm());
6899 MI.removeFromParent();
6904 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6906 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6910 if (!isSoffsetLegal) {
6922 if (!isSoffsetLegal) {
6931 InstrList.insert(
MI);
6935 if (RsrcIdx != -1) {
6936 DeferredList.insert(
MI);
6941 return DeferredList.contains(
MI);
6947 while (!Worklist.
empty()) {
6961 "Deferred MachineInstr are not supposed to re-populate worklist");
6979 case AMDGPU::S_ADD_U64_PSEUDO:
6980 NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
6982 case AMDGPU::S_SUB_U64_PSEUDO:
6983 NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
6985 case AMDGPU::S_ADD_I32:
6986 case AMDGPU::S_SUB_I32: {
6990 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
6998 case AMDGPU::S_MUL_U64:
7000 splitScalarSMulU64(Worklist, Inst, MDT);
7004 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7005 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7008 splitScalarSMulPseudo(Worklist, Inst, MDT);
7012 case AMDGPU::S_AND_B64:
7013 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7017 case AMDGPU::S_OR_B64:
7018 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7022 case AMDGPU::S_XOR_B64:
7023 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7027 case AMDGPU::S_NAND_B64:
7028 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7032 case AMDGPU::S_NOR_B64:
7033 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7037 case AMDGPU::S_XNOR_B64:
7039 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7041 splitScalar64BitXnor(Worklist, Inst, MDT);
7045 case AMDGPU::S_ANDN2_B64:
7046 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7050 case AMDGPU::S_ORN2_B64:
7051 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7055 case AMDGPU::S_BREV_B64:
7056 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7060 case AMDGPU::S_NOT_B64:
7061 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7065 case AMDGPU::S_BCNT1_I32_B64:
7066 splitScalar64BitBCNT(Worklist, Inst);
7070 case AMDGPU::S_BFE_I64:
7071 splitScalar64BitBFE(Worklist, Inst);
7075 case AMDGPU::S_FLBIT_I32_B64:
7076 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7079 case AMDGPU::S_FF1_I32_B64:
7080 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7084 case AMDGPU::S_LSHL_B32:
7086 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7090 case AMDGPU::S_ASHR_I32:
7092 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7096 case AMDGPU::S_LSHR_B32:
7098 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7102 case AMDGPU::S_LSHL_B64:
7105 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7106 : AMDGPU::V_LSHLREV_B64_e64;
7110 case AMDGPU::S_ASHR_I64:
7112 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7116 case AMDGPU::S_LSHR_B64:
7118 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7123 case AMDGPU::S_ABS_I32:
7124 lowerScalarAbs(Worklist, Inst);
7128 case AMDGPU::S_CBRANCH_SCC0:
7129 case AMDGPU::S_CBRANCH_SCC1: {
7132 bool IsSCC = CondReg == AMDGPU::SCC;
7135 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7138 .
addReg(IsSCC ? VCC : CondReg);
7142 case AMDGPU::S_BFE_U64:
7143 case AMDGPU::S_BFM_B64:
7146 case AMDGPU::S_PACK_LL_B32_B16:
7147 case AMDGPU::S_PACK_LH_B32_B16:
7148 case AMDGPU::S_PACK_HL_B32_B16:
7149 case AMDGPU::S_PACK_HH_B32_B16:
7150 movePackToVALU(Worklist,
MRI, Inst);
7154 case AMDGPU::S_XNOR_B32:
7155 lowerScalarXnor(Worklist, Inst);
7159 case AMDGPU::S_NAND_B32:
7160 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7164 case AMDGPU::S_NOR_B32:
7165 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7169 case AMDGPU::S_ANDN2_B32:
7170 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7174 case AMDGPU::S_ORN2_B32:
7175 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7183 case AMDGPU::S_ADD_CO_PSEUDO:
7184 case AMDGPU::S_SUB_CO_PSEUDO: {
7185 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7186 ? AMDGPU::V_ADDC_U32_e64
7187 : AMDGPU::V_SUBB_U32_e64;
7188 const auto *CarryRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
7191 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7192 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7210 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7214 case AMDGPU::S_UADDO_PSEUDO:
7215 case AMDGPU::S_USUBO_PSEUDO: {
7222 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7223 ? AMDGPU::V_ADD_CO_U32_e64
7224 : AMDGPU::V_SUB_CO_U32_e64;
7227 Register DestReg =
MRI.createVirtualRegister(NewRC);
7235 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7242 case AMDGPU::S_CSELECT_B32:
7243 case AMDGPU::S_CSELECT_B64:
7244 lowerSelect(Worklist, Inst, MDT);
7247 case AMDGPU::S_CMP_EQ_I32:
7248 case AMDGPU::S_CMP_LG_I32:
7249 case AMDGPU::S_CMP_GT_I32:
7250 case AMDGPU::S_CMP_GE_I32:
7251 case AMDGPU::S_CMP_LT_I32:
7252 case AMDGPU::S_CMP_LE_I32:
7253 case AMDGPU::S_CMP_EQ_U32:
7254 case AMDGPU::S_CMP_LG_U32:
7255 case AMDGPU::S_CMP_GT_U32:
7256 case AMDGPU::S_CMP_GE_U32:
7257 case AMDGPU::S_CMP_LT_U32:
7258 case AMDGPU::S_CMP_LE_U32:
7259 case AMDGPU::S_CMP_EQ_U64:
7260 case AMDGPU::S_CMP_LG_U64:
7261 case AMDGPU::S_CMP_LT_F32:
7262 case AMDGPU::S_CMP_EQ_F32:
7263 case AMDGPU::S_CMP_LE_F32:
7264 case AMDGPU::S_CMP_GT_F32:
7265 case AMDGPU::S_CMP_LG_F32:
7266 case AMDGPU::S_CMP_GE_F32:
7267 case AMDGPU::S_CMP_O_F32:
7268 case AMDGPU::S_CMP_U_F32:
7269 case AMDGPU::S_CMP_NGE_F32:
7270 case AMDGPU::S_CMP_NLG_F32:
7271 case AMDGPU::S_CMP_NGT_F32:
7272 case AMDGPU::S_CMP_NLE_F32:
7273 case AMDGPU::S_CMP_NEQ_F32:
7274 case AMDGPU::S_CMP_NLT_F32:
7275 case AMDGPU::S_CMP_LT_F16:
7276 case AMDGPU::S_CMP_EQ_F16:
7277 case AMDGPU::S_CMP_LE_F16:
7278 case AMDGPU::S_CMP_GT_F16:
7279 case AMDGPU::S_CMP_LG_F16:
7280 case AMDGPU::S_CMP_GE_F16:
7281 case AMDGPU::S_CMP_O_F16:
7282 case AMDGPU::S_CMP_U_F16:
7283 case AMDGPU::S_CMP_NGE_F16:
7284 case AMDGPU::S_CMP_NLG_F16:
7285 case AMDGPU::S_CMP_NGT_F16:
7286 case AMDGPU::S_CMP_NLE_F16:
7287 case AMDGPU::S_CMP_NEQ_F16:
7288 case AMDGPU::S_CMP_NLT_F16: {
7294 AMDGPU::OpName::src0_modifiers) >= 0) {
7309 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7313 case AMDGPU::S_CVT_HI_F32_F16: {
7315 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7316 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7327 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7331 case AMDGPU::S_MINIMUM_F32:
7332 case AMDGPU::S_MAXIMUM_F32:
7333 case AMDGPU::S_MINIMUM_F16:
7334 case AMDGPU::S_MAXIMUM_F16: {
7336 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7347 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7353 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7361 if (NewOpcode == Opcode) {
7385 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7397 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7398 MRI.replaceRegWith(DstReg, NewDstReg);
7400 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7411 AMDGPU::OpName::src0_modifiers) >= 0)
7416 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7417 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7419 NewInstr->addOperand(Src);
7422 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7425 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7427 NewInstr.addImm(
Size);
7428 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7432 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7437 "Scalar BFE is only implemented for constant width and offset");
7446 AMDGPU::OpName::src1_modifiers) >= 0)
7451 AMDGPU::OpName::src2_modifiers) >= 0)
7465 NewInstr->addOperand(
Op);
7472 if (
Op.getReg() == AMDGPU::SCC) {
7474 if (
Op.isDef() && !
Op.isDead())
7475 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7477 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7482 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7483 Register DstReg = NewInstr->getOperand(0).getReg();
7488 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7489 MRI.replaceRegWith(DstReg, NewDstReg);
7495 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7499std::pair<bool, MachineBasicBlock *>
7511 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7514 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
7516 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
7517 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
7525 MRI.replaceRegWith(OldDstReg, ResultReg);
7528 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7529 return std::pair(
true, NewBB);
7532 return std::pair(
false,
nullptr);
7549 bool IsSCC = (CondReg == AMDGPU::SCC);
7557 MRI.replaceRegWith(Dest.
getReg(), CondReg);
7565 NewCondReg =
MRI.createVirtualRegister(TC);
7569 bool CopyFound =
false;
7573 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
7575 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7577 .
addReg(CandI.getOperand(1).getReg());
7589 : AMDGPU::S_CSELECT_B32;
7599 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
7600 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7613 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
7615 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
7627 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7628 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7631 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7641 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7642 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7657 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7665 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7666 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7672 bool Src0IsSGPR = Src0.
isReg() &&
7674 bool Src1IsSGPR = Src1.
isReg() &&
7677 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7678 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7688 }
else if (Src1IsSGPR) {
7702 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7706 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7712 unsigned Opcode)
const {
7722 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7723 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7735 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7736 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7741 unsigned Opcode)
const {
7751 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7752 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7764 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7765 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7783 &AMDGPU::SGPR_32RegClass;
7786 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7789 AMDGPU::sub0, Src0SubRC);
7794 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7796 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
7800 AMDGPU::sub1, Src0SubRC);
7802 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
7808 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
7815 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7817 Worklist.
insert(&LoHalf);
7818 Worklist.
insert(&HiHalf);
7824 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7835 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7836 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7837 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7848 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7852 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7882 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7888 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7894 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7905 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7921 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7933 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7944 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7945 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7946 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7957 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7961 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7973 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
7974 ? AMDGPU::V_MUL_HI_U32_e64
7975 : AMDGPU::V_MUL_HI_I32_e64;
7990 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7998 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8017 &AMDGPU::SGPR_32RegClass;
8020 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8023 &AMDGPU::SGPR_32RegClass;
8026 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8029 AMDGPU::sub0, Src0SubRC);
8031 AMDGPU::sub0, Src1SubRC);
8033 AMDGPU::sub1, Src0SubRC);
8035 AMDGPU::sub1, Src1SubRC);
8040 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8042 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8047 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8052 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8059 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8061 Worklist.
insert(&LoHalf);
8062 Worklist.
insert(&HiHalf);
8065 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8083 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8099 Register NewDest =
MRI.createVirtualRegister(DestRC);
8105 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8123 MRI.getRegClass(Src.getReg()) :
8124 &AMDGPU::SGPR_32RegClass;
8126 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8127 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8130 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8133 AMDGPU::sub0, SrcSubRC);
8135 AMDGPU::sub1, SrcSubRC);
8141 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8145 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8164 Offset == 0 &&
"Not implemented");
8167 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8168 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8169 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8186 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8187 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8192 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8193 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8197 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8200 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8205 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8206 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8227 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8228 unsigned OpcodeAdd =
8229 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8232 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8234 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8241 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8242 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8243 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8244 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8251 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8257 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8259 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8261 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
8264void SIInstrInfo::addUsersToMoveToVALUWorklist(
8268 E =
MRI.use_end();
I != E;) {
8273 switch (
UseMI.getOpcode()) {
8276 case AMDGPU::SOFT_WQM:
8277 case AMDGPU::STRICT_WWM:
8278 case AMDGPU::STRICT_WQM:
8279 case AMDGPU::REG_SEQUENCE:
8281 case AMDGPU::INSERT_SUBREG:
8284 OpNo =
I.getOperandNo();
8293 }
while (
I != E &&
I->getParent() == &
UseMI);
8303 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8310 case AMDGPU::S_PACK_LL_B32_B16: {
8311 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8312 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8329 case AMDGPU::S_PACK_LH_B32_B16: {
8330 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8339 case AMDGPU::S_PACK_HL_B32_B16: {
8340 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8350 case AMDGPU::S_PACK_HH_B32_B16: {
8351 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8352 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8369 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8370 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8379 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8380 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8388 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
8392 Register DestReg =
MI.getOperand(0).getReg();
8394 MRI.replaceRegWith(DestReg, NewCond);
8399 MI.getOperand(SCCIdx).setReg(NewCond);
8405 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
8408 for (
auto &Copy : CopyToDelete)
8409 Copy->eraseFromParent();
8417void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8426 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8428 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8445 case AMDGPU::REG_SEQUENCE:
8446 case AMDGPU::INSERT_SUBREG:
8448 case AMDGPU::SOFT_WQM:
8449 case AMDGPU::STRICT_WWM:
8450 case AMDGPU::STRICT_WQM: {
8458 case AMDGPU::REG_SEQUENCE:
8459 case AMDGPU::INSERT_SUBREG:
8469 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8486 int OpIndices[3])
const {
8505 for (
unsigned i = 0; i < 3; ++i) {
8506 int Idx = OpIndices[i];
8543 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8544 SGPRReg = UsedSGPRs[0];
8547 if (!SGPRReg && UsedSGPRs[1]) {
8548 if (UsedSGPRs[1] == UsedSGPRs[2])
8549 SGPRReg = UsedSGPRs[1];
8556 unsigned OperandName)
const {
8561 return &
MI.getOperand(
Idx);
8578 RsrcDataFormat |= (1ULL << 56);
8583 RsrcDataFormat |= (2ULL << 59);
8586 return RsrcDataFormat;
8608 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8614 unsigned Opc =
MI.getOpcode();
8620 return get(Opc).mayLoad() &&
8625 int &FrameIndex)
const {
8633 FrameIndex =
Addr->getIndex();
8638 int &FrameIndex)
const {
8641 FrameIndex =
Addr->getIndex();
8646 int &FrameIndex)
const {
8660 int &FrameIndex)
const {
8677 while (++
I != E &&
I->isInsideBundle()) {
8678 assert(!
I->isBundle() &&
"No nested bundle!");
8686 unsigned Opc =
MI.getOpcode();
8688 unsigned DescSize =
Desc.getSize();
8693 unsigned Size = DescSize;
8708 bool HasLiteral =
false;
8709 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
8717 return HasLiteral ? DescSize + 4 : DescSize;
8727 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8731 case TargetOpcode::BUNDLE:
8733 case TargetOpcode::INLINEASM:
8734 case TargetOpcode::INLINEASM_BR: {
8736 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8740 if (
MI.isMetaInstruction())
8750 if (
MI.memoperands_empty())
8761 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
8773 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8776 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_IF), DstReg)
8777 .
add(Branch->getOperand(0))
8778 .
add(Branch->getOperand(1));
8780 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_END_CF))
8799 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8804 BuildMI(*(MF), Branch->getDebugLoc(),
get(TargetOpcode::PHI), DstReg);
8806 if (PMBB == LoopEnd) {
8807 HeaderPHIBuilder.
addReg(BackEdgeReg);
8812 HeaderPHIBuilder.
addReg(ZeroReg);
8814 HeaderPHIBuilder.
addMBB(PMBB);
8818 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
8820 .
add(Branch->getOperand(0));
8822 BuildMI(*(MF), Branch->getDebugLoc(),
get(AMDGPU::SI_LOOP))
8828 LoopEnd->
insert(LoopEnd->
end(), SIIFBREAK);
8829 LoopEnd->
insert(LoopEnd->
end(), SILOOP);
8835 static const std::pair<int, const char *> TargetIndices[] = {
8873std::pair<unsigned, unsigned>
8880 static const std::pair<unsigned, const char *> TargetFlags[] = {
8895 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8909 return AMDGPU::WWM_COPY;
8911 return AMDGPU::COPY;
8922 bool IsNullOrVectorRegister =
true;
8931 return IsNullOrVectorRegister &&
8932 (
isSpill(Opcode) || (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
8933 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
8946 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
8977 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
8978 case AMDGPU::SI_KILL_I1_TERMINATOR:
8987 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
8988 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
8989 case AMDGPU::SI_KILL_I1_PSEUDO:
8990 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9002 const unsigned OffsetBits =
9004 return (1 << OffsetBits) - 1;
9011 if (
MI.isInlineAsm())
9014 for (
auto &
Op :
MI.implicit_operands()) {
9015 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
9016 Op.setReg(AMDGPU::VCC_LO);
9029 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9047 if (Imm <= MaxImm + 64) {
9049 Overflow = Imm - MaxImm;
9139std::pair<int64_t, int64_t>
9142 int64_t RemainderOffset = COffsetVal;
9143 int64_t ImmField = 0;
9148 if (AllowNegative) {
9150 int64_t
D = 1LL << NumBits;
9151 RemainderOffset = (COffsetVal /
D) *
D;
9152 ImmField = COffsetVal - RemainderOffset;
9156 (ImmField % 4) != 0) {
9158 RemainderOffset += ImmField % 4;
9159 ImmField -= ImmField % 4;
9161 }
else if (COffsetVal >= 0) {
9162 ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
9163 RemainderOffset = COffsetVal - ImmField;
9167 assert(RemainderOffset + ImmField == COffsetVal);
9168 return {ImmField, RemainderOffset};
9180 switch (ST.getGeneration()) {
9205 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9206 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9207 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9208 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9209 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9210 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9211 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9212 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9248 if (
isMAI(Opcode)) {
9293 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
9294 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9295 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9307 switch (
MI.getOpcode()) {
9309 case AMDGPU::REG_SEQUENCE:
9313 case AMDGPU::INSERT_SUBREG:
9314 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9331 if (!
P.Reg.isVirtual())
9335 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9336 while (
auto *
MI = DefInst) {
9338 switch (
MI->getOpcode()) {
9340 case AMDGPU::V_MOV_B32_e32: {
9341 auto &Op1 =
MI->getOperand(1);
9346 DefInst =
MRI.getVRegDef(RSR.Reg);
9354 DefInst =
MRI.getVRegDef(RSR.Reg);
9367 assert(
MRI.isSSA() &&
"Must be run on SSA");
9369 auto *
TRI =
MRI.getTargetRegisterInfo();
9370 auto *DefBB =
DefMI.getParent();
9374 if (
UseMI.getParent() != DefBB)
9377 const int MaxInstScan = 20;
9381 auto E =
UseMI.getIterator();
9382 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
9383 if (
I->isDebugInstr())
9386 if (++NumInst > MaxInstScan)
9389 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9399 assert(
MRI.isSSA() &&
"Must be run on SSA");
9401 auto *
TRI =
MRI.getTargetRegisterInfo();
9402 auto *DefBB =
DefMI.getParent();
9404 const int MaxUseScan = 10;
9407 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9408 auto &UseInst = *
Use.getParent();
9411 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9414 if (++NumUse > MaxUseScan)
9421 const int MaxInstScan = 20;
9425 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9428 if (
I->isDebugInstr())
9431 if (++NumInst > MaxInstScan)
9444 if (Reg == VReg && --NumUse == 0)
9446 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9458 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
9461 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9470 if (InsPt !=
MBB.
end() &&
9471 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9472 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9473 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9474 InsPt->definesRegister(Src,
nullptr)) {
9478 : AMDGPU::S_MOV_B64_term),
9480 .
addReg(Src, 0, SrcSubReg)
9505 if (isFullCopyInstr(
MI)) {
9514 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9517 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9528 unsigned *PredCost)
const {
9529 if (
MI.isBundle()) {
9532 unsigned Lat = 0, Count = 0;
9533 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
9535 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
9537 return Lat + Count - 1;
9540 return SchedModel.computeInstrLatency(&
MI);
9545 unsigned opcode =
MI.getOpcode();
9546 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9547 auto IID = GI->getIntrinsicID();
9554 case Intrinsic::amdgcn_if:
9555 case Intrinsic::amdgcn_else:
9569 if (opcode == AMDGPU::G_LOAD) {
9570 if (
MI.memoperands_empty())
9574 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9575 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9583 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9584 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9585 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9598 unsigned opcode =
MI.getOpcode();
9599 if (opcode == AMDGPU::V_READLANE_B32 ||
9600 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9601 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9604 if (isCopyInstr(
MI)) {
9608 RI.getPhysRegBaseClass(srcOp.
getReg());
9616 if (
MI.isPreISelOpcode())
9631 if (
MI.memoperands_empty())
9635 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9636 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9651 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
9657 if (!Reg || !
SrcOp.readsReg())
9663 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
9700 Register &SrcReg2, int64_t &CmpMask,
9701 int64_t &CmpValue)
const {
9702 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
9705 switch (
MI.getOpcode()) {
9708 case AMDGPU::S_CMP_EQ_U32:
9709 case AMDGPU::S_CMP_EQ_I32:
9710 case AMDGPU::S_CMP_LG_U32:
9711 case AMDGPU::S_CMP_LG_I32:
9712 case AMDGPU::S_CMP_LT_U32:
9713 case AMDGPU::S_CMP_LT_I32:
9714 case AMDGPU::S_CMP_GT_U32:
9715 case AMDGPU::S_CMP_GT_I32:
9716 case AMDGPU::S_CMP_LE_U32:
9717 case AMDGPU::S_CMP_LE_I32:
9718 case AMDGPU::S_CMP_GE_U32:
9719 case AMDGPU::S_CMP_GE_I32:
9720 case AMDGPU::S_CMP_EQ_U64:
9721 case AMDGPU::S_CMP_LG_U64:
9722 SrcReg =
MI.getOperand(0).getReg();
9723 if (
MI.getOperand(1).isReg()) {
9724 if (
MI.getOperand(1).getSubReg())
9726 SrcReg2 =
MI.getOperand(1).getReg();
9728 }
else if (
MI.getOperand(1).isImm()) {
9730 CmpValue =
MI.getOperand(1).getImm();
9736 case AMDGPU::S_CMPK_EQ_U32:
9737 case AMDGPU::S_CMPK_EQ_I32:
9738 case AMDGPU::S_CMPK_LG_U32:
9739 case AMDGPU::S_CMPK_LG_I32:
9740 case AMDGPU::S_CMPK_LT_U32:
9741 case AMDGPU::S_CMPK_LT_I32:
9742 case AMDGPU::S_CMPK_GT_U32:
9743 case AMDGPU::S_CMPK_GT_I32:
9744 case AMDGPU::S_CMPK_LE_U32:
9745 case AMDGPU::S_CMPK_LE_I32:
9746 case AMDGPU::S_CMPK_GE_U32:
9747 case AMDGPU::S_CMPK_GE_I32:
9748 SrcReg =
MI.getOperand(0).getReg();
9750 CmpValue =
MI.getOperand(1).getImm();
9768 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue,
MRI,
9769 this](int64_t ExpectedValue,
unsigned SrcSize,
9770 bool IsReversible,
bool IsSigned) ->
bool {
9795 if (!Def || Def->getParent() != CmpInstr.
getParent())
9798 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
9799 Def->getOpcode() != AMDGPU::S_AND_B64)
9803 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
9814 SrcOp = &Def->getOperand(2);
9815 else if (isMask(&Def->getOperand(2)))
9816 SrcOp = &Def->getOperand(1);
9821 if (IsSigned && BitNo == SrcSize - 1)
9824 ExpectedValue <<= BitNo;
9826 bool IsReversedCC =
false;
9827 if (CmpValue != ExpectedValue) {
9830 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
9835 Register DefReg = Def->getOperand(0).getReg();
9836 if (IsReversedCC && !
MRI->hasOneNonDBGUse(DefReg))
9839 for (
auto I = std::next(Def->getIterator()), E = CmpInstr.
getIterator();
9841 if (
I->modifiesRegister(AMDGPU::SCC, &RI) ||
9842 I->killsRegister(AMDGPU::SCC, &RI))
9847 Def->findRegisterDefOperand(AMDGPU::SCC,
nullptr);
9851 if (!
MRI->use_nodbg_empty(DefReg)) {
9859 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
9860 : AMDGPU::S_BITCMP1_B32
9861 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
9862 : AMDGPU::S_BITCMP1_B64;
9867 Def->eraseFromParent();
9875 case AMDGPU::S_CMP_EQ_U32:
9876 case AMDGPU::S_CMP_EQ_I32:
9877 case AMDGPU::S_CMPK_EQ_U32:
9878 case AMDGPU::S_CMPK_EQ_I32:
9879 return optimizeCmpAnd(1, 32,
true,
false);
9880 case AMDGPU::S_CMP_GE_U32:
9881 case AMDGPU::S_CMPK_GE_U32:
9882 return optimizeCmpAnd(1, 32,
false,
false);
9883 case AMDGPU::S_CMP_GE_I32:
9884 case AMDGPU::S_CMPK_GE_I32:
9885 return optimizeCmpAnd(1, 32,
false,
true);
9886 case AMDGPU::S_CMP_EQ_U64:
9887 return optimizeCmpAnd(1, 64,
true,
false);
9888 case AMDGPU::S_CMP_LG_U32:
9889 case AMDGPU::S_CMP_LG_I32:
9890 case AMDGPU::S_CMPK_LG_U32:
9891 case AMDGPU::S_CMPK_LG_I32:
9892 return optimizeCmpAnd(0, 32,
true,
false);
9893 case AMDGPU::S_CMP_GT_U32:
9894 case AMDGPU::S_CMPK_GT_U32:
9895 return optimizeCmpAnd(0, 32,
false,
false);
9896 case AMDGPU::S_CMP_GT_I32:
9897 case AMDGPU::S_CMPK_GT_I32:
9898 return optimizeCmpAnd(0, 32,
false,
true);
9899 case AMDGPU::S_CMP_LG_U64:
9900 return optimizeCmpAnd(0, 64,
true,
false);
9925 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
9928 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
9929 : &AMDGPU::VReg_64_Align2RegClass);
9931 .
addReg(DataReg, 0,
Op.getSubReg())
9936 Op.setSubReg(AMDGPU::sub0);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
TargetInstrInfo::RegSubRegPair RegSubRegPair
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillRestoreOpcode(unsigned Size)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MachineRegisterInfo &MRI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillSaveOpcode(unsigned Size)
static bool resultDependsOnExec(const MachineInstr &MI)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static constexpr unsigned ModifierOpNames[]
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const SIRegisterInfo * getRegisterInfo() const override
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have a successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for register operands, etc.
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created and uniqued by the MCContext class.
void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor blocks which refer to FromMBB to refer to this.
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into two pieces at SplitInst.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's immediate dominator changes.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has no parent, prev, or next.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
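The builder methods above chain off BuildMI (listed further below); a hedged sketch of typical use, where the opcodes and operand values are illustrative placeholders rather than anything taken from this file:

// Sketch: materialize an immediate, then copy a register, before iterator I.
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), DstSGPR)
    .addImm(42);                                  // immediate source operand
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), DstVGPR)
    .addReg(SrcReg, getKillRegState(KillSrc));    // register source operand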
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore, etc.
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags (isKill, isDead and isUndef).
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register within the MachineFunction that corresponds to this MachineRegisterInfo object.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the place before To.
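A plausible call sequence for these scavenger entry points, as a sketch; the register class choice and the failure handling are assumptions, not taken from this listing:

// Sketch: find a free 32-bit SGPR at MI, scanning backwards, without spilling.
RegScavenger RS;
RS.enterBasicBlockEnd(MBB);              // track liveness from the block end
Register Tmp = RS.scavengeRegisterBackwards(
    AMDGPU::SReg_32_XM0RegClass, MI.getIterator(),
    /*RestoreAfter=*/false, /*SPAdj=*/0, /*AllowSpill=*/false);
if (!Tmp.isValid())                      // illustrative failure path
  report_fatal_error("no free SGPR available");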
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
bool isNonUniformBranchInstr(MachineInstr &Instr) const
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
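A worked sketch of that split, assuming a hypothetical 12-bit signed immediate field; the real field width depends on the subtarget and FlatVariant, so this only illustrates the shape of the computation:

// Sketch: keep the low, sign-extended bits in the immediate field and
// fold the rest into the address computation.
static std::pair<int64_t, int64_t> splitOffsetSketch(int64_t COffsetVal) {
  constexpr unsigned NumBits = 12;                 // assumed field width
  int64_t ImmField = SignExtend64<NumBits>(COffsetVal);
  int64_t RemainderOffset = COffsetVal - ImmField; // carried by the address
  return {ImmField, RemainderOffset};
}
// e.g. COffsetVal = 0x1234 -> {0x234, 0x1000}.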
bool isSpill(uint16_t Opcode) const
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to another, return the destination and source registers as machine operands.
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by the assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (PostRASchedulerList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description.
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
Whether we must prevent this instruction from executing with EXEC = 0.
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified pseudo or native opcode.
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
void setHasSpilledVGPRs(bool Spill=true)
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class SubRC with subregister index SubIdx.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions according to the given MachineSchedStrategy without much extra book-keeping.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle, and whether or not a noop needs to be inserted to handle the hazard.
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructions before register allocation.
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination register.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination register.
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination register.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target specify whether the instruction is actually trivially rematerializable, taking into consideration its operands.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants, literals and mandatory literals (KImm).
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if one is needed).
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions starting from FirstMI to LastMI (exclusive).
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address intrinsics from the specified value V, returning the original object being addressed.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the start of the kernel to the load instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy-like instructions and subreg manipulation pseudos.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
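These two helpers pair naturally when a 64-bit immediate must be emitted as two 32-bit pieces, e.g. when expanding a 64-bit move; a trivial sketch:

// Sketch: split a 64-bit immediate for a pair of 32-bit moves.
uint64_t Imm = 0x1122334455667788ULL;
uint32_t LoHalf = Lo_32(Imm);   // 0x55667788
uint32_t HiHalf = Hi_32(Imm);   // 0x11223344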
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
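This is the natural primitive behind a branch-range check such as isBranchOffsetInRange; a sketch using the amdgpu-s-branch-bits default of 16, with the offset units and surrounding logic simplified as assumptions:

// Sketch: a 16-bit signed field covers offsets in [-32768, 32767].
bool InRange = isIntN(/*N=*/16, BrOffset);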
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
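For example, maxUIntN(12) == 4095, the largest value a 12-bit unsigned offset field can carry; in general maxUIntN(N) is 2^N - 1.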
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.