#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
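// Command-line overrides: one artificially restricts the assumed branch range
// (a debug aid for branch relaxation), another enables fixing copies between
// 32- and 16-bit physical registers by widening them to 32 bits.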
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.memoperands_empty() &&
           return MMO->isLoad() && MMO->isInvariant();
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))
  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:
  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&

      if (FromCycle == nullptr)

      while (FromCycle && !FromCycle->contains(ToCycle)) {
                                    int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    assert(NumOps == 4 || NumOps == 5);
        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

    if (!Load0Offset || !Load1Offset)

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;
      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      Offset = EltSize * Offset0;

    if (DataOpIdx == -1) {
    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

  return Base1 == Base2;
    int64_t Offset1, bool OffsetIsScalable1,
    int64_t Offset2, bool OffsetIsScalable2,
    unsigned ClusterSize,
    unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
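  // Clustering heuristic: estimate the per-load size from NumBytes/ClusterSize,
  // round each load up to whole dwords, and only cluster while the group needs
  // at most 8 dwords of result registers.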
  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWORDs <= 8;
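// Treat loads as schedulable neighbours only when there are at most 16 of them
// and their offsets fall within 64 bytes of each other.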
                                      int64_t Offset0, int64_t Offset1,
                                      unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
                              const char *Msg = "illegal VGPR to SGPR copy") {
  C.diagnose(IllegalCopy);
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");
    for (auto Def = MI, E = MBB.begin(); Def != E; ) {

      if (!Def->modifiesRegister(SrcReg, &RI))

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,
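  // A few VGPRs are reserved as copy intermediates; pick one of up to three of
  // them, rotating by the destination AGPR number.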
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

         "VGPR used for an intermediate copy should have been reserved.");

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;
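    // When both the source and destination sub-registers sit on an even SGPR
    // boundary, copy two lanes at once with S_MOV_B64 instead of two S_MOV_B32s.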
    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

    LastMI->addRegisterKilled(SrcReg, &RI);
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);
  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
        Opcode = AMDGPU::INSTRUCTION_LIST_END;
      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
      Opcode = AMDGPU::V_MOV_B64_e32;
      Opcode = AMDGPU::V_PK_MOV_B32;
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                               *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
                               int64_t Value) const {

  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {
  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;

  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      Opcode = AMDGPU::S_MOV_B32;

    int64_t IdxValue = Idx == 0 ? Value : 0;

        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
    Builder.addImm(IdxValue);
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                              : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                              : AMDGPU::S_CSELECT_B64), SReg)
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
                                               bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1427 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1429 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1431 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1433 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1435 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1437 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1439 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1441 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1443 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1445 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1447 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1448 if (VecSize <= 1024)
1449 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1456 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1458 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1460 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1462 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1464 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1466 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1468 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1470 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1472 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1474 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1476 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1477 if (VecSize <= 1024)
1478 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1485 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1487 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1489 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1491 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1493 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1495 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1497 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1499 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1501 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1503 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1505 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1506 if (VecSize <= 1024)
1507 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1514 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1516 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1518 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1520 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1521 if (VecSize <= 1024)
1522 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                       bool IsSGPR) const {
  assert(EltSize == 32 && "invalid reg indexing elt size");
1548 return AMDGPU::SI_SPILL_S32_SAVE;
1550 return AMDGPU::SI_SPILL_S64_SAVE;
1552 return AMDGPU::SI_SPILL_S96_SAVE;
1554 return AMDGPU::SI_SPILL_S128_SAVE;
1556 return AMDGPU::SI_SPILL_S160_SAVE;
1558 return AMDGPU::SI_SPILL_S192_SAVE;
1560 return AMDGPU::SI_SPILL_S224_SAVE;
1562 return AMDGPU::SI_SPILL_S256_SAVE;
1564 return AMDGPU::SI_SPILL_S288_SAVE;
1566 return AMDGPU::SI_SPILL_S320_SAVE;
1568 return AMDGPU::SI_SPILL_S352_SAVE;
1570 return AMDGPU::SI_SPILL_S384_SAVE;
1572 return AMDGPU::SI_SPILL_S512_SAVE;
1574 return AMDGPU::SI_SPILL_S1024_SAVE;
1583 return AMDGPU::SI_SPILL_V32_SAVE;
1585 return AMDGPU::SI_SPILL_V64_SAVE;
1587 return AMDGPU::SI_SPILL_V96_SAVE;
1589 return AMDGPU::SI_SPILL_V128_SAVE;
1591 return AMDGPU::SI_SPILL_V160_SAVE;
1593 return AMDGPU::SI_SPILL_V192_SAVE;
1595 return AMDGPU::SI_SPILL_V224_SAVE;
1597 return AMDGPU::SI_SPILL_V256_SAVE;
1599 return AMDGPU::SI_SPILL_V288_SAVE;
1601 return AMDGPU::SI_SPILL_V320_SAVE;
1603 return AMDGPU::SI_SPILL_V352_SAVE;
1605 return AMDGPU::SI_SPILL_V384_SAVE;
1607 return AMDGPU::SI_SPILL_V512_SAVE;
1609 return AMDGPU::SI_SPILL_V1024_SAVE;
1618 return AMDGPU::SI_SPILL_A32_SAVE;
1620 return AMDGPU::SI_SPILL_A64_SAVE;
1622 return AMDGPU::SI_SPILL_A96_SAVE;
1624 return AMDGPU::SI_SPILL_A128_SAVE;
1626 return AMDGPU::SI_SPILL_A160_SAVE;
1628 return AMDGPU::SI_SPILL_A192_SAVE;
1630 return AMDGPU::SI_SPILL_A224_SAVE;
1632 return AMDGPU::SI_SPILL_A256_SAVE;
1634 return AMDGPU::SI_SPILL_A288_SAVE;
1636 return AMDGPU::SI_SPILL_A320_SAVE;
1638 return AMDGPU::SI_SPILL_A352_SAVE;
1640 return AMDGPU::SI_SPILL_A384_SAVE;
1642 return AMDGPU::SI_SPILL_A512_SAVE;
1644 return AMDGPU::SI_SPILL_A1024_SAVE;
1653 return AMDGPU::SI_SPILL_AV32_SAVE;
1655 return AMDGPU::SI_SPILL_AV64_SAVE;
1657 return AMDGPU::SI_SPILL_AV96_SAVE;
1659 return AMDGPU::SI_SPILL_AV128_SAVE;
1661 return AMDGPU::SI_SPILL_AV160_SAVE;
1663 return AMDGPU::SI_SPILL_AV192_SAVE;
1665 return AMDGPU::SI_SPILL_AV224_SAVE;
1667 return AMDGPU::SI_SPILL_AV256_SAVE;
1669 return AMDGPU::SI_SPILL_AV288_SAVE;
1671 return AMDGPU::SI_SPILL_AV320_SAVE;
1673 return AMDGPU::SI_SPILL_AV352_SAVE;
1675 return AMDGPU::SI_SPILL_AV384_SAVE;
1677 return AMDGPU::SI_SPILL_AV512_SAVE;
1679 return AMDGPU::SI_SPILL_AV1024_SAVE;
                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

                      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                          SpillSize, RI, *MFI);
1774 return AMDGPU::SI_SPILL_S32_RESTORE;
1776 return AMDGPU::SI_SPILL_S64_RESTORE;
1778 return AMDGPU::SI_SPILL_S96_RESTORE;
1780 return AMDGPU::SI_SPILL_S128_RESTORE;
1782 return AMDGPU::SI_SPILL_S160_RESTORE;
1784 return AMDGPU::SI_SPILL_S192_RESTORE;
1786 return AMDGPU::SI_SPILL_S224_RESTORE;
1788 return AMDGPU::SI_SPILL_S256_RESTORE;
1790 return AMDGPU::SI_SPILL_S288_RESTORE;
1792 return AMDGPU::SI_SPILL_S320_RESTORE;
1794 return AMDGPU::SI_SPILL_S352_RESTORE;
1796 return AMDGPU::SI_SPILL_S384_RESTORE;
1798 return AMDGPU::SI_SPILL_S512_RESTORE;
1800 return AMDGPU::SI_SPILL_S1024_RESTORE;
1809 return AMDGPU::SI_SPILL_V32_RESTORE;
1811 return AMDGPU::SI_SPILL_V64_RESTORE;
1813 return AMDGPU::SI_SPILL_V96_RESTORE;
1815 return AMDGPU::SI_SPILL_V128_RESTORE;
1817 return AMDGPU::SI_SPILL_V160_RESTORE;
1819 return AMDGPU::SI_SPILL_V192_RESTORE;
1821 return AMDGPU::SI_SPILL_V224_RESTORE;
1823 return AMDGPU::SI_SPILL_V256_RESTORE;
1825 return AMDGPU::SI_SPILL_V288_RESTORE;
1827 return AMDGPU::SI_SPILL_V320_RESTORE;
1829 return AMDGPU::SI_SPILL_V352_RESTORE;
1831 return AMDGPU::SI_SPILL_V384_RESTORE;
1833 return AMDGPU::SI_SPILL_V512_RESTORE;
1835 return AMDGPU::SI_SPILL_V1024_RESTORE;
1844 return AMDGPU::SI_SPILL_A32_RESTORE;
1846 return AMDGPU::SI_SPILL_A64_RESTORE;
1848 return AMDGPU::SI_SPILL_A96_RESTORE;
1850 return AMDGPU::SI_SPILL_A128_RESTORE;
1852 return AMDGPU::SI_SPILL_A160_RESTORE;
1854 return AMDGPU::SI_SPILL_A192_RESTORE;
1856 return AMDGPU::SI_SPILL_A224_RESTORE;
1858 return AMDGPU::SI_SPILL_A256_RESTORE;
1860 return AMDGPU::SI_SPILL_A288_RESTORE;
1862 return AMDGPU::SI_SPILL_A320_RESTORE;
1864 return AMDGPU::SI_SPILL_A352_RESTORE;
1866 return AMDGPU::SI_SPILL_A384_RESTORE;
1868 return AMDGPU::SI_SPILL_A512_RESTORE;
1870 return AMDGPU::SI_SPILL_A1024_RESTORE;
1879 return AMDGPU::SI_SPILL_AV32_RESTORE;
1881 return AMDGPU::SI_SPILL_AV64_RESTORE;
1883 return AMDGPU::SI_SPILL_AV96_RESTORE;
1885 return AMDGPU::SI_SPILL_AV128_RESTORE;
1887 return AMDGPU::SI_SPILL_AV160_RESTORE;
1889 return AMDGPU::SI_SPILL_AV192_RESTORE;
1891 return AMDGPU::SI_SPILL_AV224_RESTORE;
1893 return AMDGPU::SI_SPILL_AV256_RESTORE;
1895 return AMDGPU::SI_SPILL_AV288_RESTORE;
1897 return AMDGPU::SI_SPILL_AV320_RESTORE;
1899 return AMDGPU::SI_SPILL_AV352_RESTORE;
1901 return AMDGPU::SI_SPILL_AV384_RESTORE;
1903 return AMDGPU::SI_SPILL_AV512_RESTORE;
1905 return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

  unsigned SpillSize = TRI->getSpillSize(*RC);
                      FrameInfo.getObjectAlign(FrameIndex));

  assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
  assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
         DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                          SpillSize, RI, *MFI);
                                   unsigned Quantity) const {
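  // Each S_NOP encodes up to 8 no-ops (imm = count - 1), so emit the requested
  // quantity in chunks.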
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);
  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;
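  // Abort path for a trapping wave: read the queue doorbell ID, mask it with
  // DoorbellIDMask, OR in the queue-wave-abort bit, and hand the result to the
  // trap handler through m0 (ttmp2 is used to preserve m0 across the sequence).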
    Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)

        MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)

        MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
        .addUse(DoorbellRegMasked)
        .addImm(ECQueueWaveAbort);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
        .addUse(SetWaveAbortBit);

    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
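    // Without a native 64-bit move, split an immediate source into two 32-bit
    // halves and write sub0/sub1 separately.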
    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
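    // Copy the "active" value first, then invert EXEC, copy the "inactive"
    // value into the now-enabled lanes, and invert EXEC back.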
        .add(MI.getOperand(1));
        .add(MI.getOperand(2));

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B64: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
                   MI.getOperand(0).getReg())
        .add(MI.getOperand(1));
                   MI.getOperand(0).getReg())
        .add(MI.getOperand(2));

    MI.eraseFromParent();
2307 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2308 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2309 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2310 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2311 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2312 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2313 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2314 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2315 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2316 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2317 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2318 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2319 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2320 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2321 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2322 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2323 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2324 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2325 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2326 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2327 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2328 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2329 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2330 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2331 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2332 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2333 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2334 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2335 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
2366 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2367 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2368 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2369 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2370 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2371 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2372 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2373 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2374 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2375 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2376 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2377 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
2410 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2411 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2412 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2413 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2414 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2415 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2416 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2417 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2418 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2419 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2420 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2421 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
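    // Materialize a PC-relative address: start from S_GETPC_B64, then add the
    // symbol's low and high offset halves into the register pair.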
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
          BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
                                 : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

  case AMDGPU::SI_RETURN: {

    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
std::pair<MachineInstr*, MachineInstr*>
  assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);
  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {

    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {

      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());

        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                       unsigned Src0OpName,
                                       unsigned Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);
  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);
                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);

                                        int64_t BrOffset) const {

  assert(BranchOp != AMDGPU::S_SETPC_B64);

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
      MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)
  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");
  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)

  if (LongBranchReservedReg) {
      Scav = LongBranchReservedReg;

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));
    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)

    CondBB = I->getOperand(0).getMBB();
    Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {

  unsigned RemovedSize = 0;

    if (MI.isBranch() || MI.isReturn()) {

      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

  if (Cond.size() == 1 && Cond[0].isReg()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

    MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
      MI.removeOperand(Idx);
  if (!MRI->hasOneNonDBGUse(Reg))

  switch (DefMI.getOpcode()) {
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t {
    int64_t Imm = ImmOp->getImm();
    switch (UseOp.getSubReg()) {
    case AMDGPU::sub1_lo16:
    case AMDGPU::sub1_hi16:

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    bool Is16Bit = OpSize == 2;
    bool Is64Bit = OpSize == 8;
    unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
                                           : AMDGPU::V_MOV_B32_e32
                                 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
                                           : AMDGPU::S_MOV_B32;
    APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)));

      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      if (DstReg.isVirtual() &&
          UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)

      UseMI.getOperand(0).setSubReg(0);
      UseMI.getOperand(0).setReg(DstReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
                 Opc == AMDGPU::V_FMAC_F16_t16_e64;
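    // Fold the immediate into the MAD/FMA: if it feeds a multiplicand, rewrite
    // to the MADMK/FMAMK form (constant multiplier); if it feeds the addend,
    // rewrite to MADAK/FMAAK (constant addend).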
        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

      if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
                         : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
      if (NewOpc == AMDGPU::V_FMAMK_F16_t16)

      const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      Src1->ChangeToImmediate(Imm);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

    bool Src0Inlined = false;
    if (Src0->isReg()) {

      if (Def && Def->isMoveImmediate() &&

    if (Src1->isReg() && !Src0Inlined) {

      if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
                         : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
      if (NewOpc == AMDGPU::V_FMAAK_F16_t16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())

  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));
    if (Def.isEarlyClobber() && Def.isReg() &&

      auto UpdateDefIndex = [&](LiveRange &LR) {
        auto S = LR.find(OldIndex);
        if (S != LR.end() && S->start == OldIndex) {
          assert(S->valno && S->valno->def == OldIndex);
          S->start = NewIndex;
          S->valno->def = NewIndex;

      for (auto &SR : LI.subranges())

    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         "V_FMAC_F16_t16_e32 is not supported and not expected to be present "

  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
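  // Convert the two-address MAC/FMAC form into the three-address MAD/FMA form,
  // or into MADAK/MADMK (FMAAK/FMAMK) when one source is a foldable immediate.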
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                      AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&

    const auto killDef = [&]() -> void {
      if (!MRI.hasOneNonDBGUse(DefReg))

                               : AMDGPU::V_FMAAK_F16)
                               : AMDGPU::V_FMAAK_F32)
                    : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
                               : AMDGPU::V_FMAMK_F16)
                               : AMDGPU::V_FMAMK_F32)
                    : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                          : IsF64 ? AMDGPU::V_FMA_F64_e64
                                      ? AMDGPU::V_FMA_LEGACY_F32_e64
                                      : AMDGPU::V_FMA_F32_e64
                          : IsF16 ? AMDGPU::V_MAD_F16_e64
                          : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
                                     : AMDGPU::V_MAD_F32_e64;

  MIB.addImm(OpSel ? OpSel->getImm() : 0);
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {

  APInt IntImm = Imm.bitcastToAPInt();

                                   uint8_t OperandType) const {
  assert(!MO.isReg() && "isInlineConstant called on register operand!");

  int64_t Imm = MO.getImm();
  switch (OperandType) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

                                     AMDGPU::OpName::src2))

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default: return false;

  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                           unsigned Op32) const {

      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();
    int OpTy = MI.getDesc().operands()[Idx++].OperandType;

    if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)

    return MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
           MO.getReg() == AMDGPU::VCC_LO;

  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
         AMDGPU::SReg_64RegClass.contains(MO.getReg());

  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
4623 if (Src0Idx == -1) {
4633 if (!
Desc.isVariadic() &&
4634 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
4635 ErrInfo =
"Instruction has wrong number of operands.";
4639 if (
MI.isInlineAsm()) {
4652 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
4653 ErrInfo =
"inlineasm operand has incorrect register class.";
4661 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
4662 ErrInfo =
"missing memory operand from image instruction.";
4667 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
4670 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
4671 "all fp values to integers.";
4675 int RegClass =
Desc.operands()[i].RegClass;
4677 switch (
Desc.operands()[i].OperandType) {
4679 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
4680 ErrInfo =
"Illegal immediate value for operand.";
4701 ErrInfo =
"Illegal immediate value for operand.";
4708 ErrInfo =
"Expected inline constant for operand.";
4717 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
4718 ErrInfo =
"Expected immediate, but got non-immediate";
4740 RI.getSubRegisterClass(RC, MO.
getSubReg());
4748 ErrInfo =
"Subtarget requires even aligned vector registers";
4753 if (RegClass != -1) {
4754 if (Reg.isVirtual())
4759 ErrInfo =
"Operand has incorrect register class.";
4768 ErrInfo =
"SDWA is not supported on this target";
4774 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
4782 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
4789 "Only reg allowed as operands in SDWA instructions on GFX9+";
4798 if (OMod !=
nullptr &&
4800 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
4805 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
4806 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
4807 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
4808 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
4811 unsigned Mods = Src0ModsMO->
getImm();
4814 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
4820 if (
isVOPC(BasicOpcode)) {
4824 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
4825 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
4831 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
4832 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
4838 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
4839 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
4846 if (DstUnused && DstUnused->isImm() &&
4849 if (!Dst.isReg() || !Dst.isTied()) {
4850 ErrInfo =
"Dst register should have tied register";
4855 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
4858 "Dst register should be tied to implicit use of preserved register";
4862 ErrInfo =
"Dst register should use same physical register as preserved";
4894 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
4895 if (RegCount > DstSize) {
4896 ErrInfo =
"Image instruction returns too many registers for dst "
4905 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
4906 unsigned ConstantBusCount = 0;
4907 bool UsesLiteral =
false;
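    // VALU instructions may read at most one SGPR (or one literal) through the
    // constant bus; the checks below count such uses and report violations.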
4914 LiteralVal = &
MI.getOperand(ImmIdx);
4923 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
4941 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
4951 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
4952 return !RI.regsOverlap(SGPRUsed, SGPR);
4962 Opcode != AMDGPU::V_WRITELANE_B32) {
4963 ErrInfo =
"VOP* instruction violates constant bus restriction";
4968 ErrInfo =
"VOP3 instruction uses literal";
4975 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
4976 unsigned SGPRCount = 0;
4979 for (
int OpIdx : {Src0Idx, Src1Idx}) {
4987 if (MO.
getReg() != SGPRUsed)
4993 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5000 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5001 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5008 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5018 ErrInfo =
"ABS not allowed in VOP3B instructions";
5031 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5038 if (
Desc.isBranch()) {
5040 ErrInfo =
"invalid branch target for SOPK instruction";
5046 if (!isUInt<16>(Imm)) {
5047 ErrInfo =
"invalid immediate for SOPK instruction";
5051 if (!isInt<16>(Imm)) {
5052 ErrInfo =
"invalid immediate for SOPK instruction";
5059 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5060 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5061 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5062 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5063 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5064 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5066 const unsigned StaticNumOps =
5067 Desc.getNumOperands() +
Desc.implicit_uses().size();
5068 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5073 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5074 ErrInfo =
"missing implicit register operands";
5080 if (!Dst->isUse()) {
5081 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5086 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5087 UseOpIdx != StaticNumOps + 1) {
5088 ErrInfo =
"movrel implicit operands should be tied";
5095 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5097 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5098 ErrInfo =
"src0 should be subreg of implicit vector use";
5106 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5107 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5113 if (
MI.mayStore() &&
5118 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5119 ErrInfo =
"scalar stores must use m0 as offset register";
5127 if (
Offset->getImm() != 0) {
5128 ErrInfo =
"subtarget does not support offsets in flat instructions";
5135 if (GDSOp && GDSOp->
getImm() != 0) {
5136 ErrInfo =
"GDS is not supported on this subtarget";
5145 AMDGPU::OpName::vaddr0);
5147 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5156 ErrInfo =
"dim is out of range";
5163 IsA16 = R128A16->
getImm() != 0;
5164 }
else if (ST.
hasA16()) {
5166 IsA16 = A16->
getImm() != 0;
5169 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5171 unsigned AddrWords =
5174 unsigned VAddrWords;
5176 VAddrWords = RsrcIdx - VAddr0Idx;
5179 unsigned LastVAddrIdx = RsrcIdx - 1;
5180 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5188 if (VAddrWords != AddrWords) {
5190 <<
" but got " << VAddrWords <<
"\n");
5191 ErrInfo =
"bad vaddr size";
5199 using namespace AMDGPU::DPP;
5201 unsigned DC = DppCt->
getImm();
5202 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5203 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5204 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5205 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5206 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5207 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5208 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5209 ErrInfo =
"Invalid dpp_ctrl value";
5212 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5214 ErrInfo =
"Invalid dpp_ctrl value: "
5215 "wavefront shifts are not supported on GFX10+";
5218 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5220 ErrInfo =
"Invalid dpp_ctrl value: "
5221 "broadcasts are not supported on GFX10+";
5224 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5226 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5227 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5229 ErrInfo =
"Invalid dpp_ctrl value: "
5230 "row_newbroadcast/row_share is not supported before "
5235 ErrInfo =
"Invalid dpp_ctrl value: "
5236 "row_share and row_xmask are not supported before GFX10";
5241 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5243 ErrInfo =
"Invalid dpp_ctrl value: "
5244 "DP ALU dpp only support row_newbcast";
5251 uint16_t DataNameIdx =
isDS(Opcode) ? AMDGPU::OpName::data0
5252 : AMDGPU::OpName::vdata;
5261 ErrInfo =
"Invalid register class: "
5262 "vdata and vdst should be both VGPR or AGPR";
5265 if (
Data && Data2 &&
5267 ErrInfo =
"Invalid register class: "
5268 "both data operands should be VGPR or AGPR";
5272 if ((Dst && RI.
isAGPR(
MRI, Dst->getReg())) ||
5275 ErrInfo =
"Invalid register class: "
5276 "agpr loads and stores not supported on this GPU";
5283   const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool {
5288     if (Reg.isPhysical())
5295   if (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
5296       MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
5297       MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
5299     if (!isAlignedReg(AMDGPU::OpName::data0)) {
5300       ErrInfo = "Subtarget requires even aligned vector registers "
5301                 "for DS_GWS instructions";
5307     if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5308       ErrInfo = "Subtarget requires even aligned vector registers "
5309                 "for vaddr operand of image instructions";
5315   if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
5318     if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5319       ErrInfo = "Invalid register class: "
5320                 "v_accvgpr_write with an SGPR is not supported on this GPU";
5325   if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5328     ErrInfo = "pseudo expects only physical SGPRs";
5340   switch (MI.getOpcode()) {
5341 default:
return AMDGPU::INSTRUCTION_LIST_END;
5342 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5343 case AMDGPU::COPY:
return AMDGPU::COPY;
5344 case AMDGPU::PHI:
return AMDGPU::PHI;
5345 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5346 case AMDGPU::WQM:
return AMDGPU::WQM;
5347 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5348 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5349 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5350   case AMDGPU::S_MOV_B32: {
5352     return MI.getOperand(1).isReg() ||
5354                AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5356   case AMDGPU::S_ADD_I32:
5357     return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5358   case AMDGPU::S_ADDC_U32:
5359     return AMDGPU::V_ADDC_U32_e32;
5360   case AMDGPU::S_SUB_I32:
5361     return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5364   case AMDGPU::S_ADD_U32:
5365     return AMDGPU::V_ADD_CO_U32_e32;
5366   case AMDGPU::S_SUB_U32:
5367     return AMDGPU::V_SUB_CO_U32_e32;
5368 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5369 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5370 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5371 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5372 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5373 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5374 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5375   case AMDGPU::S_XNOR_B32:
5376     return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5377 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5378 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5379 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5380 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5381 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5382 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5383 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5384 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5385 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5386 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5387 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5388 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5389 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5390 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5391 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5392 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5393 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5394 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5395 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5396 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5397 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5398 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5399 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5400 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5401 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5402 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5403 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5404 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5405 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5406 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5407 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5408 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5409 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5410 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5411 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5412 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5413 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5414 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5415 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5416 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5417 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5418 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5419 case AMDGPU::S_CVT_F32_F16:
return AMDGPU::V_CVT_F32_F16_t16_e64;
5420 case AMDGPU::S_CVT_HI_F32_F16:
return AMDGPU::V_CVT_F32_F16_t16_e64;
5421 case AMDGPU::S_CVT_F16_F32:
return AMDGPU::V_CVT_F16_F32_t16_e64;
5422 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5423 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5424 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5425 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5426 case AMDGPU::S_CEIL_F16:
5428 : AMDGPU::V_CEIL_F16_fake16_e64;
5429 case AMDGPU::S_FLOOR_F16:
5431 : AMDGPU::V_FLOOR_F16_fake16_e64;
5432 case AMDGPU::S_TRUNC_F16:
5433 return AMDGPU::V_TRUNC_F16_fake16_e64;
5434 case AMDGPU::S_RNDNE_F16:
5435 return AMDGPU::V_RNDNE_F16_fake16_e64;
5436 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5437 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5438 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5439 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5440 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5441 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5442 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5443 case AMDGPU::S_ADD_F16:
return AMDGPU::V_ADD_F16_fake16_e64;
5444 case AMDGPU::S_SUB_F16:
return AMDGPU::V_SUB_F16_fake16_e64;
5445 case AMDGPU::S_MIN_F16:
return AMDGPU::V_MIN_F16_fake16_e64;
5446 case AMDGPU::S_MAX_F16:
return AMDGPU::V_MAX_F16_fake16_e64;
5447 case AMDGPU::S_MINIMUM_F16:
return AMDGPU::V_MINIMUM_F16_e64;
5448 case AMDGPU::S_MAXIMUM_F16:
return AMDGPU::V_MAXIMUM_F16_e64;
5449 case AMDGPU::S_MUL_F16:
return AMDGPU::V_MUL_F16_fake16_e64;
5450 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5451 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5452 case AMDGPU::S_FMAC_F16:
return AMDGPU::V_FMAC_F16_t16_e64;
5453 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5454 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5455 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5456 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5457 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5458 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5459 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5460 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5461 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5462 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5463 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5464 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5465 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5466 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5467 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5468 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5469 case AMDGPU::S_CMP_LT_F16:
return AMDGPU::V_CMP_LT_F16_t16_e64;
5470 case AMDGPU::S_CMP_EQ_F16:
return AMDGPU::V_CMP_EQ_F16_t16_e64;
5471 case AMDGPU::S_CMP_LE_F16:
return AMDGPU::V_CMP_LE_F16_t16_e64;
5472 case AMDGPU::S_CMP_GT_F16:
return AMDGPU::V_CMP_GT_F16_t16_e64;
5473 case AMDGPU::S_CMP_LG_F16:
return AMDGPU::V_CMP_LG_F16_t16_e64;
5474 case AMDGPU::S_CMP_GE_F16:
return AMDGPU::V_CMP_GE_F16_t16_e64;
5475 case AMDGPU::S_CMP_O_F16:
return AMDGPU::V_CMP_O_F16_t16_e64;
5476 case AMDGPU::S_CMP_U_F16:
return AMDGPU::V_CMP_U_F16_t16_e64;
5477 case AMDGPU::S_CMP_NGE_F16:
return AMDGPU::V_CMP_NGE_F16_t16_e64;
5478 case AMDGPU::S_CMP_NLG_F16:
return AMDGPU::V_CMP_NLG_F16_t16_e64;
5479 case AMDGPU::S_CMP_NGT_F16:
return AMDGPU::V_CMP_NGT_F16_t16_e64;
5480 case AMDGPU::S_CMP_NLE_F16:
return AMDGPU::V_CMP_NLE_F16_t16_e64;
5481 case AMDGPU::S_CMP_NEQ_F16:
return AMDGPU::V_CMP_NEQ_F16_t16_e64;
5482 case AMDGPU::S_CMP_NLT_F16:
return AMDGPU::V_CMP_NLT_F16_t16_e64;
5483 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5484 case AMDGPU::V_S_EXP_F16_e64:
return AMDGPU::V_EXP_F16_fake16_e64;
5485 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5486 case AMDGPU::V_S_LOG_F16_e64:
return AMDGPU::V_LOG_F16_fake16_e64;
5487 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5488 case AMDGPU::V_S_RCP_F16_e64:
return AMDGPU::V_RCP_F16_fake16_e64;
5489 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5490 case AMDGPU::V_S_RSQ_F16_e64:
return AMDGPU::V_RSQ_F16_fake16_e64;
5491 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5492 case AMDGPU::V_S_SQRT_F16_e64:
return AMDGPU::V_SQRT_F16_fake16_e64;
5495 "Unexpected scalar opcode without corresponding vector one!");
5508 bool IsWave32 = ST.isWave32();
5513 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5514 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5523 const unsigned OrSaveExec =
5524 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5537 unsigned ExecMov =
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5539 auto ExecRestoreMI =
5549 bool IsAllocatable) {
5550   if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
5555 case AMDGPU::AV_32RegClassID:
5556 RCID = AMDGPU::VGPR_32RegClassID;
5558 case AMDGPU::AV_64RegClassID:
5559 RCID = AMDGPU::VReg_64RegClassID;
5561 case AMDGPU::AV_96RegClassID:
5562 RCID = AMDGPU::VReg_96RegClassID;
5564 case AMDGPU::AV_128RegClassID:
5565 RCID = AMDGPU::VReg_128RegClassID;
5567 case AMDGPU::AV_160RegClassID:
5568 RCID = AMDGPU::VReg_160RegClassID;
5570 case AMDGPU::AV_512RegClassID:
5571 RCID = AMDGPU::VReg_512RegClassID;
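// adjustAllocatableRegClass (one of the static helpers declared at the end of
// this listing) narrows the combined AV_* "VGPR or AGPR" register classes down
// to plain VGPR classes when AGPRs cannot be used for the operand in question.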
5587   auto RegClass = TID.operands()[OpNum].RegClass;
5588   bool IsAllocatable = false;
5598                                                AMDGPU::OpName::vdst);
5601                                                : AMDGPU::OpName::vdata);
5602     if (DataIdx != -1) {
5604           TID.Opcode, AMDGPU::OpName::data1);
5612                                                         unsigned OpNo) const {
5615   if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
5616       Desc.operands()[OpNo].RegClass == -1) {
5619     if (Reg.isVirtual())
5620       return MRI.getRegClass(Reg);
5621     return RI.getPhysRegBaseClass(Reg);
5624   unsigned RCID = Desc.operands()[OpNo].RegClass;
5633   unsigned RCID = get(MI.getOpcode()).operands()[OpIdx].RegClass;
5635   unsigned Size = RI.getRegSizeInBits(*RC);
5636   unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
5638     Opcode = AMDGPU::COPY;
5640     Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
5657   unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx);
5668 if (SubIdx == AMDGPU::sub0)
5670 if (SubIdx == AMDGPU::sub1)
5682 void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
5698   if (Reg.isPhysical())
5709     DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg());
5736     MO = &MI.getOperand(OpIdx);
5748   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
5754       if (!SGPRsUsed.count(SGPR) &&
5757         if (--ConstantBusLimit <= 0)
5763       if (!LiteralLimit--)
5765       if (--ConstantBusLimit <= 0)
5779   unsigned Opc = MI.getOpcode();
5787       isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5788   if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
5789       MI.getOperand(DataIdx).isReg() &&
5790       RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
5792   if ((int)OpIdx == DataIdx) {
5793     if (VDstIdx != -1 &&
5794         RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5798                                              AMDGPU::OpName::data1);
5799     if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
5800         RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5803   if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
5813   bool Is64BitOp = Is64BitFPOp ||
5826     if (!Is64BitFPOp && (int32_t)Imm < 0)
5844   unsigned Opc = MI.getOpcode();
5863   if (Opc == AMDGPU::V_WRITELANE_B32) {
5866       Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5872       Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5889   if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
5891     if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))
5903   if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
5905     Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5917   if (HasImplicitSGPR || !MI.isCommutable()) {
5934   if (CommutedOpc == -1) {
5939   MI.setDesc(get(CommutedOpc));
5943   bool Src0Kill = Src0.isKill();
5947   else if (Src1.isReg()) {
5962   unsigned Opc = MI.getOpcode();
5970   if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
5971       Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
5977       Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5983       Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5994   Register SGPRReg = findUsedSGPR(MI, VOP3Idx);
5996     SGPRsUsed.insert(SGPRReg);
6000 for (
int Idx : VOP3Idx) {
6009 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6034 if (ConstantBusLimit > 0) {
6046 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6047 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6056 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6060 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6062 get(TargetOpcode::COPY), NewSrcReg)
6069 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6075 for (
unsigned i = 0; i < SubRegs; ++i) {
6076 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6078 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6085 get(AMDGPU::REG_SEQUENCE), DstReg);
6086 for (
unsigned i = 0; i < SubRegs; ++i) {
6101   if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
6103     SBase->setReg(SGPR);
6115   if (OldSAddrIdx < 0)
6132   if (NewVAddrIdx < 0)
6139   if (OldVAddrIdx >= 0) {
6141     VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());
6142     if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6153   if (OldVAddrIdx == NewVAddrIdx) {
6156     MRI.removeRegOperandFromUseList(&NewVAddr);
6157     MRI.moveOperands(&NewVAddr, &SAddr, 1);
6161     MRI.removeRegOperandFromUseList(&NewVAddr);
6162     MRI.addRegOperandToUseList(&NewVAddr);
6164     assert(OldSAddrIdx == NewVAddrIdx);
6166     if (OldVAddrIdx >= 0) {
6168                                            AMDGPU::OpName::vdst_in);
6172       if (NewVDstIn != -1) {
6179       if (NewVDstIn != -1) {
6218   unsigned OpSubReg = Op.getSubReg();
6227   Register DstReg = MRI.createVirtualRegister(DstRC);
6238   if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6241   bool ImpDef = Def->isImplicitDef();
6242   while (!ImpDef && Def && Def->isCopy()) {
6243     if (Def->getOperand(1).getReg().isPhysical())
6245     Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6246     ImpDef = Def && Def->isImplicitDef();
6248   if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6263   unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6264   unsigned SaveExecOpc =
6265       ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6266   unsigned XorTermOpc =
6267       ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6269       ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6270   const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6278     unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
6279     unsigned NumSubRegs = RegSize / 32;
6280     Register VScalarOp = ScalarOp->getReg();
6282     if (NumSubRegs == 1) {
6283       Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6285       BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6288       Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
6290       BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6296         CondReg = NewCondReg;
6298         Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
6306       ScalarOp->setReg(CurReg);
6307       ScalarOp->setIsKill();
6310       assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6311              "Unhandled register size");
6313       for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
6314         Register CurRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6315         Register CurRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6318         BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6319             .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));
6322         BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6323             .addReg(VScalarOp, VScalarOpUndef,
6324                     TRI->getSubRegFromChannel(Idx + 1));
6330         Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6331         BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6337         Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
6338         auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6341         if (NumSubRegs <= 2)
6342           Cmp.addReg(VScalarOp);
6344           Cmp.addReg(VScalarOp, VScalarOpUndef,
6345                      TRI->getSubRegFromChannel(Idx, 2));
6349           CondReg = NewCondReg;
6351           Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
6360           TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
6361       Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
6365           BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6366       unsigned Channel = 0;
6367       for (Register Piece : ReadlanePieces) {
6368         Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));
6372       ScalarOp->setReg(SScalarOp);
6373       ScalarOp->setIsKill();
6377   Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
6378   MRI.setSimpleHint(SaveExec, CondReg);
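// This is the body of the "waterfall" loop (emitLoadScalarOpsFromVGPRLoop in
// the static helpers listed below): each divergent scalar operand is read back
// with V_READFIRSTLANE_B32 in 32- or 64-bit pieces, compared against the
// original VGPR value to build a lane mask, and the loop repeats with a
// reduced exec mask until every lane has been serviced.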
6409   if (!Begin.isValid())
6411   if (!End.isValid()) {
6416   unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6417   unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6418   const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6427           std::numeric_limits<unsigned>::max()) !=
6430     SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6436   Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
6445   for (auto I = Begin; I != AfterMI; I++) {
6446     for (auto &MO : I->all_uses())
6447       MRI.clearKillFlags(MO.getReg());
6482   for (auto &Succ : RemainderBB->successors()) {
6505 static std::tuple<unsigned, unsigned>
6513       TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
6514                              AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
6517   Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6518   Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6519   Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6520   Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6521   uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
6529       .addImm(RsrcDataFormat & 0xFFFFFFFF);
6533       .addImm(RsrcDataFormat >> 32);
6538       .addImm(AMDGPU::sub0_sub1)
6544   return std::tuple(RsrcPtr, NewSRsrc);
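// extractRsrcPtr (declared among the static helpers at the end of this
// listing) splits a 128-bit MUBUF resource descriptor into its 64-bit base
// pointer and rebuilds the remaining descriptor words from
// getDefaultRsrcDataFormat(), returning both registers.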
6581   if (MI.getOpcode() == AMDGPU::PHI) {
6583     for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
6584       if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
6587           MRI.getRegClass(MI.getOperand(i).getReg());
6602         VRC = &AMDGPU::VReg_1RegClass;
6618     for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6620       if (!Op.isReg() || !Op.getReg().isVirtual())
6636   if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6643     for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6645       if (!Op.isReg() || !Op.getReg().isVirtual())
6663   if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6668     if (DstRC != Src0RC) {
6677   if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6685   if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6686       MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6687       MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6688       MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6689       MI.getOpcode() == AMDGPU::S_WQM_B64 ||
6690       MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
6691       MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
6706                                 : AMDGPU::OpName::srsrc;
6711   int SampOpName = isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6720   if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6726     unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6727     unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6732     while (Start->getOpcode() != FrameSetupOpcode)
6735     while (End->getOpcode() != FrameDestroyOpcode)
6739     while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
6740            MI.definesRegister(End->getOperand(1).getReg(), nullptr))
6748   if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6750     Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6761   bool isSoffsetLegal = true;
6764   if (SoffsetIdx != -1) {
6768       isSoffsetLegal = false;
6772   bool isRsrcLegal = true;
6775   if (RsrcIdx != -1) {
6778       isRsrcLegal = false;
6783   if (isRsrcLegal && isSoffsetLegal)
6807     Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6808     Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6809     Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6811     const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6812     Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
6813     Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
6815     unsigned RsrcPtr, NewSRsrc;
6822             .addReg(RsrcPtr, 0, AMDGPU::sub0)
6829             .addReg(RsrcPtr, 0, AMDGPU::sub1)
6847            "FIXME: Need to emit flat atomics here");
6849     unsigned RsrcPtr, NewSRsrc;
6852     Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6875       MIB.addImm(CPol->getImm());
6880       MIB.addImm(TFE->getImm());
6900     MI.removeFromParent();
6905         .addReg(RsrcPtr, 0, AMDGPU::sub0)
6907         .addReg(RsrcPtr, 0, AMDGPU::sub1)
6911     if (!isSoffsetLegal) {
6923   if (!isSoffsetLegal) {
6932   InstrList.insert(MI);
6936   if (RsrcIdx != -1) {
6937     DeferredList.insert(MI);
6942   return DeferredList.contains(MI);
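// Everything below this point belongs to the moveToVALU worklist processing:
// instructions are popped off the worklist, rewritten with their VALU
// equivalents (or split/lowered by the helpers named in the switch), and any
// users of the rewritten results are pushed back onto the worklist.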
6948   while (!Worklist.empty()) {
6962            "Deferred MachineInstr are not supposed to re-populate worklist");
6980 case AMDGPU::S_ADD_U64_PSEUDO:
6981 NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
6983 case AMDGPU::S_SUB_U64_PSEUDO:
6984 NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
6986 case AMDGPU::S_ADD_I32:
6987 case AMDGPU::S_SUB_I32: {
6991 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
6999 case AMDGPU::S_MUL_U64:
7001 splitScalarSMulU64(Worklist, Inst, MDT);
7005 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7006 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7009 splitScalarSMulPseudo(Worklist, Inst, MDT);
7013 case AMDGPU::S_AND_B64:
7014 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7018 case AMDGPU::S_OR_B64:
7019 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7023 case AMDGPU::S_XOR_B64:
7024 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7028 case AMDGPU::S_NAND_B64:
7029 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7033 case AMDGPU::S_NOR_B64:
7034 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7038 case AMDGPU::S_XNOR_B64:
7040 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7042 splitScalar64BitXnor(Worklist, Inst, MDT);
7046 case AMDGPU::S_ANDN2_B64:
7047 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7051 case AMDGPU::S_ORN2_B64:
7052 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7056   case AMDGPU::S_BREV_B64:
7057     splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true);
7061 case AMDGPU::S_NOT_B64:
7062 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7066 case AMDGPU::S_BCNT1_I32_B64:
7067 splitScalar64BitBCNT(Worklist, Inst);
7071 case AMDGPU::S_BFE_I64:
7072 splitScalar64BitBFE(Worklist, Inst);
7076 case AMDGPU::S_FLBIT_I32_B64:
7077 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7080 case AMDGPU::S_FF1_I32_B64:
7081 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7085 case AMDGPU::S_LSHL_B32:
7087 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7091 case AMDGPU::S_ASHR_I32:
7093 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7097 case AMDGPU::S_LSHR_B32:
7099 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7103 case AMDGPU::S_LSHL_B64:
7106 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7107 : AMDGPU::V_LSHLREV_B64_e64;
7111 case AMDGPU::S_ASHR_I64:
7113 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7117 case AMDGPU::S_LSHR_B64:
7119 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7124 case AMDGPU::S_ABS_I32:
7125 lowerScalarAbs(Worklist, Inst);
7129 case AMDGPU::S_CBRANCH_SCC0:
7130   case AMDGPU::S_CBRANCH_SCC1: {
7133     bool IsSCC = CondReg == AMDGPU::SCC;
7136     unsigned Opc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7139         .addReg(IsSCC ? VCC : CondReg);
7143 case AMDGPU::S_BFE_U64:
7144 case AMDGPU::S_BFM_B64:
7147 case AMDGPU::S_PACK_LL_B32_B16:
7148 case AMDGPU::S_PACK_LH_B32_B16:
7149 case AMDGPU::S_PACK_HL_B32_B16:
7150 case AMDGPU::S_PACK_HH_B32_B16:
7151 movePackToVALU(Worklist,
MRI, Inst);
7155 case AMDGPU::S_XNOR_B32:
7156 lowerScalarXnor(Worklist, Inst);
7160 case AMDGPU::S_NAND_B32:
7161 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7165 case AMDGPU::S_NOR_B32:
7166 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7170 case AMDGPU::S_ANDN2_B32:
7171 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7175 case AMDGPU::S_ORN2_B32:
7176 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7184   case AMDGPU::S_ADD_CO_PSEUDO:
7185   case AMDGPU::S_SUB_CO_PSEUDO: {
7186     unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7187                        ? AMDGPU::V_ADDC_U32_e64
7188                        : AMDGPU::V_SUBB_U32_e64;
7189     const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
7192     if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
7193       Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
7211     addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
7215   case AMDGPU::S_UADDO_PSEUDO:
7216   case AMDGPU::S_USUBO_PSEUDO: {
7223     unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7224                        ? AMDGPU::V_ADD_CO_U32_e64
7225                        : AMDGPU::V_SUB_CO_U32_e64;
7228     Register DestReg = MRI.createVirtualRegister(NewRC);
7236     MRI.replaceRegWith(Dest0.getReg(), DestReg);
7243 case AMDGPU::S_CSELECT_B32:
7244 case AMDGPU::S_CSELECT_B64:
7245 lowerSelect(Worklist, Inst, MDT);
7248 case AMDGPU::S_CMP_EQ_I32:
7249 case AMDGPU::S_CMP_LG_I32:
7250 case AMDGPU::S_CMP_GT_I32:
7251 case AMDGPU::S_CMP_GE_I32:
7252 case AMDGPU::S_CMP_LT_I32:
7253 case AMDGPU::S_CMP_LE_I32:
7254 case AMDGPU::S_CMP_EQ_U32:
7255 case AMDGPU::S_CMP_LG_U32:
7256 case AMDGPU::S_CMP_GT_U32:
7257 case AMDGPU::S_CMP_GE_U32:
7258 case AMDGPU::S_CMP_LT_U32:
7259 case AMDGPU::S_CMP_LE_U32:
7260 case AMDGPU::S_CMP_EQ_U64:
7261 case AMDGPU::S_CMP_LG_U64:
7262 case AMDGPU::S_CMP_LT_F32:
7263 case AMDGPU::S_CMP_EQ_F32:
7264 case AMDGPU::S_CMP_LE_F32:
7265 case AMDGPU::S_CMP_GT_F32:
7266 case AMDGPU::S_CMP_LG_F32:
7267 case AMDGPU::S_CMP_GE_F32:
7268 case AMDGPU::S_CMP_O_F32:
7269 case AMDGPU::S_CMP_U_F32:
7270 case AMDGPU::S_CMP_NGE_F32:
7271 case AMDGPU::S_CMP_NLG_F32:
7272 case AMDGPU::S_CMP_NGT_F32:
7273 case AMDGPU::S_CMP_NLE_F32:
7274 case AMDGPU::S_CMP_NEQ_F32:
7275 case AMDGPU::S_CMP_NLT_F32:
7276 case AMDGPU::S_CMP_LT_F16:
7277 case AMDGPU::S_CMP_EQ_F16:
7278 case AMDGPU::S_CMP_LE_F16:
7279 case AMDGPU::S_CMP_GT_F16:
7280 case AMDGPU::S_CMP_LG_F16:
7281 case AMDGPU::S_CMP_GE_F16:
7282 case AMDGPU::S_CMP_O_F16:
7283 case AMDGPU::S_CMP_U_F16:
7284 case AMDGPU::S_CMP_NGE_F16:
7285 case AMDGPU::S_CMP_NLG_F16:
7286 case AMDGPU::S_CMP_NGT_F16:
7287 case AMDGPU::S_CMP_NLE_F16:
7288 case AMDGPU::S_CMP_NEQ_F16:
7289 case AMDGPU::S_CMP_NLT_F16: {
7295 AMDGPU::OpName::src0_modifiers) >= 0) {
7310 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7314 case AMDGPU::S_CVT_HI_F32_F16: {
7316 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7317 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7328 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7332 case AMDGPU::S_MINIMUM_F32:
7333 case AMDGPU::S_MAXIMUM_F32:
7334 case AMDGPU::S_MINIMUM_F16:
7335 case AMDGPU::S_MAXIMUM_F16: {
7337 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7348 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7354 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7362 if (NewOpcode == Opcode) {
7386 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7398 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7399 MRI.replaceRegWith(DstReg, NewDstReg);
7401 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7412 AMDGPU::OpName::src0_modifiers) >= 0)
7417 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7418 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7420 NewInstr->addOperand(Src);
7423 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7426 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7428 NewInstr.addImm(
Size);
7429 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7433 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7438 "Scalar BFE is only implemented for constant width and offset");
7447 AMDGPU::OpName::src1_modifiers) >= 0)
7452 AMDGPU::OpName::src2_modifiers) >= 0)
7466 NewInstr->addOperand(
Op);
7473 if (
Op.getReg() == AMDGPU::SCC) {
7475 if (
Op.isDef() && !
Op.isDead())
7476 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7478 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7483 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7484 Register DstReg = NewInstr->getOperand(0).getReg();
7489 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7490 MRI.replaceRegWith(DstReg, NewDstReg);
7496 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7500 std::pair<bool, MachineBasicBlock *>
7512   Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7515   assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
7517   unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
7518                         AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
7526   MRI.replaceRegWith(OldDstReg, ResultReg);
7529   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
7530   return std::pair(true, NewBB);
7533   return std::pair(false, nullptr);
7550 bool IsSCC = (CondReg == AMDGPU::SCC);
7558 MRI.replaceRegWith(Dest.
getReg(), CondReg);
7566 NewCondReg =
MRI.createVirtualRegister(TC);
7570 bool CopyFound =
false;
7574 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
7576 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7578 .
addReg(CandI.getOperand(1).getReg());
7590 : AMDGPU::S_CSELECT_B32;
7600 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
7601 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7614 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
7616 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
7628 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7629 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7632 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7642 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7643 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7658 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7666 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7667 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7673 bool Src0IsSGPR = Src0.
isReg() &&
7675 bool Src1IsSGPR = Src1.
isReg() &&
7678 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7679 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7689 }
else if (Src1IsSGPR) {
7703 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7707 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7713 unsigned Opcode)
const {
7723 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7724 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7736 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7737 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7742 unsigned Opcode)
const {
7752 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7753 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7765 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7766 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7784 &AMDGPU::SGPR_32RegClass;
7787 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7790 AMDGPU::sub0, Src0SubRC);
7795 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7797 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
7801 AMDGPU::sub1, Src0SubRC);
7803 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
7809 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
7816 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7818 Worklist.
insert(&LoHalf);
7819 Worklist.
insert(&HiHalf);
7825 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7836 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7837 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7838 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7849 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7853 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7883 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7889 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7895 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7906 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7922 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7934 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7945 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7946 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7947 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7958 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7962 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7974 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
7975 ? AMDGPU::V_MUL_HI_U32_e64
7976 : AMDGPU::V_MUL_HI_I32_e64;
7991 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7999 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8018 &AMDGPU::SGPR_32RegClass;
8021 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8024 &AMDGPU::SGPR_32RegClass;
8027 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8030 AMDGPU::sub0, Src0SubRC);
8032 AMDGPU::sub0, Src1SubRC);
8034 AMDGPU::sub1, Src0SubRC);
8036 AMDGPU::sub1, Src1SubRC);
8041 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8043 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8048 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8053 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8060 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8062 Worklist.
insert(&LoHalf);
8063 Worklist.
insert(&HiHalf);
8066 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8084 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8100 Register NewDest =
MRI.createVirtualRegister(DestRC);
8106 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8124 MRI.getRegClass(Src.getReg()) :
8125 &AMDGPU::SGPR_32RegClass;
8127 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8128 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8131 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8134 AMDGPU::sub0, SrcSubRC);
8136 AMDGPU::sub1, SrcSubRC);
8142 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8146 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8165 Offset == 0 &&
"Not implemented");
8168 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8169 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8170 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8187 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8188 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8193 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8194 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8198 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8201 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8206 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8207 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8228 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8229 unsigned OpcodeAdd =
8230 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8233 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8235 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8242 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8243 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8244 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8245 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8252 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8258 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8260 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8262 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
8265void SIInstrInfo::addUsersToMoveToVALUWorklist(
8269 E =
MRI.use_end();
I != E;) {
8274 switch (
UseMI.getOpcode()) {
8277 case AMDGPU::SOFT_WQM:
8278 case AMDGPU::STRICT_WWM:
8279 case AMDGPU::STRICT_WQM:
8280 case AMDGPU::REG_SEQUENCE:
8282 case AMDGPU::INSERT_SUBREG:
8285 OpNo =
I.getOperandNo();
8294 }
while (
I != E &&
I->getParent() == &
UseMI);
8304 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8311 case AMDGPU::S_PACK_LL_B32_B16: {
8312 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8313 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8330 case AMDGPU::S_PACK_LH_B32_B16: {
8331 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8340 case AMDGPU::S_PACK_HL_B32_B16: {
8341 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8351 case AMDGPU::S_PACK_HH_B32_B16: {
8352 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8353 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8370 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8371 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8380 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8381 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8389 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
8393 Register DestReg =
MI.getOperand(0).getReg();
8395 MRI.replaceRegWith(DestReg, NewCond);
8400 MI.getOperand(SCCIdx).setReg(NewCond);
8406 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
8409 for (
auto &Copy : CopyToDelete)
8410 Copy->eraseFromParent();
8418void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8427 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8429 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8446 case AMDGPU::REG_SEQUENCE:
8447 case AMDGPU::INSERT_SUBREG:
8449 case AMDGPU::SOFT_WQM:
8450 case AMDGPU::STRICT_WWM:
8451 case AMDGPU::STRICT_WQM: {
8459 case AMDGPU::REG_SEQUENCE:
8460 case AMDGPU::INSERT_SUBREG:
8470 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8487 int OpIndices[3])
const {
8506 for (
unsigned i = 0; i < 3; ++i) {
8507 int Idx = OpIndices[i];
8544 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8545 SGPRReg = UsedSGPRs[0];
8548 if (!SGPRReg && UsedSGPRs[1]) {
8549 if (UsedSGPRs[1] == UsedSGPRs[2])
8550 SGPRReg = UsedSGPRs[1];
8557 unsigned OperandName)
const {
8562 return &
MI.getOperand(
Idx);
8579 RsrcDataFormat |= (1ULL << 56);
8584 RsrcDataFormat |= (2ULL << 59);
8587 return RsrcDataFormat;
8609 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8615 unsigned Opc =
MI.getOpcode();
8621 return get(Opc).mayLoad() &&
8626 int &FrameIndex)
const {
8634 FrameIndex =
Addr->getIndex();
8639 int &FrameIndex)
const {
8642 FrameIndex =
Addr->getIndex();
8647 int &FrameIndex)
const {
8661 int &FrameIndex)
const {
8678 while (++
I != E &&
I->isInsideBundle()) {
8679 assert(!
I->isBundle() &&
"No nested bundle!");
8687 unsigned Opc =
MI.getOpcode();
8689 unsigned DescSize =
Desc.getSize();
8694 unsigned Size = DescSize;
8709 bool HasLiteral =
false;
8710 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
8718 return HasLiteral ? DescSize + 4 : DescSize;
8728 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8732 case TargetOpcode::BUNDLE:
8734 case TargetOpcode::INLINEASM:
8735 case TargetOpcode::INLINEASM_BR: {
8737 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8741 if (
MI.isMetaInstruction())
8751 if (
MI.memoperands_empty())
8762 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
8774 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8777 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_IF), DstReg)
8778 .
add(Branch->getOperand(0))
8779 .
add(Branch->getOperand(1));
8781 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_END_CF))
8800 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8805 BuildMI(*(MF), Branch->getDebugLoc(),
get(TargetOpcode::PHI), DstReg);
8807 if (PMBB == LoopEnd) {
8808 HeaderPHIBuilder.
addReg(BackEdgeReg);
8813 HeaderPHIBuilder.
addReg(ZeroReg);
8815 HeaderPHIBuilder.
addMBB(PMBB);
8819 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
8821 .
add(Branch->getOperand(0));
8823 BuildMI(*(MF), Branch->getDebugLoc(),
get(AMDGPU::SI_LOOP))
8829 LoopEnd->
insert(LoopEnd->
end(), SIIFBREAK);
8830 LoopEnd->
insert(LoopEnd->
end(), SILOOP);
8836 static const std::pair<int, const char *> TargetIndices[] = {
8874std::pair<unsigned, unsigned>
8881 static const std::pair<unsigned, const char *> TargetFlags[] = {
8896 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8910 return AMDGPU::WWM_COPY;
8912 return AMDGPU::COPY;
8923 bool IsNullOrVectorRegister =
true;
8932 return IsNullOrVectorRegister &&
8933 (
isSpill(Opcode) || (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
8934 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
8947 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
8978 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
8979 case AMDGPU::SI_KILL_I1_TERMINATOR:
8988 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
8989 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
8990 case AMDGPU::SI_KILL_I1_PSEUDO:
8991 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9003   const unsigned OffsetBits =
9005   return (1 << OffsetBits) - 1;
9012   if (MI.isInlineAsm())
9015   for (auto &Op : MI.implicit_operands()) {
9016     if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
9017       Op.setReg(AMDGPU::VCC_LO);
9030   const auto RCID = MI.getDesc().operands()[Idx].RegClass;
9048   if (Imm <= MaxImm + 64) {
9050     Overflow = Imm - MaxImm;
9140 std::pair<int64_t, int64_t>
9143   int64_t RemainderOffset = COffsetVal;
9144   int64_t ImmField = 0;
9149   if (AllowNegative) {
9151     int64_t D = 1LL << NumBits;
9152     RemainderOffset = (COffsetVal / D) * D;
9153     ImmField = COffsetVal - RemainderOffset;
9157         (ImmField % 4) != 0) {
9159       RemainderOffset += ImmField % 4;
9160       ImmField -= ImmField % 4;
9162   } else if (COffsetVal >= 0) {
9163     ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
9164     RemainderOffset = COffsetVal - ImmField;
9168   assert(RemainderOffset + ImmField == COffsetVal);
9169   return {ImmField, RemainderOffset};
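// Worked example with hypothetical values: for NumBits == 12 and a
// non-negative COffsetVal of 5000, ImmField = 5000 & 0xFFF = 904 and
// RemainderOffset = 4096, so the immediate field plus the remainder
// still add up to the original offset of 5000.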
9181 switch (ST.getGeneration()) {
9206 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9207 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9208 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9209 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9210 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9211 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9212 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9213 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9249 if (
isMAI(Opcode)) {
9294 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
9295 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9296 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9308 switch (
MI.getOpcode()) {
9310 case AMDGPU::REG_SEQUENCE:
9314 case AMDGPU::INSERT_SUBREG:
9315 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9332 if (!
P.Reg.isVirtual())
9336 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9337 while (
auto *
MI = DefInst) {
9339 switch (
MI->getOpcode()) {
9341 case AMDGPU::V_MOV_B32_e32: {
9342 auto &Op1 =
MI->getOperand(1);
9347 DefInst =
MRI.getVRegDef(RSR.Reg);
9355 DefInst =
MRI.getVRegDef(RSR.Reg);
9368 assert(
MRI.isSSA() &&
"Must be run on SSA");
9370 auto *
TRI =
MRI.getTargetRegisterInfo();
9371 auto *DefBB =
DefMI.getParent();
9375 if (
UseMI.getParent() != DefBB)
9378 const int MaxInstScan = 20;
9382 auto E =
UseMI.getIterator();
9383 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
9384 if (
I->isDebugInstr())
9387 if (++NumInst > MaxInstScan)
9390 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9400 assert(
MRI.isSSA() &&
"Must be run on SSA");
9402 auto *
TRI =
MRI.getTargetRegisterInfo();
9403 auto *DefBB =
DefMI.getParent();
9405 const int MaxUseScan = 10;
9408 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9409 auto &UseInst = *
Use.getParent();
9412 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9415 if (++NumUse > MaxUseScan)
9422 const int MaxInstScan = 20;
9426 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9429 if (
I->isDebugInstr())
9432 if (++NumInst > MaxInstScan)
9445 if (Reg == VReg && --NumUse == 0)
9447 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9459 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
9462 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9471 if (InsPt !=
MBB.
end() &&
9472 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9473 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9474 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9475 InsPt->definesRegister(Src,
nullptr)) {
9479 : AMDGPU::S_MOV_B64_term),
9481 .
addReg(Src, 0, SrcSubReg)
9506 if (isFullCopyInstr(
MI)) {
9515 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9519 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9530 unsigned *PredCost)
const {
9531 if (
MI.isBundle()) {
9534 unsigned Lat = 0, Count = 0;
9535 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
9537 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
9539 return Lat + Count - 1;
9542 return SchedModel.computeInstrLatency(&
MI);
9547 unsigned opcode =
MI.getOpcode();
9548 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9549 auto IID = GI->getIntrinsicID();
9556 case Intrinsic::amdgcn_if:
9557 case Intrinsic::amdgcn_else:
9571 if (opcode == AMDGPU::G_LOAD) {
9572 if (
MI.memoperands_empty())
9576 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9577 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9585 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9586 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9587 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9600 unsigned opcode =
MI.getOpcode();
9601 if (opcode == AMDGPU::V_READLANE_B32 ||
9602 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9603 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9606 if (isCopyInstr(
MI)) {
9610 RI.getPhysRegBaseClass(srcOp.
getReg());
9618 if (
MI.isPreISelOpcode())
9633 if (
MI.memoperands_empty())
9637 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9638 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9653 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
9659 if (!Reg || !
SrcOp.readsReg())
9665     if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
9702                                  Register &SrcReg2, int64_t &CmpMask,
9703                                  int64_t &CmpValue) const {
9704   if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
9707   switch (MI.getOpcode()) {
9710 case AMDGPU::S_CMP_EQ_U32:
9711 case AMDGPU::S_CMP_EQ_I32:
9712 case AMDGPU::S_CMP_LG_U32:
9713 case AMDGPU::S_CMP_LG_I32:
9714 case AMDGPU::S_CMP_LT_U32:
9715 case AMDGPU::S_CMP_LT_I32:
9716 case AMDGPU::S_CMP_GT_U32:
9717 case AMDGPU::S_CMP_GT_I32:
9718 case AMDGPU::S_CMP_LE_U32:
9719 case AMDGPU::S_CMP_LE_I32:
9720 case AMDGPU::S_CMP_GE_U32:
9721 case AMDGPU::S_CMP_GE_I32:
9722 case AMDGPU::S_CMP_EQ_U64:
9723 case AMDGPU::S_CMP_LG_U64:
9724     SrcReg = MI.getOperand(0).getReg();
9725     if (MI.getOperand(1).isReg()) {
9726       if (MI.getOperand(1).getSubReg())
9728       SrcReg2 = MI.getOperand(1).getReg();
9730     } else if (MI.getOperand(1).isImm()) {
9732       CmpValue = MI.getOperand(1).getImm();
9738 case AMDGPU::S_CMPK_EQ_U32:
9739 case AMDGPU::S_CMPK_EQ_I32:
9740 case AMDGPU::S_CMPK_LG_U32:
9741 case AMDGPU::S_CMPK_LG_I32:
9742 case AMDGPU::S_CMPK_LT_U32:
9743 case AMDGPU::S_CMPK_LT_I32:
9744 case AMDGPU::S_CMPK_GT_U32:
9745 case AMDGPU::S_CMPK_GT_I32:
9746 case AMDGPU::S_CMPK_LE_U32:
9747 case AMDGPU::S_CMPK_LE_I32:
9748 case AMDGPU::S_CMPK_GE_U32:
9749 case AMDGPU::S_CMPK_GE_I32:
9750     SrcReg = MI.getOperand(0).getReg();
9752     CmpValue = MI.getOperand(1).getImm();
9770   const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
9771                                this](int64_t ExpectedValue, unsigned SrcSize,
9772                                      bool IsReversible, bool IsSigned) -> bool {
9797     if (!Def || Def->getParent() != CmpInstr.getParent())
9800     if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
9801         Def->getOpcode() != AMDGPU::S_AND_B64)
9805     const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
9816 SrcOp = &Def->getOperand(2);
9817 else if (isMask(&Def->getOperand(2)))
9818 SrcOp = &Def->getOperand(1);
9823 if (IsSigned && BitNo == SrcSize - 1)
9826 ExpectedValue <<= BitNo;
9828     bool IsReversedCC = false;
9829     if (CmpValue != ExpectedValue) {
9832       IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
9837     Register DefReg = Def->getOperand(0).getReg();
9838     if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
9841     for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
9843       if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
9844           I->killsRegister(AMDGPU::SCC, &RI))
9849         Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
9853     if (!MRI->use_nodbg_empty(DefReg)) {
9861     unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
9862                                                      : AMDGPU::S_BITCMP1_B32
9863                                       : IsReversedCC ? AMDGPU::S_BITCMP0_B64
9864                                                      : AMDGPU::S_BITCMP1_B64;
9869 Def->eraseFromParent();
9877   case AMDGPU::S_CMP_EQ_U32:
9878   case AMDGPU::S_CMP_EQ_I32:
9879   case AMDGPU::S_CMPK_EQ_U32:
9880   case AMDGPU::S_CMPK_EQ_I32:
9881     return optimizeCmpAnd(1, 32, true, false);
9882   case AMDGPU::S_CMP_GE_U32:
9883   case AMDGPU::S_CMPK_GE_U32:
9884     return optimizeCmpAnd(1, 32, false, false);
9885   case AMDGPU::S_CMP_GE_I32:
9886   case AMDGPU::S_CMPK_GE_I32:
9887     return optimizeCmpAnd(1, 32, false, true);
9888   case AMDGPU::S_CMP_EQ_U64:
9889     return optimizeCmpAnd(1, 64, true, false);
9890   case AMDGPU::S_CMP_LG_U32:
9891   case AMDGPU::S_CMP_LG_I32:
9892   case AMDGPU::S_CMPK_LG_U32:
9893   case AMDGPU::S_CMPK_LG_I32:
9894     return optimizeCmpAnd(0, 32, true, false);
9895   case AMDGPU::S_CMP_GT_U32:
9896   case AMDGPU::S_CMPK_GT_U32:
9897     return optimizeCmpAnd(0, 32, false, false);
9898   case AMDGPU::S_CMP_GT_I32:
9899   case AMDGPU::S_CMPK_GT_I32:
9900     return optimizeCmpAnd(0, 32, false, true);
9901   case AMDGPU::S_CMP_LG_U64:
9902     return optimizeCmpAnd(0, 64, true, false);
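// optimizeCmpAnd folds an s_and of a single-bit mask followed by an s_cmp
// against 0 or 1 into a single s_bitcmp0/s_bitcmp1 that sets SCC directly;
// the (ExpectedValue, SrcSize, IsReversible, IsSigned) arguments above encode
// which comparison forms can be rewritten this way.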
9927           IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
9930       MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
9931                                        : &AMDGPU::VReg_64_Align2RegClass);
9933           .addReg(DataReg, 0, Op.getSubReg())
9938       Op.setSubReg(AMDGPU::sub0);
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillRestoreOpcode(unsigned Size)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MachineRegisterInfo &MRI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillSaveOpcode(unsigned Size)
static bool resultDependsOnExec(const MachineInstr &MI)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static constexpr unsigned ModifierOpNames[]
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const SIRegisterInfo * getRegisterInfo() const override
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have a successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
bool isNonUniformBranchInstr(MachineInstr &Instr) const
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
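Conceptually, the split keeps the low bits that fit the instruction's immediate offset field and pushes the rest into the address computation. Below is a minimal illustrative sketch only, assuming a hypothetical unsigned 12-bit field and ignoring negative offsets; the real splitFlatOffset consults the GCNSubtarget and the FlatVariant, so splitOffsetSketch is not the actual implementation.
// Illustrative sketch: split an offset into {immediate field, remainder},
// assuming a hypothetical unsigned 12-bit offset field.
#include <cstdint>
#include <utility>
static std::pair<int64_t, int64_t> splitOffsetSketch(int64_t COffsetVal) {
  constexpr int64_t MaxImm = (1 << 12) - 1;   // hypothetical field width
  int64_t ImmField = COffsetVal & MaxImm;     // encoded in the instruction
  int64_t Remainder = COffsetVal - ImmField;  // folded into the address register
  return {ImmField, Remainder};
}
// e.g. splitOffsetSketch(0x1234) yields {0x234, 0x1000}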
bool isSpill(uint16_t Opcode) const
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by the assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description.
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
void setHasSpilledVGPRs(bool Spill=true)
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is Skew mod Align.
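A tiny worked example may help with the Skew parameter; this is a sketch that assumes a build which can include llvm/Support/MathExtras.h, and the values are purely illustrative.
#include "llvm/Support/MathExtras.h"
#include <cassert>
void alignDownExamples() {
  // Round 13 down to a multiple of 4.
  assert(llvm::alignDown(13u, 4u) == 12u);
  // With Skew = 1 the result is the largest value <= Value that is 1 (mod 4).
  assert(llvm::alignDown(13u, 4u, 1u) == 13u); // 13 is already 1 (mod 4)
  assert(llvm::alignDown(12u, 4u, 1u) == 9u);  // 9 is the largest such value <= 12
}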
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
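Hi_32/Lo_32 are the usual way a 64-bit immediate is split into the two 32-bit halves written by a pair of 32-bit moves. A minimal sketch, assuming llvm/Support/MathExtras.h is available; the constant is just an example value.
#include "llvm/Support/MathExtras.h"
#include <cstdint>
// Split a 64-bit immediate into its two 32-bit halves.
constexpr uint64_t ExampleImm = 0x1122334455667788ULL;
static_assert(llvm::Lo_32(ExampleImm) == 0x55667788u, "low half");
static_assert(llvm::Hi_32(ExampleImm) == 0x11223344u, "high half");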
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
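isIntN is the kind of signed-range check used, for example, to test whether a branch displacement fits a given bit width. A small sketch under the assumption that llvm/Support/MathExtras.h is available; the 16-bit width is only illustrative.
#include "llvm/Support/MathExtras.h"
#include <cassert>
void signedRangeExamples() {
  // A 16-bit signed field covers [-32768, 32767].
  assert(llvm::isIntN(16, 32767));   // fits
  assert(!llvm::isIntN(16, 32768));  // one past the maximum does not fit
  assert(llvm::isIntN(16, -32768));  // the minimum value fits
}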
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
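maxUIntN(N) is simply 2^N - 1, the largest value an N-bit unsigned field can hold; a short sketch (assuming llvm/Support/MathExtras.h is available, with illustrative widths):
#include "llvm/Support/MathExtras.h"
// Largest values representable in common unsigned field widths.
static_assert(llvm::maxUIntN(8) == 255, "8-bit field");
static_assert(llvm::maxUIntN(12) == 4095, "12-bit field");
static_assert(llvm::maxUIntN(16) == 65535, "16-bit field");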
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.