#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"

    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.memoperands_empty() &&
    return MMO->isLoad() && MMO->isInvariant();
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:
  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    assert(NumOps == 4 || NumOps == 5);

        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

    if (!Load0Offset || !Load1Offset)

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      Offset = EltSize * Offset0;
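      // Explanatory note: for the DS_READ2/DS_WRITE2 family the two 8-bit
      // offsets are in units of EltSize, and the pair is only reported as one
      // contiguous access when the second offset immediately follows the
      // first (Offset0 + 1 == Offset1); the byte offset handed back to the
      // scheduler is then EltSize * Offset0.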
      if (DataOpIdx == -1) {

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;

    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWORDs <= 8;
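  // Worked example for the DWORD-based cap above: clustering four 6-byte loads
  // gives NumBytes = 24 and ClusterSize = 4, so LoadSize = 6 and
  // NumDWORDs = ((6 + 3) / 4) * 4 = 8, which is still accepted; any cluster
  // that rounds up to more than 8 DWORDs is rejected.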
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
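  // Heuristic only: at most 16 loads are treated as "near" each other, and
  // only while their offsets stay within a 64-byte window of the first one.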
                              const char *Msg = "illegal VGPR to SGPR copy") {

  C.diagnose(IllegalCopy);

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");
    for (auto Def = MI, E = MBB.begin(); Def != E; ) {

      if (!Def->modifiesRegister(SrcReg, &RI))

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

         "VGPR used for an intermediate copy should have been reserved.");
  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

    if (ImpUseSuperReg) {
      UseBuilder.addReg(ImpUseSuperReg,

    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
  unsigned Size = RI.getRegSizeInBits(*RC);

  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

      if (!DstLow || !SrcLow) {
                        "Cannot use hi16 subreg on VI!");

  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
        Opcode = AMDGPU::INSTRUCTION_LIST_END;
      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
      Opcode = AMDGPU::V_MOV_B64_e32;
      Opcode = AMDGPU::V_PK_MOV_B32;
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                               *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
                                       int64_t Value) const {
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      Opcode = AMDGPU::S_MOV_B32;

    int64_t IdxValue = Idx == 0 ? Value : 0;

        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
    Builder.addImm(IdxValue);
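    // For register classes wider than 32 bits, the immediate lands in the
    // lowest sub-register only (IdxValue == Value when Idx == 0); every higher
    // sub-register chunk is written with zero.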
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                           : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                           : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                           : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                           : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                           : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                           : AMDGPU::S_CSELECT_B64), SReg)
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
                                                       bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                                    bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_A32_SAVE;
    return AMDGPU::SI_SPILL_A64_SAVE;
    return AMDGPU::SI_SPILL_A96_SAVE;
    return AMDGPU::SI_SPILL_A128_SAVE;
    return AMDGPU::SI_SPILL_A160_SAVE;
    return AMDGPU::SI_SPILL_A192_SAVE;
    return AMDGPU::SI_SPILL_A224_SAVE;
    return AMDGPU::SI_SPILL_A256_SAVE;
    return AMDGPU::SI_SPILL_A288_SAVE;
    return AMDGPU::SI_SPILL_A320_SAVE;
    return AMDGPU::SI_SPILL_A352_SAVE;
    return AMDGPU::SI_SPILL_A384_SAVE;
    return AMDGPU::SI_SPILL_A512_SAVE;
    return AMDGPU::SI_SPILL_A1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

                    FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                                    SpillSize, RI, *MFI);
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_A32_RESTORE;
    return AMDGPU::SI_SPILL_A64_RESTORE;
    return AMDGPU::SI_SPILL_A96_RESTORE;
    return AMDGPU::SI_SPILL_A128_RESTORE;
    return AMDGPU::SI_SPILL_A160_RESTORE;
    return AMDGPU::SI_SPILL_A192_RESTORE;
    return AMDGPU::SI_SPILL_A224_RESTORE;
    return AMDGPU::SI_SPILL_A256_RESTORE;
    return AMDGPU::SI_SPILL_A288_RESTORE;
    return AMDGPU::SI_SPILL_A320_RESTORE;
    return AMDGPU::SI_SPILL_A352_RESTORE;
    return AMDGPU::SI_SPILL_A384_RESTORE;
    return AMDGPU::SI_SPILL_A512_RESTORE;
    return AMDGPU::SI_SPILL_A1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

  unsigned SpillSize = TRI->getSpillSize(*RC);

                    FrameInfo.getObjectAlign(FrameIndex));

    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                                     SpillSize, RI, *MFI);

                             unsigned Quantity) const {

  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);
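    // A single S_NOP covers at most 8 wait states (the immediate encodes the
    // count minus one), so larger requests are emitted as a chain of S_NOPs.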
  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

    Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
        MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
        MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
        .addUse(DoorbellRegMasked)
        .addImm(ECQueueWaveAbort);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
        .addUse(SetWaveAbortBit);

    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
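    // S_NOP with immediate N stalls for N + 1 wait states, hence the "+ 1".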
  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));

  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());
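      // When the immediate cannot be emitted as a single 64-bit move, it is
      // split into the Lo/Hi 32-bit halves above and materialized with two
      // 32-bit moves into sub0 and sub1 of the destination.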
    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
        .add(MI.getOperand(1));
        .add(MI.getOperand(2));
    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B64: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
                   MI.getOperand(0).getReg())
        .add(MI.getOperand(1));
                   MI.getOperand(0).getReg())
        .add(MI.getOperand(2));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
                         : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

  case AMDGPU::SI_RETURN: {

    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

      if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);
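  // Otherwise the 64-bit DPP move is split into two 32-bit DPP moves, one per
  // half (sub0/sub1), with the DPP control operands duplicated on each half;
  // the loop below builds the two halves and returns them as a pair.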
  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);

std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;
                                        unsigned Src0OpName,
                                        unsigned Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                      Src1, AMDGPU::OpName::src1_modifiers);
                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {

                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);

                                     int64_t BrOffset) const {

  assert(BranchOp != AMDGPU::S_SETPC_B64);

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
      MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)
  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

      MCCtx.createTempSymbol("post_getpc", true);

      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)

  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                    bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));

    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)
    CondBB = I->getOperand(0).getMBB();
    Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

  if (Cond.size() == 1 && Cond[0].isReg()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {
    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);
  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
      MI.removeOperand(Idx);
  if (!MRI->hasOneNonDBGUse(Reg))

  switch (DefMI.getOpcode()) {
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t {
    int64_t Imm = ImmOp->getImm();
    switch (UseOp.getSubReg()) {
    case AMDGPU::sub1_lo16:
    case AMDGPU::sub1_hi16:

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    bool Is16Bit = OpSize == 2;
    bool Is64Bit = OpSize == 8;
    unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
                                           : AMDGPU::V_MOV_B32_e32
                                 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
                                           : AMDGPU::S_MOV_B32;
    APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)));

      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)

      UseMI.getOperand(0).setSubReg(0);
      UseMI.getOperand(0).setReg(DstReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
                 Opc == AMDGPU::V_FMAC_F16_t16_e64;

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

      if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
                         : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16)

      const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      Src1->ChangeToImmediate(Imm);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

      bool Src0Inlined = false;
      if (Src0->isReg()) {

        if (Def && Def->isMoveImmediate() &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
                         : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;
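  // The two accesses are disjoint when the lower one ends at or before the
  // higher one begins, e.g. offsets 0 and 16 with a 16-byte width give
  // 0 + 16 <= 16.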
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())

  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    if (Def.isEarlyClobber() && Def.isReg() &&

      auto UpdateDefIndex = [&](LiveRange &LR) {
        auto S = LR.find(OldIndex);
        if (S != LR.end() && S->start == OldIndex) {
          assert(S->valno && S->valno->def == OldIndex);
          S->start = NewIndex;
          S->valno->def = NewIndex;

      for (auto &SR : LI.subranges())

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         "V_FMAC_F16_t16_e32 is not supported and not expected to be present "

  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                             AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&

  const auto killDef = [&]() -> void {
    if (!MRI.hasOneNonDBGUse(DefReg))

                           : AMDGPU::V_FMAAK_F16)
                           : AMDGPU::V_FMAAK_F32)
              : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);

                           : AMDGPU::V_FMAMK_F16)
                           : AMDGPU::V_FMAMK_F32)
              : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                          : IsF64 ? AMDGPU::V_FMA_F64_e64
                          : IsLegacy ? AMDGPU::V_FMA_LEGACY_F32_e64
                                     : AMDGPU::V_FMA_F32_e64
                          : IsF16 ? AMDGPU::V_MAD_F16_e64
                          : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
                                     : AMDGPU::V_MAD_F32_e64;

  MIB.addImm(OpSel ? OpSel->getImm() : 0);
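  // At this point the two-address MAC/FMAC form has been rewritten into the
  // three-address MAD/FMA opcode selected above, carrying over the source
  // modifiers, clamp, omod and (when present) op_sel operands.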
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {

  APInt IntImm = Imm.bitcastToAPInt();

                                   uint8_t OperandType) const {
  assert(!MO.isReg() && "isInlineConstant called on register operand!");

  int64_t Imm = MO.getImm();
  switch (OperandType) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

                                      AMDGPU::OpName::src2))

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                           unsigned Op32) const {
      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();
    int OpTy = MI.getDesc().operands()[Idx++].OperandType;

  if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)

    return MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
           MO.getReg() == AMDGPU::VCC_LO;

  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
         AMDGPU::SReg_64RegClass.contains(MO.getReg());
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

    if (SubReg.getReg().isPhysical())

    return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))

  if (Src0Idx == -1) {

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
  if (MI.isInlineAsm()) {
      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";

  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int RegClass = Desc.operands()[i].RegClass;

    switch (Desc.operands()[i].OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Expected inline constant for operand.";

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

          RI.getSubRegisterClass(RC, MO.getSubReg());

        ErrInfo = "Subtarget requires even aligned vector registers";

    if (RegClass != -1) {
      if (Reg.isVirtual())

        ErrInfo = "Operand has incorrect register class.";
    ErrInfo = "SDWA is not supported on this target";

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
        ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
        ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9+";

    if (OMod != nullptr &&
      ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";

    if (isVOPC(BasicOpcode)) {
      if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
        ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";

      if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
        ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";

      if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
        ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
            "Dst register should be tied to implicit use of preserved register";
        ErrInfo = "Dst register should use same physical register as preserved";
      uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
      if (RegCount > DstSize) {
        ErrInfo = "Image instruction returns too many registers for dst "

  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
        } else if (!MO.isFI()) {
            ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

          if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
                return !RI.regsOverlap(SGPRUsed, SGPR);

        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

      ErrInfo = "VOP3 instruction uses literal";
  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;

    for (int OpIdx : {Src0Idx, Src1Idx}) {
        if (MO.getReg() != SGPRUsed)
          ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

        ErrInfo = "ABS not allowed in VOP3B instructions";

      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

    if (Desc.isBranch()) {
        ErrInfo = "invalid branch target for SOPK instruction";

        if (!isUInt<16>(Imm)) {
          ErrInfo = "invalid immediate for SOPK instruction";
        if (!isInt<16>(Imm)) {
          ErrInfo = "invalid immediate for SOPK instruction";
  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";
  if (MI.mayStore() &&
      if (Soff && Soff->getReg() != AMDGPU::M0) {
        ErrInfo = "scalar stores must use m0 as offset register";

      if (Offset->getImm() != 0) {
        ErrInfo = "subtarget does not support offsets in flat instructions";

    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";
                                             AMDGPU::OpName::vaddr0);
        isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;

      ErrInfo = "dim is out of range";

      IsA16 = R128A16->getImm() != 0;
    } else if (ST.hasA16()) {
      IsA16 = A16->getImm() != 0;

    bool IsNSA = RsrcIdx - VAddr0Idx > 1;

    unsigned AddrWords =

    unsigned VAddrWords;
      VAddrWords = RsrcIdx - VAddr0Idx;
        unsigned LastVAddrIdx = RsrcIdx - 1;
        VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

    if (VAddrWords != AddrWords) {
                 << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";
    using namespace AMDGPU::DPP;
    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";

    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST &&
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
    uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
                                        : AMDGPU::OpName::vdata;

        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";

    if (Data && Data2 &&
        ErrInfo = "Invalid register class: "
                  "both data operands should be VGPR or AGPR";

    if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
      ErrInfo = "Invalid register class: "
                "agpr loads and stores not supported on this GPU";
5291 const auto isAlignedReg = [&
MI, &
MRI,
this](
unsigned OpName) ->
bool {
5296 if (Reg.isPhysical())
5303 if (
MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
5304 MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
5305 MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
5307 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5308 ErrInfo =
"Subtarget requires even aligned vector registers "
5309 "for DS_GWS instructions";
5315 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5316 ErrInfo =
"Subtarget requires even aligned vector registers "
5317 "for vaddr operand of image instructions";
5323 if (
MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
5326 if (Src->isReg() && RI.
isSGPRReg(
MRI, Src->getReg())) {
5327 ErrInfo =
"Invalid register class: "
5328 "v_accvgpr_write with an SGPR is not supported on this GPU";
5333 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5336 ErrInfo =
"pseudo expects only physical SGPRs";
5348  switch (MI.getOpcode()) {
5349  default: return AMDGPU::INSTRUCTION_LIST_END;
5350  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
5351  case AMDGPU::COPY: return AMDGPU::COPY;
5352  case AMDGPU::PHI: return AMDGPU::PHI;
5353  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
5354  case AMDGPU::WQM: return AMDGPU::WQM;
5355  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
5356  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
5357  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
5358  case AMDGPU::S_MOV_B32: {
5360    return MI.getOperand(1).isReg() ||
5362               AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5364  case AMDGPU::S_ADD_I32:
5365    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5366  case AMDGPU::S_ADDC_U32:
5367    return AMDGPU::V_ADDC_U32_e32;
5368  case AMDGPU::S_SUB_I32:
5369    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5372  case AMDGPU::S_ADD_U32:
5373    return AMDGPU::V_ADD_CO_U32_e32;
5374  case AMDGPU::S_SUB_U32:
5375    return AMDGPU::V_SUB_CO_U32_e32;
5376  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
5377  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
5378  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
5379  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
5380  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
5381  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
5382  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
5383  case AMDGPU::S_XNOR_B32:
5384    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5385  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
5386  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
5387  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
5388  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
5389  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
5390  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
5391  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
5392  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
5393  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
5394  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
5395  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
5396  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
5397  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
5398  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
5399  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
5400  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
5401  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
5402  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
5403  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
5404  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
5405  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
5406  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
5407  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
5408  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
5409  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
5410  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
5411  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
5412  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
5413  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
5414  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
5415  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
5416  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
5417  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
5418  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
5419  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
5420  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
5421  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
5422  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
5423  case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
5424  case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
5425  case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
5426  case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
5427  case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
5428  case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
5429  case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
5430  case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
5431  case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
5432  case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
5433  case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
5434  case AMDGPU::S_CEIL_F16:
5436                              : AMDGPU::V_CEIL_F16_fake16_e64;
5437  case AMDGPU::S_FLOOR_F16:
5439                              : AMDGPU::V_FLOOR_F16_fake16_e64;
5440  case AMDGPU::S_TRUNC_F16:
5441    return AMDGPU::V_TRUNC_F16_fake16_e64;
5442  case AMDGPU::S_RNDNE_F16:
5443    return AMDGPU::V_RNDNE_F16_fake16_e64;
5444  case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
5445  case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
5446  case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
5447  case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
5448  case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
5449  case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
5450  case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
5451  case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
5452  case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
5453  case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
5454  case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
5455  case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64;
5456  case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64;
5457  case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
5458  case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5459  case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5460  case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64;
5461  case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
5462  case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
5463  case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
5464  case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
5465  case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
5466  case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
5467  case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
5468  case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
5469  case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
5470  case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
5471  case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
5472  case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
5473  case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
5474  case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
5475  case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
5476  case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
5477  case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64;
5478  case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64;
5479  case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64;
5480  case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64;
5481  case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64;
5482  case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64;
5483  case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64;
5484  case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64;
5485  case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64;
5486  case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64;
5487  case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64;
5488  case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64;
5489  case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
5490  case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
5491  case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
5492  case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
5493  case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
5494  case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
5495  case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
5496  case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
5497  case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
5498  case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
5499  case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
5500  case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
5503      "Unexpected scalar opcode without corresponding vector one!");
5516 bool IsWave32 = ST.isWave32();
5521 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5522 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5531 const unsigned OrSaveExec =
5532 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5545  unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5547 auto ExecRestoreMI =
5557 bool IsAllocatable) {
5558  if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
5563 case AMDGPU::AV_32RegClassID:
5564 RCID = AMDGPU::VGPR_32RegClassID;
5566 case AMDGPU::AV_64RegClassID:
5567 RCID = AMDGPU::VReg_64RegClassID;
5569 case AMDGPU::AV_96RegClassID:
5570 RCID = AMDGPU::VReg_96RegClassID;
5572 case AMDGPU::AV_128RegClassID:
5573 RCID = AMDGPU::VReg_128RegClassID;
5575 case AMDGPU::AV_160RegClassID:
5576 RCID = AMDGPU::VReg_160RegClassID;
5578 case AMDGPU::AV_512RegClassID:
5579 RCID = AMDGPU::VReg_512RegClassID;
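// In adjustAllocatableRegClass, an AV_* (AGPR-or-VGPR) operand class that
// cannot be used as-is on this subtarget has its register-class ID narrowed
// to the plain VGPR class of the same width, so the operand legalizer works
// with a concrete, allocatable class.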
5595 auto RegClass = TID.
operands()[OpNum].RegClass;
5596 bool IsAllocatable =
false;
5606 AMDGPU::OpName::vdst);
5609 : AMDGPU::OpName::vdata);
5610 if (DataIdx != -1) {
5612 TID.
Opcode, AMDGPU::OpName::data1);
5620 unsigned OpNo)
const {
5623 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
5624 Desc.operands()[OpNo].RegClass == -1) {
5627 if (Reg.isVirtual())
5628 return MRI.getRegClass(Reg);
5629 return RI.getPhysRegBaseClass(Reg);
5632  unsigned RCID = Desc.operands()[OpNo].RegClass;
5641  unsigned RCID = get(MI.getOpcode()).operands()[OpIdx].RegClass;
5643  unsigned Size = RI.getRegSizeInBits(*RC);
5644  unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
5645                    : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
5646                                 : AMDGPU::V_MOV_B32_e32;
5648    Opcode = AMDGPU::COPY;
5650    Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
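// The move opcode is picked from the operand's register size: 64-bit values
// get V_MOV_B64_PSEUDO, 16-bit values get V_MOV_B16_t16_e64, everything else
// V_MOV_B32_e32; the two fallbacks at lines 5648/5650 switch to COPY and to
// S_MOV_B32/B64 for register classes a VALU move cannot write directly.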
5667 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
5678 if (SubIdx == AMDGPU::sub0)
5680 if (SubIdx == AMDGPU::sub1)
5692void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
5708 if (Reg.isPhysical())
5719 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
5746 MO = &
MI.getOperand(OpIdx);
5758 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
5764 if (!SGPRsUsed.
count(SGPR) &&
5767 if (--ConstantBusLimit <= 0)
5773 if (!LiteralLimit--)
5775 if (--ConstantBusLimit <= 0)
5793 unsigned Opc =
MI.getOpcode();
5801 isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5802 if ((
int)OpIdx == VDstIdx && DataIdx != -1 &&
5803 MI.getOperand(DataIdx).isReg() &&
5804 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
5806 if ((
int)OpIdx == DataIdx) {
5807 if (VDstIdx != -1 &&
5808 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5812 AMDGPU::OpName::data1);
5813 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
5814 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5817 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts() &&
5827 bool Is64BitOp = Is64BitFPOp ||
5840 if (!Is64BitFPOp && (int32_t)Imm < 0)
5858 unsigned Opc =
MI.getOpcode();
5877 if (Opc == AMDGPU::V_WRITELANE_B32) {
5880 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5886 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5903 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
5905 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
5917 if (Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
5919 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5931 if (HasImplicitSGPR || !
MI.isCommutable()) {
5948 if (CommutedOpc == -1) {
5953 MI.setDesc(
get(CommutedOpc));
5957 bool Src0Kill = Src0.
isKill();
5961 else if (Src1.
isReg()) {
5976 unsigned Opc =
MI.getOpcode();
5984 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
5985 Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
5991 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5997 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6008 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6010 SGPRsUsed.
insert(SGPRReg);
6014 for (
int Idx : VOP3Idx) {
6023 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6048 if (ConstantBusLimit > 0) {
6060 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6061 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6070 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6074 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6076 get(TargetOpcode::COPY), NewSrcReg)
6083 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6089 for (
unsigned i = 0; i < SubRegs; ++i) {
6090 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6092 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6099 get(AMDGPU::REG_SEQUENCE), DstReg);
6100 for (
unsigned i = 0; i < SubRegs; ++i) {
6115 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6117 SBase->setReg(SGPR);
6129 if (OldSAddrIdx < 0)
6146 if (NewVAddrIdx < 0)
6153 if (OldVAddrIdx >= 0) {
6155 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6156 if (!VAddrDef || VAddrDef->
getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6167 if (OldVAddrIdx == NewVAddrIdx) {
6170 MRI.removeRegOperandFromUseList(&NewVAddr);
6171 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6175 MRI.removeRegOperandFromUseList(&NewVAddr);
6176 MRI.addRegOperandToUseList(&NewVAddr);
6178 assert(OldSAddrIdx == NewVAddrIdx);
6180 if (OldVAddrIdx >= 0) {
6182 AMDGPU::OpName::vdst_in);
6186 if (NewVDstIn != -1) {
6193 if (NewVDstIn != -1) {
6232 unsigned OpSubReg =
Op.getSubReg();
6241 Register DstReg =
MRI.createVirtualRegister(DstRC);
6252 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6255 bool ImpDef = Def->isImplicitDef();
6256 while (!ImpDef && Def && Def->isCopy()) {
6257 if (Def->getOperand(1).getReg().isPhysical())
6259 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6260 ImpDef = Def && Def->isImplicitDef();
6262 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6277 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6278 unsigned SaveExecOpc =
6279 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6280 unsigned XorTermOpc =
6281 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6283 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6284 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6292 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6293 unsigned NumSubRegs =
RegSize / 32;
6294 Register VScalarOp = ScalarOp->getReg();
6296 if (NumSubRegs == 1) {
6297 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6299 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6302 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6304 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6310 CondReg = NewCondReg;
6312 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6320 ScalarOp->setReg(CurReg);
6321 ScalarOp->setIsKill();
6324 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6325 "Unhandled register size");
6327 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6328 Register CurRegLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6329 Register CurRegHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6332 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6333 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6336 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6337 .
addReg(VScalarOp, VScalarOpUndef,
6338 TRI->getSubRegFromChannel(
Idx + 1));
6344 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6345 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6351 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6352 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6355 if (NumSubRegs <= 2)
6356 Cmp.addReg(VScalarOp);
6358 Cmp.addReg(VScalarOp, VScalarOpUndef,
6359 TRI->getSubRegFromChannel(
Idx, 2));
6363 CondReg = NewCondReg;
6365 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6374 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6375 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6379 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6380 unsigned Channel = 0;
6381 for (
Register Piece : ReadlanePieces) {
6382 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6386 ScalarOp->setReg(SScalarOp);
6387 ScalarOp->setIsKill();
6391 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6392 MRI.setSimpleHint(SaveExec, CondReg);
6423 if (!Begin.isValid())
6425 if (!
End.isValid()) {
6430 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6431 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6432 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6441 std::numeric_limits<unsigned>::max()) !=
6444 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6450 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6459 for (
auto I = Begin;
I != AfterMI;
I++) {
6460 for (
auto &MO :
I->all_uses())
6461 MRI.clearKillFlags(MO.getReg());
6496 for (
auto &Succ : RemainderBB->
successors()) {
6519static std::tuple<unsigned, unsigned>
6527 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
6528 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
6531 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6532 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6533 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6534 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6535 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
6543 .
addImm(RsrcDataFormat & 0xFFFFFFFF);
6547 .
addImm(RsrcDataFormat >> 32);
6552 .
addImm(AMDGPU::sub0_sub1)
6558 return std::tuple(RsrcPtr, NewSRsrc);
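// extractRsrcPtr splits a VGPR-held buffer resource: the low 64 bits become
// a pointer usable for flat/global addressing, and a new all-SGPR descriptor
// is rebuilt from a zero base plus the default resource data format so the
// remaining MUBUF fields stay well defined.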
6595 if (
MI.getOpcode() == AMDGPU::PHI) {
6597 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
6598 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
6601 MRI.getRegClass(
MI.getOperand(i).getReg());
6616 VRC = &AMDGPU::VReg_1RegClass;
6632 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6634 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6650 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6657 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6659 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6677 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6682 if (DstRC != Src0RC) {
6691 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6699 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6700 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6701 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6702 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6703 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
6704 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
6705 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
6720 : AMDGPU::OpName::srsrc;
6725 int SampOpName =
isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6734 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6740 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6741 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6746 while (Start->getOpcode() != FrameSetupOpcode)
6749 while (
End->getOpcode() != FrameDestroyOpcode)
6753 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
6754 MI.definesRegister(
End->getOperand(1).getReg(),
nullptr))
6762 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6764 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6775 bool isSoffsetLegal =
true;
6778 if (SoffsetIdx != -1) {
6782 isSoffsetLegal =
false;
6786 bool isRsrcLegal =
true;
6789 if (RsrcIdx != -1) {
6792 isRsrcLegal =
false;
6797 if (isRsrcLegal && isSoffsetLegal)
6821 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6822 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6823 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6825 const auto *BoolXExecRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6826 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
6827 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
6829 unsigned RsrcPtr, NewSRsrc;
6836 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6843 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6861 "FIXME: Need to emit flat atomics here");
6863 unsigned RsrcPtr, NewSRsrc;
6866 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6889 MIB.
addImm(CPol->getImm());
6894 MIB.
addImm(TFE->getImm());
6914 MI.removeFromParent();
6919 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6921 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6925 if (!isSoffsetLegal) {
6937 if (!isSoffsetLegal) {
6946 InstrList.insert(
MI);
6950 if (RsrcIdx != -1) {
6951 DeferredList.insert(
MI);
6956 return DeferredList.contains(
MI);
6962 while (!Worklist.
empty()) {
6976 "Deferred MachineInstr are not supposed to re-populate worklist");
6994 case AMDGPU::S_ADD_U64_PSEUDO:
6995 NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
6997 case AMDGPU::S_SUB_U64_PSEUDO:
6998 NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
7000 case AMDGPU::S_ADD_I32:
7001 case AMDGPU::S_SUB_I32: {
7005 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7013 case AMDGPU::S_MUL_U64:
7015 splitScalarSMulU64(Worklist, Inst, MDT);
7019 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7020 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7023 splitScalarSMulPseudo(Worklist, Inst, MDT);
7027 case AMDGPU::S_AND_B64:
7028 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7032 case AMDGPU::S_OR_B64:
7033 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7037 case AMDGPU::S_XOR_B64:
7038 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7042 case AMDGPU::S_NAND_B64:
7043 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7047 case AMDGPU::S_NOR_B64:
7048 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7052 case AMDGPU::S_XNOR_B64:
7054 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7056 splitScalar64BitXnor(Worklist, Inst, MDT);
7060 case AMDGPU::S_ANDN2_B64:
7061 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7065 case AMDGPU::S_ORN2_B64:
7066 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7070 case AMDGPU::S_BREV_B64:
7071 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7075 case AMDGPU::S_NOT_B64:
7076 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7080 case AMDGPU::S_BCNT1_I32_B64:
7081 splitScalar64BitBCNT(Worklist, Inst);
7085 case AMDGPU::S_BFE_I64:
7086 splitScalar64BitBFE(Worklist, Inst);
7090 case AMDGPU::S_FLBIT_I32_B64:
7091 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7094 case AMDGPU::S_FF1_I32_B64:
7095 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7099 case AMDGPU::S_LSHL_B32:
7101 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7105 case AMDGPU::S_ASHR_I32:
7107 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7111 case AMDGPU::S_LSHR_B32:
7113 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7117 case AMDGPU::S_LSHL_B64:
7120 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7121 : AMDGPU::V_LSHLREV_B64_e64;
7125 case AMDGPU::S_ASHR_I64:
7127 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7131 case AMDGPU::S_LSHR_B64:
7133 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7138 case AMDGPU::S_ABS_I32:
7139 lowerScalarAbs(Worklist, Inst);
7143 case AMDGPU::S_CBRANCH_SCC0:
7144 case AMDGPU::S_CBRANCH_SCC1: {
7147 bool IsSCC = CondReg == AMDGPU::SCC;
7150 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7153 .
addReg(IsSCC ? VCC : CondReg);
7157 case AMDGPU::S_BFE_U64:
7158 case AMDGPU::S_BFM_B64:
7161 case AMDGPU::S_PACK_LL_B32_B16:
7162 case AMDGPU::S_PACK_LH_B32_B16:
7163 case AMDGPU::S_PACK_HL_B32_B16:
7164 case AMDGPU::S_PACK_HH_B32_B16:
7165 movePackToVALU(Worklist,
MRI, Inst);
7169 case AMDGPU::S_XNOR_B32:
7170 lowerScalarXnor(Worklist, Inst);
7174 case AMDGPU::S_NAND_B32:
7175 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7179 case AMDGPU::S_NOR_B32:
7180 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7184 case AMDGPU::S_ANDN2_B32:
7185 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7189 case AMDGPU::S_ORN2_B32:
7190 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7198 case AMDGPU::S_ADD_CO_PSEUDO:
7199 case AMDGPU::S_SUB_CO_PSEUDO: {
7200 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7201 ? AMDGPU::V_ADDC_U32_e64
7202 : AMDGPU::V_SUBB_U32_e64;
7203 const auto *CarryRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
7206 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7207 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7225 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7229 case AMDGPU::S_UADDO_PSEUDO:
7230 case AMDGPU::S_USUBO_PSEUDO: {
7237 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7238 ? AMDGPU::V_ADD_CO_U32_e64
7239 : AMDGPU::V_SUB_CO_U32_e64;
7242 Register DestReg =
MRI.createVirtualRegister(NewRC);
7250 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7257 case AMDGPU::S_CSELECT_B32:
7258 case AMDGPU::S_CSELECT_B64:
7259 lowerSelect(Worklist, Inst, MDT);
7262 case AMDGPU::S_CMP_EQ_I32:
7263 case AMDGPU::S_CMP_LG_I32:
7264 case AMDGPU::S_CMP_GT_I32:
7265 case AMDGPU::S_CMP_GE_I32:
7266 case AMDGPU::S_CMP_LT_I32:
7267 case AMDGPU::S_CMP_LE_I32:
7268 case AMDGPU::S_CMP_EQ_U32:
7269 case AMDGPU::S_CMP_LG_U32:
7270 case AMDGPU::S_CMP_GT_U32:
7271 case AMDGPU::S_CMP_GE_U32:
7272 case AMDGPU::S_CMP_LT_U32:
7273 case AMDGPU::S_CMP_LE_U32:
7274 case AMDGPU::S_CMP_EQ_U64:
7275 case AMDGPU::S_CMP_LG_U64:
7276 case AMDGPU::S_CMP_LT_F32:
7277 case AMDGPU::S_CMP_EQ_F32:
7278 case AMDGPU::S_CMP_LE_F32:
7279 case AMDGPU::S_CMP_GT_F32:
7280 case AMDGPU::S_CMP_LG_F32:
7281 case AMDGPU::S_CMP_GE_F32:
7282 case AMDGPU::S_CMP_O_F32:
7283 case AMDGPU::S_CMP_U_F32:
7284 case AMDGPU::S_CMP_NGE_F32:
7285 case AMDGPU::S_CMP_NLG_F32:
7286 case AMDGPU::S_CMP_NGT_F32:
7287 case AMDGPU::S_CMP_NLE_F32:
7288 case AMDGPU::S_CMP_NEQ_F32:
7289 case AMDGPU::S_CMP_NLT_F32:
7290 case AMDGPU::S_CMP_LT_F16:
7291 case AMDGPU::S_CMP_EQ_F16:
7292 case AMDGPU::S_CMP_LE_F16:
7293 case AMDGPU::S_CMP_GT_F16:
7294 case AMDGPU::S_CMP_LG_F16:
7295 case AMDGPU::S_CMP_GE_F16:
7296 case AMDGPU::S_CMP_O_F16:
7297 case AMDGPU::S_CMP_U_F16:
7298 case AMDGPU::S_CMP_NGE_F16:
7299 case AMDGPU::S_CMP_NLG_F16:
7300 case AMDGPU::S_CMP_NGT_F16:
7301 case AMDGPU::S_CMP_NLE_F16:
7302 case AMDGPU::S_CMP_NEQ_F16:
7303 case AMDGPU::S_CMP_NLT_F16: {
7309 AMDGPU::OpName::src0_modifiers) >= 0) {
7324 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7328 case AMDGPU::S_CVT_HI_F32_F16: {
7330 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7331 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7342 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7346 case AMDGPU::S_MINIMUM_F32:
7347 case AMDGPU::S_MAXIMUM_F32:
7348 case AMDGPU::S_MINIMUM_F16:
7349 case AMDGPU::S_MAXIMUM_F16: {
7351 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7362 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7368 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7376 if (NewOpcode == Opcode) {
7400 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7412 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7413 MRI.replaceRegWith(DstReg, NewDstReg);
7415 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7426 AMDGPU::OpName::src0_modifiers) >= 0)
7431 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7432 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7434 NewInstr->addOperand(Src);
7437 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7440 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7442 NewInstr.addImm(
Size);
7443 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7447 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7452 "Scalar BFE is only implemented for constant width and offset");
7461 AMDGPU::OpName::src1_modifiers) >= 0)
7466 AMDGPU::OpName::src2_modifiers) >= 0)
7480 NewInstr->addOperand(
Op);
7487 if (
Op.getReg() == AMDGPU::SCC) {
7489 if (
Op.isDef() && !
Op.isDead())
7490 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7492 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7497 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7498 Register DstReg = NewInstr->getOperand(0).getReg();
7503 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7504 MRI.replaceRegWith(DstReg, NewDstReg);
7510 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7514 std::pair<bool, MachineBasicBlock *>
7526   Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7529   assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
7531   unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
7532     AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
7540   MRI.replaceRegWith(OldDstReg, ResultReg);
7543   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
7544   return std::pair(true, NewBB);
7547   return std::pair(false, nullptr);
7564 bool IsSCC = (CondReg == AMDGPU::SCC);
7572 MRI.replaceRegWith(Dest.
getReg(), CondReg);
7580 NewCondReg =
MRI.createVirtualRegister(TC);
7584 bool CopyFound =
false;
7588 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
7590 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7592 .
addReg(CandI.getOperand(1).getReg());
7604 : AMDGPU::S_CSELECT_B32;
7614 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
7615 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7628 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
7630 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
7642 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7643 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7646 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7656 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7657 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7672 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7680 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7681 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7687 bool Src0IsSGPR = Src0.
isReg() &&
7689 bool Src1IsSGPR = Src1.
isReg() &&
7692 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7693 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7703 }
else if (Src1IsSGPR) {
7717 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7721 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7727 unsigned Opcode)
const {
7737 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7738 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7750 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7751 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7756 unsigned Opcode)
const {
7766 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7767 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7779 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7780 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7798 &AMDGPU::SGPR_32RegClass;
7801 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7804 AMDGPU::sub0, Src0SubRC);
7809 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7811 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
7815 AMDGPU::sub1, Src0SubRC);
7817 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
7823 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
7830 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7832 Worklist.
insert(&LoHalf);
7833 Worklist.
insert(&HiHalf);
7839 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7850 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7851 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7852 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7863 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7867 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7897 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7903 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7909 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7920 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7936 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7948 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
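// splitScalarSMulU64 lowers a scalar 64x64->64 multiply into 32-bit VALU
// pieces in the usual way: lo(result) = lo(a)*lo(b), and hi(result) =
// mulhi(lo(a),lo(b)) + lo(a)*hi(b) + hi(a)*lo(b), with the carry of the
// partial sums handled by the add sequence built above.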
7959 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7960 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7961 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7972 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7976 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7988 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
7989 ? AMDGPU::V_MUL_HI_U32_e64
7990 : AMDGPU::V_MUL_HI_I32_e64;
8005 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8013 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8032 &AMDGPU::SGPR_32RegClass;
8035 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8038 &AMDGPU::SGPR_32RegClass;
8041 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8044 AMDGPU::sub0, Src0SubRC);
8046 AMDGPU::sub0, Src1SubRC);
8048 AMDGPU::sub1, Src0SubRC);
8050 AMDGPU::sub1, Src1SubRC);
8055 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8057 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8062 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8067 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8074 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8076 Worklist.
insert(&LoHalf);
8077 Worklist.
insert(&HiHalf);
8080 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8098 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8114 Register NewDest =
MRI.createVirtualRegister(DestRC);
8120 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8138 MRI.getRegClass(Src.getReg()) :
8139 &AMDGPU::SGPR_32RegClass;
8141 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8142 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8145 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8148 AMDGPU::sub0, SrcSubRC);
8150 AMDGPU::sub1, SrcSubRC);
8156 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8160 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8179 Offset == 0 &&
"Not implemented");
8182 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8183 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8184 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8201 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8202 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8207 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8208 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8212 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8215 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8220 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8221 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8242 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8243 unsigned OpcodeAdd =
8244 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8247 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8249 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8256 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8257 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8258 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8259 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8266 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8272 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8274 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8276 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
8279void SIInstrInfo::addUsersToMoveToVALUWorklist(
8283 E =
MRI.use_end();
I != E;) {
8288 switch (
UseMI.getOpcode()) {
8291 case AMDGPU::SOFT_WQM:
8292 case AMDGPU::STRICT_WWM:
8293 case AMDGPU::STRICT_WQM:
8294 case AMDGPU::REG_SEQUENCE:
8296 case AMDGPU::INSERT_SUBREG:
8299 OpNo =
I.getOperandNo();
8308 }
while (
I != E &&
I->getParent() == &
UseMI);
8318 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8325 case AMDGPU::S_PACK_LL_B32_B16: {
8326 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8327 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8344 case AMDGPU::S_PACK_LH_B32_B16: {
8345 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8354 case AMDGPU::S_PACK_HL_B32_B16: {
8355 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8365 case AMDGPU::S_PACK_HH_B32_B16: {
8366 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8367 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8384 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8385 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8394 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8395 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8403 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
8407 Register DestReg =
MI.getOperand(0).getReg();
8409 MRI.replaceRegWith(DestReg, NewCond);
8414 MI.getOperand(SCCIdx).setReg(NewCond);
8420 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
8423 for (
auto &Copy : CopyToDelete)
8424 Copy->eraseFromParent();
8432void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8441 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8443 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8460 case AMDGPU::REG_SEQUENCE:
8461 case AMDGPU::INSERT_SUBREG:
8463 case AMDGPU::SOFT_WQM:
8464 case AMDGPU::STRICT_WWM:
8465 case AMDGPU::STRICT_WQM: {
8473 case AMDGPU::REG_SEQUENCE:
8474 case AMDGPU::INSERT_SUBREG:
8484 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8501 int OpIndices[3])
const {
8520 for (
unsigned i = 0; i < 3; ++i) {
8521 int Idx = OpIndices[i];
8558 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8559 SGPRReg = UsedSGPRs[0];
8562 if (!SGPRReg && UsedSGPRs[1]) {
8563 if (UsedSGPRs[1] == UsedSGPRs[2])
8564 SGPRReg = UsedSGPRs[1];
8571 unsigned OperandName)
const {
8576 return &
MI.getOperand(
Idx);
8593 RsrcDataFormat |= (1ULL << 56);
8598 RsrcDataFormat |= (2ULL << 59);
8601 return RsrcDataFormat;
8623 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8629 unsigned Opc =
MI.getOpcode();
8635 return get(Opc).mayLoad() &&
8640 int &FrameIndex)
const {
8648 FrameIndex =
Addr->getIndex();
8653 int &FrameIndex)
const {
8656 FrameIndex =
Addr->getIndex();
8661 int &FrameIndex)
const {
8675 int &FrameIndex)
const {
8692 while (++
I != E &&
I->isInsideBundle()) {
8693 assert(!
I->isBundle() &&
"No nested bundle!");
8701 unsigned Opc =
MI.getOpcode();
8703 unsigned DescSize =
Desc.getSize();
8708 unsigned Size = DescSize;
8723 bool HasLiteral =
false;
8724 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
8732 return HasLiteral ? DescSize + 4 : DescSize;
8742 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8746 case TargetOpcode::BUNDLE:
8748 case TargetOpcode::INLINEASM:
8749 case TargetOpcode::INLINEASM_BR: {
8751 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8755 if (
MI.isMetaInstruction())
8765 if (
MI.memoperands_empty())
8776 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
8788 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8791 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_IF), DstReg)
8792 .
add(Branch->getOperand(0))
8793 .
add(Branch->getOperand(1));
8795 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_END_CF))
8814 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8819 BuildMI(*(MF), Branch->getDebugLoc(),
get(TargetOpcode::PHI), DstReg);
8821 if (PMBB == LoopEnd) {
8822 HeaderPHIBuilder.
addReg(BackEdgeReg);
8827 HeaderPHIBuilder.
addReg(ZeroReg);
8829 HeaderPHIBuilder.
addMBB(PMBB);
8833 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
8835 .
add(Branch->getOperand(0));
8837 BuildMI(*(MF), Branch->getDebugLoc(),
get(AMDGPU::SI_LOOP))
8843 LoopEnd->
insert(LoopEnd->
end(), SIIFBREAK);
8844 LoopEnd->
insert(LoopEnd->
end(), SILOOP);
8850 static const std::pair<int, const char *> TargetIndices[] = {
8888std::pair<unsigned, unsigned>
8895 static const std::pair<unsigned, const char *> TargetFlags[] = {
8910 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8924 return AMDGPU::WWM_COPY;
8926 return AMDGPU::COPY;
8937 bool IsNullOrVectorRegister =
true;
8946 return IsNullOrVectorRegister &&
8947 (
isSpill(Opcode) || (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
8948 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
8961 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
8992 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
8993 case AMDGPU::SI_KILL_I1_TERMINATOR:
9002 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9003 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9004 case AMDGPU::SI_KILL_I1_PSEUDO:
9005 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9017 const unsigned OffsetBits =
9019 return (1 << OffsetBits) - 1;
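// The largest legal MUBUF immediate offset is just the all-ones value of the
// offset field; for example, a 12-bit offset field gives (1 << 12) - 1 = 4095.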
9026 if (
MI.isInlineAsm())
9029 for (
auto &
Op :
MI.implicit_operands()) {
9030 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
9031 Op.setReg(AMDGPU::VCC_LO);
9044 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9062 if (Imm <= MaxImm + 64) {
9064 Overflow = Imm - MaxImm;
9154 std::pair<int64_t, int64_t>
9157   int64_t RemainderOffset = COffsetVal;
9158   int64_t ImmField = 0;
9163   if (AllowNegative) {
9165     int64_t D = 1LL << NumBits;
9166     RemainderOffset = (COffsetVal / D) * D;
9167     ImmField = COffsetVal - RemainderOffset;
9171         (ImmField % 4) != 0) {
9173       RemainderOffset += ImmField % 4;
9174       ImmField -= ImmField % 4;
9176   } else if (COffsetVal >= 0) {
9177     ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
9178     RemainderOffset = COffsetVal - ImmField;
9182   assert(RemainderOffset + ImmField == COffsetVal);
9183   return {ImmField, RemainderOffset};
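// Worked example for the split above (assuming NumBits == 12 and
// AllowNegative): COffsetVal = 5000 gives D = 4096, RemainderOffset = 4096
// and ImmField = 904, and the assert checks 4096 + 904 == 5000. Because the
// division truncates toward zero, a negative COffsetVal yields an ImmField
// on the same side of zero, keeping it representable as a signed immediate.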
9195 switch (ST.getGeneration()) {
9220 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9221 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9222 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9223 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9224 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9225 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9226 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9227 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9263 if (
isMAI(Opcode)) {
9308 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
9309 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9310 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9322 switch (
MI.getOpcode()) {
9324 case AMDGPU::REG_SEQUENCE:
9328 case AMDGPU::INSERT_SUBREG:
9329 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9346 if (!
P.Reg.isVirtual())
9350 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9351 while (
auto *
MI = DefInst) {
9353 switch (
MI->getOpcode()) {
9355 case AMDGPU::V_MOV_B32_e32: {
9356 auto &Op1 =
MI->getOperand(1);
9361 DefInst =
MRI.getVRegDef(RSR.Reg);
9369 DefInst =
MRI.getVRegDef(RSR.Reg);
9382 assert(
MRI.isSSA() &&
"Must be run on SSA");
9384 auto *
TRI =
MRI.getTargetRegisterInfo();
9385 auto *DefBB =
DefMI.getParent();
9389 if (
UseMI.getParent() != DefBB)
9392 const int MaxInstScan = 20;
9396 auto E =
UseMI.getIterator();
9397 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
9398 if (
I->isDebugInstr())
9401 if (++NumInst > MaxInstScan)
9404 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9414 assert(
MRI.isSSA() &&
"Must be run on SSA");
9416 auto *
TRI =
MRI.getTargetRegisterInfo();
9417 auto *DefBB =
DefMI.getParent();
9419 const int MaxUseScan = 10;
9422 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9423 auto &UseInst = *
Use.getParent();
9426 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9429 if (++NumUse > MaxUseScan)
9436 const int MaxInstScan = 20;
9440 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9443 if (
I->isDebugInstr())
9446 if (++NumInst > MaxInstScan)
9459 if (Reg == VReg && --NumUse == 0)
9461 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9473 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
9476 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9485 if (InsPt !=
MBB.
end() &&
9486 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9487 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9488 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9489 InsPt->definesRegister(Src,
nullptr)) {
9493 : AMDGPU::S_MOV_B64_term),
9495 .
addReg(Src, 0, SrcSubReg)
9520 if (isFullCopyInstr(
MI)) {
9529 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9533 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9544 unsigned *PredCost)
const {
9545 if (
MI.isBundle()) {
9548 unsigned Lat = 0, Count = 0;
9549 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
9551 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
9553 return Lat + Count - 1;
9556 return SchedModel.computeInstrLatency(&
MI);
9561 unsigned opcode =
MI.getOpcode();
9562 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9563 auto IID = GI->getIntrinsicID();
9570 case Intrinsic::amdgcn_if:
9571 case Intrinsic::amdgcn_else:
9585 if (opcode == AMDGPU::G_LOAD) {
9586 if (
MI.memoperands_empty())
9590 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9591 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9599 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9600 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9601 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9614 unsigned opcode =
MI.getOpcode();
9615 if (opcode == AMDGPU::V_READLANE_B32 ||
9616 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9617 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9620 if (isCopyInstr(
MI)) {
9624 RI.getPhysRegBaseClass(srcOp.
getReg());
9632 if (
MI.isPreISelOpcode())
9647 if (
MI.memoperands_empty())
9651 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9652 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9667 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
9673 if (!Reg || !
SrcOp.readsReg())
9679 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
9716                                   Register &SrcReg2, int64_t &CmpMask,
9717                                   int64_t &CmpValue) const {
9718   if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
9721   switch (MI.getOpcode()) {
9724   case AMDGPU::S_CMP_EQ_U32:
9725   case AMDGPU::S_CMP_EQ_I32:
9726   case AMDGPU::S_CMP_LG_U32:
9727   case AMDGPU::S_CMP_LG_I32:
9728   case AMDGPU::S_CMP_LT_U32:
9729   case AMDGPU::S_CMP_LT_I32:
9730   case AMDGPU::S_CMP_GT_U32:
9731   case AMDGPU::S_CMP_GT_I32:
9732   case AMDGPU::S_CMP_LE_U32:
9733   case AMDGPU::S_CMP_LE_I32:
9734   case AMDGPU::S_CMP_GE_U32:
9735   case AMDGPU::S_CMP_GE_I32:
9736   case AMDGPU::S_CMP_EQ_U64:
9737   case AMDGPU::S_CMP_LG_U64:
9738     SrcReg = MI.getOperand(0).getReg();
9739     if (MI.getOperand(1).isReg()) {
9740       if (MI.getOperand(1).getSubReg())
9742       SrcReg2 = MI.getOperand(1).getReg();
9744     } else if (MI.getOperand(1).isImm()) {
9746       CmpValue = MI.getOperand(1).getImm();
9752   case AMDGPU::S_CMPK_EQ_U32:
9753   case AMDGPU::S_CMPK_EQ_I32:
9754   case AMDGPU::S_CMPK_LG_U32:
9755   case AMDGPU::S_CMPK_LG_I32:
9756   case AMDGPU::S_CMPK_LT_U32:
9757   case AMDGPU::S_CMPK_LT_I32:
9758   case AMDGPU::S_CMPK_GT_U32:
9759   case AMDGPU::S_CMPK_GT_I32:
9760   case AMDGPU::S_CMPK_LE_U32:
9761   case AMDGPU::S_CMPK_LE_I32:
9762   case AMDGPU::S_CMPK_GE_U32:
9763   case AMDGPU::S_CMPK_GE_I32:
9764     SrcReg = MI.getOperand(0).getReg();
9766     CmpValue = MI.getOperand(1).getImm();
9784   const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
9785                                this](int64_t ExpectedValue, unsigned SrcSize,
9786                                      bool IsReversible, bool IsSigned) -> bool {
9811     if (!Def || Def->getParent() != CmpInstr.getParent())
9814     if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
9815         Def->getOpcode() != AMDGPU::S_AND_B64)
9819     const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
9830       SrcOp = &Def->getOperand(2);
9831     else if (isMask(&Def->getOperand(2)))
9832       SrcOp = &Def->getOperand(1);
9837     if (IsSigned && BitNo == SrcSize - 1)
9840     ExpectedValue <<= BitNo;
9842     bool IsReversedCC = false;
9843     if (CmpValue != ExpectedValue) {
9846       IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
9851     Register DefReg = Def->getOperand(0).getReg();
9852     if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
9855     for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
9857       if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
9858           I->killsRegister(AMDGPU::SCC, &RI))
9863         Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
9867     if (!MRI->use_nodbg_empty(DefReg)) {
9875     unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
9876                                                      : AMDGPU::S_BITCMP1_B32
9877                                       : IsReversedCC ? AMDGPU::S_BITCMP0_B64
9878                                                      : AMDGPU::S_BITCMP1_B64;
9883     Def->eraseFromParent();
9891   case AMDGPU::S_CMP_EQ_U32:
9892   case AMDGPU::S_CMP_EQ_I32:
9893   case AMDGPU::S_CMPK_EQ_U32:
9894   case AMDGPU::S_CMPK_EQ_I32:
9895     return optimizeCmpAnd(1, 32, true, false);
9896   case AMDGPU::S_CMP_GE_U32:
9897   case AMDGPU::S_CMPK_GE_U32:
9898     return optimizeCmpAnd(1, 32, false, false);
9899   case AMDGPU::S_CMP_GE_I32:
9900   case AMDGPU::S_CMPK_GE_I32:
9901     return optimizeCmpAnd(1, 32, false, true);
9902   case AMDGPU::S_CMP_EQ_U64:
9903     return optimizeCmpAnd(1, 64, true, false);
9904   case AMDGPU::S_CMP_LG_U32:
9905   case AMDGPU::S_CMP_LG_I32:
9906   case AMDGPU::S_CMPK_LG_U32:
9907   case AMDGPU::S_CMPK_LG_I32:
9908     return optimizeCmpAnd(0, 32, true, false);
9909   case AMDGPU::S_CMP_GT_U32:
9910   case AMDGPU::S_CMPK_GT_U32:
9911     return optimizeCmpAnd(0, 32, false, false);
9912   case AMDGPU::S_CMP_GT_I32:
9913   case AMDGPU::S_CMPK_GT_I32:
9914     return optimizeCmpAnd(0, 32, false, true);
9915   case AMDGPU::S_CMP_LG_U64:
9916     return optimizeCmpAnd(0, 64, true, false);
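// optimizeCompareInstr: when the compared value is produced by an
// S_AND_B32/B64 with a single-bit mask, the compare against 0 or 1 is folded
// into a single S_BITCMP0/S_BITCMP1 that sets SCC directly; the arguments to
// optimizeCmpAnd above encode the expected value, operand size, whether the
// condition may be reversed, and whether the compare is signed.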
9941       IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
9944       MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
9945                                        : &AMDGPU::VReg_64_Align2RegClass);
9947       .addReg(DataReg, 0, Op.getSubReg())
9952   Op.setSubReg(AMDGPU::sub0);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
amdgpu AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
TargetInstrInfo::RegSubRegPair RegSubRegPair
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillRestoreOpcode(unsigned Size)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MachineRegisterInfo &MRI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillSaveOpcode(unsigned Size)
static bool resultDependsOnExec(const MachineInstr &MI)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static constexpr unsigned ModifierOpNames[]
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const SIRegisterInfo * getRegisterInfo() const override
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasNoF16PseudoScalarTransInlineConstants() const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
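A hedged usage sketch: before emitting an instruction that clobbers SCC, code in this backend commonly asks whether SCC is already dead at the insertion point. TRI, MBB and the iterator MI are assumed to be in scope.

// Only safe to clobber SCC here if the query proves it dead just before MI.
bool SCCIsDead =
    MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI) ==
    MachineBasicBlock::LQR_Dead;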
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitInst.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
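A sketch of how spill code typically allocates a memory operand for a fixed stack slot, matching the overload listed above; MF and FrameIndex are assumed to be in scope.

// Describe a 4-byte store to the spill slot FrameIndex.
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getFixedStack(MF, FrameIndex),
    MachineMemOperand::MOStore, LLT::scalar(32),
    MF.getFrameInfo().getObjectAlign(FrameIndex));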
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
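These builder methods are normally chained off BuildMI (listed further below). A minimal sketch, assuming MBB, an insertion iterator I, a DebugLoc DL, MRI and TII are in scope:

// Materialize a zero into a fresh 32-bit SGPR.
Register Dst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Dst)
    .addImm(0);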
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
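A hedged sketch of the in-place rewrites these mutators enable, e.g. when folding a known constant into a use; MO, Imm and NewReg are assumed to be in scope.

if (MO.isReg() && !MO.isDef()) {
  // Fold a known constant directly into the use...
  MO.ChangeToImmediate(Imm);
  // ...or, alternatively, redirect the use to another register:
  // MO.ChangeToRegister(NewReg, /*isDef=*/false, /*isImp=*/false,
  //                     /*isKill=*/true);
}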
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
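A usage sketch mirroring how this backend scavenges a temporary register near the end of a block; MBB, an iterator MI, and a RegScavenger RS are assumed to be in scope, and the register class shown is only an example.

RS.enterBasicBlockEnd(MBB);
Register Tmp = RS.scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI,
                                            /*RestoreAfter=*/false,
                                            /*SPAdj=*/0,
                                            /*AllowSpill=*/false);
// Tmp may be invalid if nothing was free and spilling was disallowed.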
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
bool isNonUniformBranchInstr(MachineInstr &Instr) const
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo of the given opcode.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
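A hedged sketch of a typical call for a scratch (private) access; TII and COffsetVal are assumed to be in scope, and the flag value shown is the usual one for flat-scratch addressing.

// ImmOffset goes in the instruction's offset field; RemainderOffset must be
// added to the address register.
auto [ImmOffset, RemainderOffset] = TII->splitFlatOffset(
    COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);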
bool isSpill(uint16_t Opcode) const
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
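A small usage sketch, assuming TII and a MachineInstr MI are in scope: operands are looked up by name so the code stays independent of encoding-specific operand order.

const MachineOperand *SOff =
    TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
bool HasZeroSOffset = SOff && SOff->isImm() && SOff->getImm() == 0;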
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description.
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
void setHasSpilledVGPRs(bool Spill=true)
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
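A minimal sketch, assuming a MachineInstr MI is in scope, of the range form replacing explicit begin/end iterators:

bool NoRegOperands = llvm::all_of(MI.explicit_operands(),
    [](const MachineOperand &MO) { return !MO.isReg(); });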
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
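A common sketch of the idiom, assuming a MachineBasicBlock MBB is in scope: the early-increment adaptor lets the loop body erase the current instruction without invalidating the iteration.

for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
  if (MI.isDebugInstr())
    MI.eraseFromParent();
}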
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
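A worked example of the Skew behaviour (compile-time checks, assuming llvm/Support/MathExtras.h is included):

static_assert(llvm::alignDown(19u, 8u) == 16u, "largest multiple of 8 <= 19");
static_assert(llvm::alignDown(19u, 8u, 3u) == 19u,
              "largest value <= 19 that is 3 (mod 8)");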
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.