#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

static cl::opt<bool> Fix16BitCopies(
    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  if (Op0Idx == -1 && Op1Idx == -1)
    return true;

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;
  return !MI.memoperands_empty() &&
         all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
         });
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
    return true;
  if (MI.isCompare()) {
    // ...
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:
      break;
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC, nullptr))
  switch (MI.getOpcode()) {
  // ...
  case AMDGPU::V_READFIRSTLANE_B32:
  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
    return false;

  // ...
  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        /* ... */) {
      // ...
      if (FromCycle == nullptr)
        return false;

      // ...
      while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {
  // ...
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  // ...
  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())
    return false;

  // ...
  if (Offset0Idx == -1 || Offset1Idx == -1)
    return false;

  // ...
  Offset0Idx -= get(Opc0).NumDefs;
  Offset1Idx -= get(Opc1).NumDefs;
    assert(NumOps == 4 || NumOps == 5);

    ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
    ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

    if (!Load0Offset || !Load1Offset)
      return false;

    // ...
    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // ...
    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    // ...
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
  // ...

  OffsetIsScalable = false;
      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)
        return false;

      // ...
      Offset = EltSize * Offset0;
    // ...
    if (DataOpIdx == -1) {
    // ...
    if (BaseOp && !BaseOp->isFI())
    // ...
    if (SOffset->isReg())
        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    // ...
    if (VAddr0Idx >= 0) {
      // ...
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
      return true;
  // ...
  if (MO1->getAddrSpace() != MO2->getAddrSpace())
    return false;

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)
    return false;

  // ...
  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
    return false;

  return Base1 == Base2;
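// Illustrative note (sketch, not from the original source): two buffer loads
// whose memory operands carry the same IR base value (e.g. both derived from
// the same pointer argument) are treated as sharing a base pointer here, while
// loads whose base values are missing, distinct, or undef conservatively do
// not match.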
                                   int64_t Offset1, bool OffsetIsScalable1,
                                   // ...
                                   int64_t Offset2, bool OffsetIsScalable2,
                                   unsigned ClusterSize,
                                   unsigned NumBytes) const {
  // ...
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
    // ...
  }

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
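// Worked example (illustrative, assuming a MaxMemoryClusterDWords limit of 8):
// clustering ClusterSize = 2 loads totalling NumBytes = 24 gives LoadSize = 12,
// which rounds up to (12 + 3) / 4 = 3 dwords per load, so NumDWords = 6 and the
// pair may be clustered; four such loads (NumDWords = 12) would be rejected.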
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // ...
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
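// Worked example (illustrative): with Offset0 = 0 and Offset1 = 32, up to 16
// loads are considered close enough to schedule near each other (32 < 64);
// with Offset1 = 128 the pair is kept apart regardless of NumLoads.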
                              const char *Msg = "illegal VGPR to SGPR copy") {
  // ...
  C.diagnose(IllegalCopy);
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

  assert((AMDGPU::SReg_32RegClass.contains(SrcReg) ||
          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

  assert(AMDGPU::AGPR_32RegClass.contains(DestReg) &&
         "Destination register of the copy should be an AGPR.");
    for (auto Def = MI, E = MBB.begin(); Def != E; ) {
      // ...
      if (!Def->modifiesRegister(SrcReg, &RI))
        continue;

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)
        break;

      // ...
      bool SafeToPropagate = true;
      // ...
      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)
        break;

      // ...
      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
  // ...
         "VGPR used for an intermediate copy should have been reserved.");

  // ...
  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
    // ...
  }

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    // If both source and destination pieces are 64-bit aligned, fold this and
    // the next 32-bit piece into a single S_MOV_B64.
    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      // ...
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;
  assert(FirstMI && LastMI);
  // ...
  LastMI->addRegisterKilled(SrcReg, &RI);
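// Illustrative sketch (not from the original source): copying s[4:7] into an
// even-aligned destination such as s[8:11] can be emitted as two S_MOV_B64
// instructions (s[8:9] <- s[4:5], s[10:11] <- s[6:7]); if either side is oddly
// aligned the loop above falls back to one S_MOV_B32 per 32-bit piece, and the
// final move kills the source super-register via addRegisterKilled().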
                              bool RenamableDest, bool RenamableSrc) const {
  // ...
  unsigned Size = RI.getRegSizeInBits(*RC);
  // ...
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
  // ...
  if ((Size == 16) != (SrcSize == 16)) {
    // ...
    if (DestReg == SrcReg) {
      // ...
    }
    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);
  if (RC == &AMDGPU::VGPR_32RegClass) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
      // ...
    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
        // ...
    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      // ...

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
      // ...
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        // ...
    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      // ...

  if (DestReg == AMDGPU::SCC) {
    // ...
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
      // ...

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
           // ...
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
        // ...
                          "Cannot use hi16 subreg with an AGPR!");
      // ...

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {
      // ...
      if (!DstLow || !SrcLow) {
        // ...
                          "Cannot use hi16 subreg on VI!");
  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  // ...
  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  // ...
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    // ...
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
    // ...
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
    // ...
      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  // ...
    Opcode = AMDGPU::V_MOV_B64_e32;
  // ...
    Opcode = AMDGPU::V_PK_MOV_B32;
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  // ...
  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  // ...
      SubIdx = SubIndices[Idx];
    // ...
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
      // ...
                               *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
                                     int64_t Value) const {
  // ...
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
    // ...
  }

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
    // ...
  }

  if (RegClass == &AMDGPU::VGPR_32RegClass) {
  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
    // ...
      Opcode = AMDGPU::S_MOV_B64;
    // ...
      Opcode = AMDGPU::S_MOV_B32;
  }
  // ...
    int64_t IdxValue = Idx == 0 ? Value : 0;

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
    Builder.addImm(IdxValue);
  // ...

  return &AMDGPU::VGPR_32RegClass;
  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    // ...
  } else if (Cond.size() == 2) {
    // ...
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                            : AMDGPU::S_CSELECT_B64), SReg)
      // ...
    }
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                            : AMDGPU::S_CSELECT_B64), SReg)
      // ...
    }
    case SIInstrInfo::VCCNZ: {
      // ...
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      // ...
    }
    case SIInstrInfo::VCCZ: {
      // ...
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      // ...
    }
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
                                            : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
      // ...
      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                            : AMDGPU::S_CSELECT_B64), SReg)
      // ...
    }
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
                                            : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
      // ...
      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                            : AMDGPU::S_CSELECT_B64), SReg)
      // ...
    }
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    // ...
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  }
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
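// Example mapping (illustrative): a 32-bit SGPR destination class selects
// S_MOV_B32 and a 32-bit VGPR class selects V_MOV_B32_e32; 64-bit destinations
// use S_MOV_B64 or the V_MOV_B64_PSEUDO that is expanded later, and anything
// wider falls back to a generic COPY.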
1405 bool IsIndirectSrc)
const {
1406 if (IsIndirectSrc) {
1408 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1410 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1412 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1414 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1416 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1418 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1420 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1422 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1424 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1426 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1428 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1429 if (VecSize <= 1024)
1430 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1436 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1438 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1440 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1442 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1444 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1446 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1448 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1450 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1452 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1454 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1456 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1457 if (VecSize <= 1024)
1458 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1465 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1467 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1469 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1471 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1473 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1475 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1477 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1479 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1481 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1483 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1485 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1486 if (VecSize <= 1024)
1487 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1494 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1496 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1498 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1500 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1502 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1504 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1506 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1508 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1510 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1512 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1514 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1515 if (VecSize <= 1024)
1516 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1523 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1525 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1527 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1529 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1530 if (VecSize <= 1024)
1531 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                        bool IsSGPR) const {
  // ...
  assert(EltSize == 32 && "invalid reg indexing elt size");
  case 4:   return AMDGPU::SI_SPILL_S32_SAVE;
  case 8:   return AMDGPU::SI_SPILL_S64_SAVE;
  case 12:  return AMDGPU::SI_SPILL_S96_SAVE;
  case 16:  return AMDGPU::SI_SPILL_S128_SAVE;
  case 20:  return AMDGPU::SI_SPILL_S160_SAVE;
  case 24:  return AMDGPU::SI_SPILL_S192_SAVE;
  case 28:  return AMDGPU::SI_SPILL_S224_SAVE;
  case 32:  return AMDGPU::SI_SPILL_S256_SAVE;
  case 36:  return AMDGPU::SI_SPILL_S288_SAVE;
  case 40:  return AMDGPU::SI_SPILL_S320_SAVE;
  case 44:  return AMDGPU::SI_SPILL_S352_SAVE;
  case 48:  return AMDGPU::SI_SPILL_S384_SAVE;
  case 64:  return AMDGPU::SI_SPILL_S512_SAVE;
  case 128: return AMDGPU::SI_SPILL_S1024_SAVE;
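// Example (illustrative, assuming the switch key is the byte size reported by
// getSpillSize): spilling a 128-bit SGPR tuple (16 bytes) selects
// AMDGPU::SI_SPILL_S128_SAVE; the VGPR, AGPR and AV save/restore tables below
// follow the same size-to-opcode scheme.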
1592 return AMDGPU::SI_SPILL_V32_SAVE;
1594 return AMDGPU::SI_SPILL_V64_SAVE;
1596 return AMDGPU::SI_SPILL_V96_SAVE;
1598 return AMDGPU::SI_SPILL_V128_SAVE;
1600 return AMDGPU::SI_SPILL_V160_SAVE;
1602 return AMDGPU::SI_SPILL_V192_SAVE;
1604 return AMDGPU::SI_SPILL_V224_SAVE;
1606 return AMDGPU::SI_SPILL_V256_SAVE;
1608 return AMDGPU::SI_SPILL_V288_SAVE;
1610 return AMDGPU::SI_SPILL_V320_SAVE;
1612 return AMDGPU::SI_SPILL_V352_SAVE;
1614 return AMDGPU::SI_SPILL_V384_SAVE;
1616 return AMDGPU::SI_SPILL_V512_SAVE;
1618 return AMDGPU::SI_SPILL_V1024_SAVE;
1627 return AMDGPU::SI_SPILL_A32_SAVE;
1629 return AMDGPU::SI_SPILL_A64_SAVE;
1631 return AMDGPU::SI_SPILL_A96_SAVE;
1633 return AMDGPU::SI_SPILL_A128_SAVE;
1635 return AMDGPU::SI_SPILL_A160_SAVE;
1637 return AMDGPU::SI_SPILL_A192_SAVE;
1639 return AMDGPU::SI_SPILL_A224_SAVE;
1641 return AMDGPU::SI_SPILL_A256_SAVE;
1643 return AMDGPU::SI_SPILL_A288_SAVE;
1645 return AMDGPU::SI_SPILL_A320_SAVE;
1647 return AMDGPU::SI_SPILL_A352_SAVE;
1649 return AMDGPU::SI_SPILL_A384_SAVE;
1651 return AMDGPU::SI_SPILL_A512_SAVE;
1653 return AMDGPU::SI_SPILL_A1024_SAVE;
1662 return AMDGPU::SI_SPILL_AV32_SAVE;
1664 return AMDGPU::SI_SPILL_AV64_SAVE;
1666 return AMDGPU::SI_SPILL_AV96_SAVE;
1668 return AMDGPU::SI_SPILL_AV128_SAVE;
1670 return AMDGPU::SI_SPILL_AV160_SAVE;
1672 return AMDGPU::SI_SPILL_AV192_SAVE;
1674 return AMDGPU::SI_SPILL_AV224_SAVE;
1676 return AMDGPU::SI_SPILL_AV256_SAVE;
1678 return AMDGPU::SI_SPILL_AV288_SAVE;
1680 return AMDGPU::SI_SPILL_AV320_SAVE;
1682 return AMDGPU::SI_SPILL_AV352_SAVE;
1684 return AMDGPU::SI_SPILL_AV384_SAVE;
1686 return AMDGPU::SI_SPILL_AV512_SAVE;
1688 return AMDGPU::SI_SPILL_AV1024_SAVE;
1695 bool IsVectorSuperClass) {
1700 if (IsVectorSuperClass)
1701 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1703 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1711 bool IsVectorSuperClass =
TRI.isVectorSuperClass(RC);
1717 if (IsVectorSuperClass)
1738 FrameInfo.getObjectAlign(FrameIndex));
1739 unsigned SpillSize =
TRI->getSpillSize(*RC);
1744 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1745 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1746 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1754 if (SrcReg.
isVirtual() && SpillSize == 4) {
1755 MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1770 SpillSize, RI, *MFI);
1784 return AMDGPU::SI_SPILL_S32_RESTORE;
1786 return AMDGPU::SI_SPILL_S64_RESTORE;
1788 return AMDGPU::SI_SPILL_S96_RESTORE;
1790 return AMDGPU::SI_SPILL_S128_RESTORE;
1792 return AMDGPU::SI_SPILL_S160_RESTORE;
1794 return AMDGPU::SI_SPILL_S192_RESTORE;
1796 return AMDGPU::SI_SPILL_S224_RESTORE;
1798 return AMDGPU::SI_SPILL_S256_RESTORE;
1800 return AMDGPU::SI_SPILL_S288_RESTORE;
1802 return AMDGPU::SI_SPILL_S320_RESTORE;
1804 return AMDGPU::SI_SPILL_S352_RESTORE;
1806 return AMDGPU::SI_SPILL_S384_RESTORE;
1808 return AMDGPU::SI_SPILL_S512_RESTORE;
1810 return AMDGPU::SI_SPILL_S1024_RESTORE;
1819 return AMDGPU::SI_SPILL_V32_RESTORE;
1821 return AMDGPU::SI_SPILL_V64_RESTORE;
1823 return AMDGPU::SI_SPILL_V96_RESTORE;
1825 return AMDGPU::SI_SPILL_V128_RESTORE;
1827 return AMDGPU::SI_SPILL_V160_RESTORE;
1829 return AMDGPU::SI_SPILL_V192_RESTORE;
1831 return AMDGPU::SI_SPILL_V224_RESTORE;
1833 return AMDGPU::SI_SPILL_V256_RESTORE;
1835 return AMDGPU::SI_SPILL_V288_RESTORE;
1837 return AMDGPU::SI_SPILL_V320_RESTORE;
1839 return AMDGPU::SI_SPILL_V352_RESTORE;
1841 return AMDGPU::SI_SPILL_V384_RESTORE;
1843 return AMDGPU::SI_SPILL_V512_RESTORE;
1845 return AMDGPU::SI_SPILL_V1024_RESTORE;
1854 return AMDGPU::SI_SPILL_A32_RESTORE;
1856 return AMDGPU::SI_SPILL_A64_RESTORE;
1858 return AMDGPU::SI_SPILL_A96_RESTORE;
1860 return AMDGPU::SI_SPILL_A128_RESTORE;
1862 return AMDGPU::SI_SPILL_A160_RESTORE;
1864 return AMDGPU::SI_SPILL_A192_RESTORE;
1866 return AMDGPU::SI_SPILL_A224_RESTORE;
1868 return AMDGPU::SI_SPILL_A256_RESTORE;
1870 return AMDGPU::SI_SPILL_A288_RESTORE;
1872 return AMDGPU::SI_SPILL_A320_RESTORE;
1874 return AMDGPU::SI_SPILL_A352_RESTORE;
1876 return AMDGPU::SI_SPILL_A384_RESTORE;
1878 return AMDGPU::SI_SPILL_A512_RESTORE;
1880 return AMDGPU::SI_SPILL_A1024_RESTORE;
1889 return AMDGPU::SI_SPILL_AV32_RESTORE;
1891 return AMDGPU::SI_SPILL_AV64_RESTORE;
1893 return AMDGPU::SI_SPILL_AV96_RESTORE;
1895 return AMDGPU::SI_SPILL_AV128_RESTORE;
1897 return AMDGPU::SI_SPILL_AV160_RESTORE;
1899 return AMDGPU::SI_SPILL_AV192_RESTORE;
1901 return AMDGPU::SI_SPILL_AV224_RESTORE;
1903 return AMDGPU::SI_SPILL_AV256_RESTORE;
1905 return AMDGPU::SI_SPILL_AV288_RESTORE;
1907 return AMDGPU::SI_SPILL_AV320_RESTORE;
1909 return AMDGPU::SI_SPILL_AV352_RESTORE;
1911 return AMDGPU::SI_SPILL_AV384_RESTORE;
1913 return AMDGPU::SI_SPILL_AV512_RESTORE;
1915 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1922 bool IsVectorSuperClass) {
1927 if (IsVectorSuperClass)
1928 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1930 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1937 bool IsVectorSuperClass =
TRI.isVectorSuperClass(RC);
1943 if (IsVectorSuperClass)
1961 unsigned SpillSize =
TRI->getSpillSize(*RC);
1968 FrameInfo.getObjectAlign(FrameIndex));
1972 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1973 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1974 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1979 if (DestReg.
isVirtual() && SpillSize == 4) {
1981 MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1995 SpillSize, RI, *MFI);
                               unsigned Quantity) const {
  // ...
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);
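// Worked example (illustrative, assuming each S_NOP encodes "count - 1" in its
// immediate, which matches getNumWaitStates() below): a request for 20 wait
// states is emitted as chunks of at most 8, i.e. S_NOP 7, S_NOP 7, S_NOP 3.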
  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  // ...
  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;
  // ...
  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
  // ...
  Register DoorbellRegMasked =
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
  // ...
  Register SetWaveAbortBit =
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);
  // ...
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
  switch (MI.getOpcode()) {
  default:
    if (MI.isMetaInstruction())
      return 0;
    // ...
  case AMDGPU::S_NOP:
    return MI.getOperand(0).getImm() + 1;
  switch (MI.getOpcode()) {
  // ...
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
    break;
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
    break;
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
    break;
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
    break;
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
    break;
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
    break;
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
    break;
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
    break;
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
    break;
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
    break;
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
    break;
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
    break;
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
    break;
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
    break;
2192 case AMDGPU::V_MOV_B64_PSEUDO: {
2194 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2195 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2201 MI.setDesc(
get(AMDGPU::V_MOV_B64_e32));
2206 if (
SrcOp.isImm()) {
2208 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2209 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2252 MI.eraseFromParent();
2255 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2259 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2264 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2269 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2270 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2272 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2273 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2280 MI.eraseFromParent();
2283 case AMDGPU::V_SET_INACTIVE_B32: {
2287 .
add(
MI.getOperand(3))
2288 .
add(
MI.getOperand(4))
2289 .
add(
MI.getOperand(1))
2290 .
add(
MI.getOperand(2))
2291 .
add(
MI.getOperand(5));
2292 MI.eraseFromParent();
2295 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2296 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2297 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2298 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2299 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2300 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2301 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2302 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2303 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2304 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2305 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2306 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2307 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2308 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2309 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2310 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2311 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2312 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2313 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2314 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2315 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2316 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2317 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2318 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2319 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2320 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2321 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2322 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2323 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2328 Opc = AMDGPU::V_MOVRELD_B32_e32;
2330 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2331 : AMDGPU::S_MOVRELD_B32;
2336 bool IsUndef =
MI.getOperand(1).isUndef();
2337 unsigned SubReg =
MI.getOperand(3).getImm();
2338 assert(VecReg ==
MI.getOperand(1).getReg());
2343 .
add(
MI.getOperand(2))
2347 const int ImpDefIdx =
2349 const int ImpUseIdx = ImpDefIdx + 1;
2351 MI.eraseFromParent();
2354 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2355 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2356 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2357 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2358 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2359 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2360 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2361 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2362 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2363 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2364 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2365 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2368 bool IsUndef =
MI.getOperand(1).isUndef();
2377 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2381 .
add(
MI.getOperand(2))
2386 const int ImpDefIdx =
2388 const int ImpUseIdx = ImpDefIdx + 1;
2395 MI.eraseFromParent();
2398 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2399 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2400 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2401 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2402 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2403 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2404 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2405 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2406 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2407 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2408 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2409 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2413 bool IsUndef =
MI.getOperand(1).isUndef();
2431 MI.eraseFromParent();
2434 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2437 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2438 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2461 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2468 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2478 MI.eraseFromParent();
2481 case AMDGPU::ENTER_STRICT_WWM: {
2485 : AMDGPU::S_OR_SAVEEXEC_B64));
2488 case AMDGPU::ENTER_STRICT_WQM: {
2491 const unsigned Exec = ST.
isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
2492 const unsigned WQMOp = ST.
isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
2493 const unsigned MovOp = ST.
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
2497 MI.eraseFromParent();
2500 case AMDGPU::EXIT_STRICT_WWM:
2501 case AMDGPU::EXIT_STRICT_WQM: {
2504 MI.setDesc(
get(ST.
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
2507 case AMDGPU::SI_RETURN: {
2521 MI.eraseFromParent();
2525 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2526 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2527 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2530 case AMDGPU::S_GETPC_B64_pseudo:
2531 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2534 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2556 case AMDGPU::S_LOAD_DWORDX16_IMM:
2557 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2570 for (
auto &CandMO :
I->operands()) {
2571 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2579 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2587 assert(
MRI.use_nodbg_empty(DestReg) &&
"DestReg should have no users yet.");
2589 unsigned NewOpcode = -1;
2590 if (SubregSize == 256)
2591 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2592 else if (SubregSize == 128)
2593 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2600 MRI.setRegClass(DestReg, NewRC);
2603 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2608 MI->getOperand(0).setReg(DestReg);
2609 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2613 OffsetMO->
setImm(FinalOffset);
2619 MI->setMemRefs(*MF, NewMMOs);
std::pair<MachineInstr *, MachineInstr *>
SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
  // ...
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);
  // ...
  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    // ...
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
    // ...
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // ...
    for (unsigned I = 1; I <= 2; ++I) {
      // ...
      if (SrcOp.isImm()) {
        // ...
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
      // ...
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));
    // ...
      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;
    // ...
  if (Dst.isVirtual())
    // ...
  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
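// Illustrative sketch (not from the original source): a pseudo of the form
//   %d:vreg_64 = V_MOV_B64_DPP_PSEUDO %s:vreg_64, <dpp controls>
// is split into two 32-bit DPP moves, one for sub0 and one for sub1, and the
// two halves are recombined with a REG_SEQUENCE when the destination is a
// virtual register; an immediate source is split into its low and high 32 bits
// with ashrInPlace/getLoBits as above.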
2696std::optional<DestSourcePair>
2698 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2701 return std::nullopt;
2706 unsigned Src0OpName,
2708 unsigned Src1OpName)
const {
2715 "All commutable instructions have both src0 and src1 modifiers");
2717 int Src0ModsVal = Src0Mods->
getImm();
2718 int Src1ModsVal = Src1Mods->
getImm();
2720 Src1Mods->
setImm(Src0ModsVal);
2721 Src0Mods->
setImm(Src1ModsVal);
2730 bool IsKill = RegOp.
isKill();
2732 bool IsUndef = RegOp.
isUndef();
2733 bool IsDebug = RegOp.
isDebug();
2735 if (NonRegOp.
isImm())
2737 else if (NonRegOp.
isFI())
2758 int64_t NonRegVal = NonRegOp1.
getImm();
2761 NonRegOp2.
setImm(NonRegVal);
2778 unsigned Opc =
MI.getOpcode();
2786 if ((
int)OpIdx0 == Src0Idx && !MO0->
isReg() &&
2789 if ((
int)OpIdx1 == Src0Idx && !MO1->
isReg() &&
2794 if ((
int)OpIdx1 != Src0Idx && MO0->
isReg()) {
2799 if ((
int)OpIdx0 != Src0Idx && MO1->
isReg()) {
2813 unsigned Src1Idx)
const {
2814 assert(!NewMI &&
"this should never be used");
2816 unsigned Opc =
MI.getOpcode();
2818 if (CommutedOpcode == -1)
2821 if (Src0Idx > Src1Idx)
2825 static_cast<int>(Src0Idx) &&
2827 static_cast<int>(Src1Idx) &&
2828 "inconsistency with findCommutedOpIndices");
2853 Src1, AMDGPU::OpName::src1_modifiers);
2856 AMDGPU::OpName::src1_sel);
2868 unsigned &SrcOpIdx0,
2869 unsigned &SrcOpIdx1)
const {
2874 unsigned &SrcOpIdx0,
2875 unsigned &SrcOpIdx1)
const {
2876 if (!
Desc.isCommutable())
2879 unsigned Opc =
Desc.getOpcode();
2888 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2892 int64_t BrOffset)
const {
2895 assert(BranchOp != AMDGPU::S_SETPC_B64);
2909 return MI.getOperand(0).getMBB();
2914 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
2915 MI.getOpcode() == AMDGPU::SI_LOOP)
2926 assert(RS &&
"RegScavenger required for long branching");
2928 "new block should be inserted for expanding unconditional branch");
2931 "restore block should be inserted for restoring clobbered registers");
2939 Register PCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2947 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
2948 if (FlushSGPRWrites)
2956 ApplyHazardWorkarounds();
2960 MCCtx.createTempSymbol(
"post_getpc",
true);
2964 MCCtx.createTempSymbol(
"offset_lo",
true);
2966 MCCtx.createTempSymbol(
"offset_hi",
true);
2969 .
addReg(PCReg, 0, AMDGPU::sub0)
2973 .
addReg(PCReg, 0, AMDGPU::sub1)
2975 ApplyHazardWorkarounds();
3016 if (LongBranchReservedReg) {
3018 Scav = LongBranchReservedReg;
3027 MRI.replaceRegWith(PCReg, Scav);
3028 MRI.clearVirtRegs();
3034 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3035 MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
3036 MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  switch (Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;
  // ...
  }
}

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
    return SCC_FALSE;
  case AMDGPU::S_CBRANCH_SCC1:
    return SCC_TRUE;
  case AMDGPU::S_CBRANCH_VCCNZ:
    return VCCNZ;
  case AMDGPU::S_CBRANCH_VCCZ:
    return VCCZ;
  case AMDGPU::S_CBRANCH_EXECNZ:
    return EXECNZ;
  case AMDGPU::S_CBRANCH_EXECZ:
    return EXECZ;
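// Illustrative property (sketch): getBranchOpcode and getBranchPredicate are
// inverses over the supported predicates, e.g.
//   getBranchPredicate(getBranchOpcode(SIInstrInfo::SCC_TRUE)) == SCC_TRUE,
// while any opcode outside this set maps to INVALID_BR.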
3094 bool AllowModify)
const {
3095 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3097 TBB =
I->getOperand(0).getMBB();
3101 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3102 if (Pred == INVALID_BR)
3107 Cond.push_back(
I->getOperand(1));
3117 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3119 FBB =
I->getOperand(0).getMBB();
3129 bool AllowModify)
const {
3137 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3138 switch (
I->getOpcode()) {
3139 case AMDGPU::S_MOV_B64_term:
3140 case AMDGPU::S_XOR_B64_term:
3141 case AMDGPU::S_OR_B64_term:
3142 case AMDGPU::S_ANDN2_B64_term:
3143 case AMDGPU::S_AND_B64_term:
3144 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3145 case AMDGPU::S_MOV_B32_term:
3146 case AMDGPU::S_XOR_B32_term:
3147 case AMDGPU::S_OR_B32_term:
3148 case AMDGPU::S_ANDN2_B32_term:
3149 case AMDGPU::S_AND_B32_term:
3150 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3153 case AMDGPU::SI_ELSE:
3154 case AMDGPU::SI_KILL_I1_TERMINATOR:
3155 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3172 int *BytesRemoved)
const {
3174 unsigned RemovedSize = 0;
3177 if (
MI.isBranch() ||
MI.isReturn()) {
3179 MI.eraseFromParent();
3185 *BytesRemoved = RemovedSize;
3202 int *BytesAdded)
const {
3203 if (!FBB &&
Cond.empty()) {
3214 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].getImm()));
3251 if (
Cond.size() != 2) {
3266 Register FalseReg,
int &CondCycles,
3267 int &TrueCycles,
int &FalseCycles)
const {
3268 switch (
Cond[0].getImm()) {
3273 if (
MRI.getRegClass(FalseReg) != RC)
3277 CondCycles = TrueCycles = FalseCycles = NumInsts;
3280 return RI.
hasVGPRs(RC) && NumInsts <= 6;
3288 if (
MRI.getRegClass(FalseReg) != RC)
3294 if (NumInsts % 2 == 0)
3297 CondCycles = TrueCycles = FalseCycles = NumInsts;
3309 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3310 if (Pred == VCCZ || Pred == SCC_FALSE) {
3311 Pred =
static_cast<BranchPredicate
>(-Pred);
3317 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3319 if (DstSize == 32) {
3321 if (Pred == SCC_TRUE) {
3336 if (DstSize == 64 && Pred == SCC_TRUE) {
3346 static const int16_t Sub0_15[] = {
3347 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3348 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3349 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3350 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3353 static const int16_t Sub0_15_64[] = {
3354 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3355 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3356 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3357 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  // ...
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;
  // ...
  if (Pred == SCC_TRUE) {
    // ...
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
    // ...
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;
    // ...
  }

  MachineInstrBuilder MIB = BuildMI(
      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  // ...
  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);
    // ...
    unsigned SubIdx = SubIndices[Idx];
    // ...
    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
      // ...
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
    // ...
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
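// Illustrative sketch (not from the original source): selecting between two
// 128-bit VGPR tuples expands to four per-element V_CNDMASK_B32_e32 selects
// over sub0..sub3 whose results are reassembled by the REG_SEQUENCE built
// above; with an SCC condition the scalar path instead uses S_CSELECT_B32 or
// S_CSELECT_B64 over the 32- or 64-bit sub-indices.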
3413 switch (
MI.getOpcode()) {
3414 case AMDGPU::V_MOV_B16_t16_e32:
3415 case AMDGPU::V_MOV_B16_t16_e64:
3416 case AMDGPU::V_MOV_B32_e32:
3417 case AMDGPU::V_MOV_B32_e64:
3418 case AMDGPU::V_MOV_B64_PSEUDO:
3419 case AMDGPU::V_MOV_B64_e32:
3420 case AMDGPU::V_MOV_B64_e64:
3421 case AMDGPU::S_MOV_B32:
3422 case AMDGPU::S_MOV_B64:
3423 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3425 case AMDGPU::WWM_COPY:
3426 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3427 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3428 case AMDGPU::V_ACCVGPR_MOV_B32:
3436 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3437 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3438 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3441 unsigned Opc =
MI.getOpcode();
3445 MI.removeOperand(
Idx);
3451 if (!
MRI->hasOneNonDBGUse(Reg))
3454 switch (
DefMI.getOpcode()) {
3457 case AMDGPU::V_MOV_B64_e32:
3458 case AMDGPU::S_MOV_B64:
3459 case AMDGPU::V_MOV_B64_PSEUDO:
3460 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3461 case AMDGPU::V_MOV_B32_e32:
3462 case AMDGPU::S_MOV_B32:
3463 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3470 if (!ImmOp->
isImm())
3473 auto getImmFor = [ImmOp](
const MachineOperand &UseOp) -> int64_t {
3474 int64_t Imm = ImmOp->
getImm();
3475 switch (UseOp.getSubReg()) {
3483 return SignExtend64<16>(Imm);
3485 return SignExtend64<16>(Imm >> 16);
3486 case AMDGPU::sub1_lo16:
3487 return SignExtend64<16>(Imm >> 32);
3488 case AMDGPU::sub1_hi16:
3489 return SignExtend64<16>(Imm >> 48);
3493 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3495 unsigned Opc =
UseMI.getOpcode();
3496 if (Opc == AMDGPU::COPY) {
3497 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3501 bool Is16Bit = OpSize == 2;
3502 bool Is64Bit = OpSize == 8;
3504 unsigned NewOpc =
isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
3505 : AMDGPU::V_MOV_B32_e32
3506 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
3507 : AMDGPU::S_MOV_B32;
3508 APInt Imm(Is64Bit ? 64 : 32, getImmFor(
UseMI.getOperand(1)),
3514 NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
3521 if (DstReg.
isVirtual() &&
UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
3524 UseMI.getOperand(0).setSubReg(0);
3527 UseMI.getOperand(0).setReg(DstReg);
3537 UseMI.setDesc(NewMCID);
3538 UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
3543 if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
3544 Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3545 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3546 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3547 Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
3562 bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
3563 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
3565 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3566 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3567 Opc == AMDGPU::V_FMAC_F16_fake16_e64;
3575 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3576 if (!RegSrc->
isReg())
3594 if (Def && Def->isMoveImmediate() &&
3599 IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3601 : AMDGPU::V_FMAMK_F16)
3602 : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
3609 if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
3612 const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
3618 unsigned SrcSubReg = RegSrc->
getSubReg();
3623 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3624 Opc == AMDGPU::V_FMAC_F32_e64 ||
3625 Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
3626 UseMI.untieRegOperand(
3629 Src1->ChangeToImmediate(Imm);
3634 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3636 DefMI.eraseFromParent();
3646 bool Src0Inlined =
false;
3647 if (Src0->
isReg()) {
3652 if (Def && Def->isMoveImmediate() &&
3664 if (Src1->
isReg() && !Src0Inlined) {
3667 if (Def && Def->isMoveImmediate() &&
3678 IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3680 : AMDGPU::V_FMAAK_F16)
3681 : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
3688 if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
3694 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3695 Opc == AMDGPU::V_FMAC_F32_e64 ||
3696 Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
3697 UseMI.untieRegOperand(
3711 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3713 DefMI.eraseFromParent();
3725 if (BaseOps1.
size() != BaseOps2.
size())
3727 for (
size_t I = 0, E = BaseOps1.
size();
I < E; ++
I) {
3728 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowWidth.hasValue() &&
         LowOffset + (int)LowWidth.getValue() <= HighOffset;
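// Worked example (illustrative): for accesses at OffsetA = 0 with WidthA = 16
// bytes and OffsetB = 16, LowOffset + LowWidth = 16 <= HighOffset, so the two
// ranges are provably disjoint; with OffsetB = 12 the check fails and the
// accesses must be treated as potentially overlapping.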
3743bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
3746 int64_t Offset0, Offset1;
3748 bool Offset0IsScalable, Offset1IsScalable;
3770 "MIa must load from or modify a memory location");
3772 "MIb must load from or modify a memory location");
3791 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3798 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3808 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3822 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3833 if (Reg.isPhysical())
3835 auto *Def =
MRI.getUniqueVRegDef(Reg);
3837 Imm = Def->getOperand(1).getImm();
3857 unsigned NumOps =
MI.getNumOperands();
3858 for (
unsigned I = 1;
I < NumOps; ++
I) {
3860 if (
Op.isReg() &&
Op.isKill())
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
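// Example (illustrative): converting the two-address V_MAC_F32_e32/_e64 form
// into three-address code selects V_MAD_F32_e64, the FMA variants map to the
// corresponding V_FMA_* opcodes, and the f16 FMA case picks the gfx9 fake16 or
// plain gfx9 encoding depending on ST.hasTrue16BitInsts().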
3900 unsigned Opc =
MI.getOpcode();
3904 if (NewMFMAOpc != -1) {
3907 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I)
3908 MIB.
add(
MI.getOperand(
I));
3914 if (Def.isEarlyClobber() && Def.isReg() &&
3919 auto UpdateDefIndex = [&](
LiveRange &LR) {
3920 auto *S = LR.
find(OldIndex);
3921 if (S != LR.end() && S->start == OldIndex) {
3922 assert(S->valno && S->valno->def == OldIndex);
3923 S->start = NewIndex;
3924 S->valno->def = NewIndex;
3928 for (
auto &SR : LI.subranges())
3939 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I)
3950 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
3951 "V_FMAC_F16_fake16_e32 is not supported and not expected to be present "
3955 bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
3956 Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3957 Opc == AMDGPU::V_FMAC_F16_fake16_e64;
3958 bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3959 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
3960 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
3961 Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3962 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3963 Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
3964 bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
3965 bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
3966 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
3967 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
3968 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
3969 bool Src0Literal =
false;
3974 case AMDGPU::V_MAC_F16_e64:
3975 case AMDGPU::V_FMAC_F16_e64:
3976 case AMDGPU::V_FMAC_F16_fake16_e64:
3977 case AMDGPU::V_MAC_F32_e64:
3978 case AMDGPU::V_MAC_LEGACY_F32_e64:
3979 case AMDGPU::V_FMAC_F32_e64:
3980 case AMDGPU::V_FMAC_LEGACY_F32_e64:
3981 case AMDGPU::V_FMAC_F64_e64:
3983 case AMDGPU::V_MAC_F16_e32:
3984 case AMDGPU::V_FMAC_F16_e32:
3985 case AMDGPU::V_MAC_F32_e32:
3986 case AMDGPU::V_MAC_LEGACY_F32_e32:
3987 case AMDGPU::V_FMAC_F32_e32:
3988 case AMDGPU::V_FMAC_LEGACY_F32_e32:
3989 case AMDGPU::V_FMAC_F64_e32: {
3991 AMDGPU::OpName::src0);
4018 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
4024 const auto killDef = [&]() ->
void {
4029 if (
MRI.hasOneNonDBGUse(DefReg)) {
4046 Register DummyReg =
MRI.cloneVirtualRegister(DefReg);
4048 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4049 MIOp.setIsUndef(
true);
4050 MIOp.setReg(DummyReg);
4062 : AMDGPU::V_FMAAK_F16)
4063 : AMDGPU::V_FMAAK_F32)
4064 : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
4081 : AMDGPU::V_FMAMK_F16)
4082 : AMDGPU::V_FMAMK_F32)
4083 : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
4148 MIB.
addImm(OpSel ? OpSel->getImm() : 0);
4159 switch (
MI.getOpcode()) {
4160 case AMDGPU::S_SET_GPR_IDX_ON:
4161 case AMDGPU::S_SET_GPR_IDX_MODE:
4162 case AMDGPU::S_SET_GPR_IDX_OFF:
4180 if (
MI.isTerminator() ||
MI.isPosition())
4184 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4187 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4193 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4194 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4195 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4196 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4201 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
isGWS(Opcode);
4212 unsigned Opcode =
MI.getOpcode();
4227 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4228 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4229 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
4232 if (
MI.isCall() ||
MI.isInlineAsm())
4248 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4249 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4250 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4251 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4259 if (
MI.isMetaInstruction())
4263 if (
MI.isCopyLike()) {
4268 return MI.readsRegister(AMDGPU::EXEC, &RI);
4279 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4283 switch (Imm.getBitWidth()) {
4303 APInt IntImm = Imm.bitcastToAPInt();
4323 assert(!MO.
isReg() &&
"isInlineConstant called on register operand!");
4332 int64_t Imm = MO.
getImm();
4333 switch (OperandType) {
4346 int32_t Trunc =
static_cast<int32_t
>(Imm);
4386 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
4391 int16_t Trunc =
static_cast<int16_t
>(Imm);
4402 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
4403 int16_t Trunc =
static_cast<int16_t
>(Imm);
4463 AMDGPU::OpName::src2))
4479 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.
hasGFX90AInsts())
4499 return Mods && Mods->
getImm();
4512 switch (
MI.getOpcode()) {
4513 default:
return false;
4515 case AMDGPU::V_ADDC_U32_e64:
4516 case AMDGPU::V_SUBB_U32_e64:
4517 case AMDGPU::V_SUBBREV_U32_e64: {
4525 case AMDGPU::V_MAC_F16_e64:
4526 case AMDGPU::V_MAC_F32_e64:
4527 case AMDGPU::V_MAC_LEGACY_F32_e64:
4528 case AMDGPU::V_FMAC_F16_e64:
4529 case AMDGPU::V_FMAC_F16_fake16_e64:
4530 case AMDGPU::V_FMAC_F32_e64:
4531 case AMDGPU::V_FMAC_F64_e64:
4532 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4538 case AMDGPU::V_CNDMASK_B32_e64:
4574 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4583 unsigned Op32)
const {
4597 Inst32.
add(
MI.getOperand(
I));
4601 int Idx =
MI.getNumExplicitDefs();
4603 int OpTy =
MI.getDesc().operands()[
Idx++].OperandType;
4641 if (MO.
getReg() == AMDGPU::SGPR_NULL || MO.
getReg() == AMDGPU::SGPR_NULL64)
4646 return MO.
getReg() == AMDGPU::M0 || MO.
getReg() == AMDGPU::VCC ||
4647 MO.
getReg() == AMDGPU::VCC_LO;
4649 return AMDGPU::SReg_32RegClass.contains(MO.
getReg()) ||
4650 AMDGPU::SReg_64RegClass.contains(MO.
getReg());
4659 switch (MO.getReg()) {
4661 case AMDGPU::VCC_LO:
4662 case AMDGPU::VCC_HI:
4664 case AMDGPU::FLAT_SCR:
4677 switch (
MI.getOpcode()) {
4678 case AMDGPU::V_READLANE_B32:
4679 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4680 case AMDGPU::V_WRITELANE_B32:
4681 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4688 if (
MI.isPreISelOpcode() ||
4689 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
4704 if (
SubReg.getReg().isPhysical())
4707 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
4719 ErrInfo =
"illegal copy from vector register to SGPR";
4737 if (!
MRI.isSSA() &&
MI.isCopy())
4738 return verifyCopy(
MI,
MRI, ErrInfo);
4740 if (SIInstrInfo::isGenericOpcode(
MI.getOpcode()))
4747 if (Src0Idx == -1) {
4757 if (!
Desc.isVariadic() &&
4758 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
4759 ErrInfo =
"Instruction has wrong number of operands.";
4763 if (
MI.isInlineAsm()) {
4776 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
4777 ErrInfo =
"inlineasm operand has incorrect register class.";
4785 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
4786 ErrInfo =
"missing memory operand from image instruction.";
4791 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
4794 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
4795 "all fp values to integers.";
4799 int RegClass =
Desc.operands()[i].RegClass;
4801 switch (
Desc.operands()[i].OperandType) {
4803 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
4804 ErrInfo =
"Illegal immediate value for operand.";
4825 ErrInfo =
"Illegal immediate value for operand.";
4832 ErrInfo =
"Expected inline constant for operand.";
4841 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
4842 ErrInfo =
"Expected immediate, but got non-immediate";
4864 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
4873 ErrInfo =
"Subtarget requires even aligned vector registers";
4878 if (RegClass != -1) {
4879 if (Reg.isVirtual())
4884 ErrInfo =
"Operand has incorrect register class.";
4893 ErrInfo =
"SDWA is not supported on this target";
4899 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
4907 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
4914 "Only reg allowed as operands in SDWA instructions on GFX9+";
4923 if (OMod !=
nullptr &&
4925 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
4930 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
4931 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
4932 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
4933 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
4936 unsigned Mods = Src0ModsMO->
getImm();
4939 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
4945 if (
isVOPC(BasicOpcode)) {
4949 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
4950 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
4956 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
4957 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
4963 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
4964 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
4971 if (DstUnused && DstUnused->isImm() &&
4974 if (!Dst.isReg() || !Dst.isTied()) {
4975 ErrInfo =
"Dst register should have tied register";
4980 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
4983 "Dst register should be tied to implicit use of preserved register";
4987 ErrInfo =
"Dst register should use same physical register as preserved";
5019 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5020 if (RegCount > DstSize) {
5021 ErrInfo =
"Image instruction returns too many registers for dst "
5030 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5031 unsigned ConstantBusCount = 0;
5032 bool UsesLiteral =
false;
5039 LiteralVal = &
MI.getOperand(ImmIdx);
5048 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5059 }
else if (!MO.
isFI()) {
5066 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5076 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5077 return !RI.regsOverlap(SGPRUsed, SGPR);
5087 Opcode != AMDGPU::V_WRITELANE_B32) {
5088 ErrInfo =
"VOP* instruction violates constant bus restriction";
5093 ErrInfo =
"VOP3 instruction uses literal";
5100 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5101 unsigned SGPRCount = 0;
5104 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5112 if (MO.
getReg() != SGPRUsed)
5118 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5125 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5126 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5133 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5143 ErrInfo =
"ABS not allowed in VOP3B instructions";
5156 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5163 if (
Desc.isBranch()) {
5165 ErrInfo =
"invalid branch target for SOPK instruction";
5171 if (!isUInt<16>(Imm)) {
5172 ErrInfo =
"invalid immediate for SOPK instruction";
5176 if (!isInt<16>(Imm)) {
5177 ErrInfo =
"invalid immediate for SOPK instruction";
5184 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5185 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5186 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5187 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5188 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5189 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5191 const unsigned StaticNumOps =
5192 Desc.getNumOperands() +
Desc.implicit_uses().size();
5193 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5198 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5199 ErrInfo =
"missing implicit register operands";
5205 if (!Dst->isUse()) {
5206 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5211 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5212 UseOpIdx != StaticNumOps + 1) {
5213 ErrInfo =
"movrel implicit operands should be tied";
5220 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5222 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5223 ErrInfo =
"src0 should be subreg of implicit vector use";
5231 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5232 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5238 if (
MI.mayStore() &&
5243 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5244 ErrInfo =
"scalar stores must use m0 as offset register";
5252 if (
Offset->getImm() != 0) {
5253 ErrInfo =
"subtarget does not support offsets in flat instructions";
5260 if (GDSOp && GDSOp->
getImm() != 0) {
5261 ErrInfo =
"GDS is not supported on this subtarget";
5270 AMDGPU::OpName::vaddr0);
5272 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5281 ErrInfo =
"dim is out of range";
5288 IsA16 = R128A16->
getImm() != 0;
5289 }
else if (ST.
hasA16()) {
5291 IsA16 = A16->
getImm() != 0;
5294 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5296 unsigned AddrWords =
5299 unsigned VAddrWords;
5301 VAddrWords = RsrcIdx - VAddr0Idx;
5304 unsigned LastVAddrIdx = RsrcIdx - 1;
5305 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5313 if (VAddrWords != AddrWords) {
5315 <<
" but got " << VAddrWords <<
"\n");
5316 ErrInfo =
"bad vaddr size";
5324 using namespace AMDGPU::DPP;
5326 unsigned DC = DppCt->
getImm();
5327 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5328 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5329 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5330 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5331 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5332 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5333 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5334 ErrInfo =
"Invalid dpp_ctrl value";
    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST &&
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
    uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
                                        : AMDGPU::OpName::vdata;

      ErrInfo = "Invalid register class: "
                "vdata and vdst should be both VGPR or AGPR";

    if (Data && Data2 &&
      ErrInfo = "Invalid register class: "
                "both data operands should be VGPR or AGPR";

    if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
      ErrInfo = "Invalid register class: "
                "agpr loads and stores not supported on this GPU";
  const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool {
    if (Reg.isPhysical())

  if (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
      MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
      MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
    if (!isAlignedReg(AMDGPU::OpName::data0)) {
      ErrInfo = "Subtarget requires even aligned vector registers "
                "for DS_GWS instructions";

    if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
      ErrInfo = "Subtarget requires even aligned vector registers "
                "for vaddr operand of image instructions";

  if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";
5465 switch (
MI.getOpcode()) {
5466 default:
return AMDGPU::INSTRUCTION_LIST_END;
5467 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5468 case AMDGPU::COPY:
return AMDGPU::COPY;
5469 case AMDGPU::PHI:
return AMDGPU::PHI;
5470 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5471 case AMDGPU::WQM:
return AMDGPU::WQM;
5472 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5473 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5474 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
                   RI.isAGPR(MRI, MI.getOperand(0).getReg())
               ? AMDGPU::COPY
               : AMDGPU::V_MOV_B32_e32;
  }
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5489 case AMDGPU::S_ADD_U32:
5490 return AMDGPU::V_ADD_CO_U32_e32;
5491 case AMDGPU::S_SUB_U32:
5492 return AMDGPU::V_SUB_CO_U32_e32;
5493 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5494 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5495 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5496 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5497 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5498 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5499 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5502 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5503 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5504 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5505 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5506 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5507 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5508 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5509 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5510 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5511 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5512 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5513 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5514 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5515 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5516 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5517 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5518 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5519 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5520 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5521 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5522 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5523 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5524 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5525 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5526 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5527 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5528 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5529 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5530 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5531 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5532 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5533 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5534 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5535 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5536 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5537 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5538 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5539 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5540 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5541 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5542 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5543 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5544 case AMDGPU::S_CVT_F32_F16:
5545 case AMDGPU::S_CVT_HI_F32_F16:
5547 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5548 case AMDGPU::S_CVT_F16_F32:
5550 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5551 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5552 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5553 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5554 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5555 case AMDGPU::S_CEIL_F16:
5557 : AMDGPU::V_CEIL_F16_fake16_e64;
5558 case AMDGPU::S_FLOOR_F16:
5560 : AMDGPU::V_FLOOR_F16_fake16_e64;
5561 case AMDGPU::S_TRUNC_F16:
5562 return AMDGPU::V_TRUNC_F16_fake16_e64;
5563 case AMDGPU::S_RNDNE_F16:
5564 return AMDGPU::V_RNDNE_F16_fake16_e64;
5565 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5566 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5567 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5568 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5569 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5570 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5571 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5572 case AMDGPU::S_ADD_F16:
return AMDGPU::V_ADD_F16_fake16_e64;
5573 case AMDGPU::S_SUB_F16:
return AMDGPU::V_SUB_F16_fake16_e64;
5574 case AMDGPU::S_MIN_F16:
return AMDGPU::V_MIN_F16_fake16_e64;
5575 case AMDGPU::S_MAX_F16:
return AMDGPU::V_MAX_F16_fake16_e64;
5576 case AMDGPU::S_MINIMUM_F16:
5578 : AMDGPU::V_MINIMUM_F16_fake16_e64;
5579 case AMDGPU::S_MAXIMUM_F16:
5581 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5582 case AMDGPU::S_MUL_F16:
return AMDGPU::V_MUL_F16_fake16_e64;
5583 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5584 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5585 case AMDGPU::S_FMAC_F16:
return AMDGPU::V_FMAC_F16_fake16_e64;
5586 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5587 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5588 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5589 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5590 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5591 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5592 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5593 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5594 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5595 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5596 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5597 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5598 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5599 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5600 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5601 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5602 case AMDGPU::S_CMP_LT_F16:
5604 : AMDGPU::V_CMP_LT_F16_fake16_e64;
5605 case AMDGPU::S_CMP_EQ_F16:
5607 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
5608 case AMDGPU::S_CMP_LE_F16:
5610 : AMDGPU::V_CMP_LE_F16_fake16_e64;
5611 case AMDGPU::S_CMP_GT_F16:
5613 : AMDGPU::V_CMP_GT_F16_fake16_e64;
5614 case AMDGPU::S_CMP_LG_F16:
5616 : AMDGPU::V_CMP_LG_F16_fake16_e64;
5617 case AMDGPU::S_CMP_GE_F16:
5619 : AMDGPU::V_CMP_GE_F16_fake16_e64;
5620 case AMDGPU::S_CMP_O_F16:
5622 : AMDGPU::V_CMP_O_F16_fake16_e64;
5623 case AMDGPU::S_CMP_U_F16:
5625 : AMDGPU::V_CMP_U_F16_fake16_e64;
5626 case AMDGPU::S_CMP_NGE_F16:
5628 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
5629 case AMDGPU::S_CMP_NLG_F16:
5631 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
5632 case AMDGPU::S_CMP_NGT_F16:
5634 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
5635 case AMDGPU::S_CMP_NLE_F16:
5637 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
5638 case AMDGPU::S_CMP_NEQ_F16:
5640 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
5641 case AMDGPU::S_CMP_NLT_F16:
5643 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
5644 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5645 case AMDGPU::V_S_EXP_F16_e64:
return AMDGPU::V_EXP_F16_fake16_e64;
5646 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5647 case AMDGPU::V_S_LOG_F16_e64:
return AMDGPU::V_LOG_F16_fake16_e64;
5648 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5649 case AMDGPU::V_S_RCP_F16_e64:
return AMDGPU::V_RCP_F16_fake16_e64;
5650 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5651 case AMDGPU::V_S_RSQ_F16_e64:
return AMDGPU::V_RSQ_F16_fake16_e64;
5652 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5653 case AMDGPU::V_S_SQRT_F16_e64:
return AMDGPU::V_SQRT_F16_fake16_e64;
5656 "Unexpected scalar opcode without corresponding vector one!");
  bool IsWave32 = ST.isWave32();
  unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
  const unsigned OrSaveExec =
      IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
  unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  auto ExecRestoreMI =
                                bool IsAllocatable) {
  if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
    case AMDGPU::AV_32RegClassID:
      RCID = AMDGPU::VGPR_32RegClassID;
    case AMDGPU::AV_64RegClassID:
      RCID = AMDGPU::VReg_64RegClassID;
    case AMDGPU::AV_96RegClassID:
      RCID = AMDGPU::VReg_96RegClassID;
    case AMDGPU::AV_128RegClassID:
      RCID = AMDGPU::VReg_128RegClassID;
    case AMDGPU::AV_160RegClassID:
      RCID = AMDGPU::VReg_160RegClassID;
    case AMDGPU::AV_512RegClassID:
      RCID = AMDGPU::VReg_512RegClassID;
5748 auto RegClass = TID.
operands()[OpNum].RegClass;
5749 bool IsAllocatable =
false;
5759 AMDGPU::OpName::vdst);
5762 : AMDGPU::OpName::vdata);
5763 if (DataIdx != -1) {
5765 TID.
Opcode, AMDGPU::OpName::data1);
5773 unsigned OpNo)
const {
5776 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
5777 Desc.operands()[OpNo].RegClass == -1) {
5780 if (Reg.isVirtual())
5781 return MRI.getRegClass(Reg);
5782 return RI.getPhysRegBaseClass(Reg);
5785 unsigned RCID =
Desc.operands()[OpNo].RegClass;
5794 unsigned RCID =
get(
MI.getOpcode()).operands()[OpIdx].RegClass;
5796 unsigned Size = RI.getRegSizeInBits(*RC);
5797 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
5798 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
5799 : AMDGPU::V_MOV_B32_e32;
5801 Opcode = AMDGPU::COPY;
5803 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
5817 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
5823 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
5834 if (SubIdx == AMDGPU::sub0)
5836 if (SubIdx == AMDGPU::sub1)
5848void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
5864 if (Reg.isPhysical())
5875 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
5886 unsigned Opc =
MI.getOpcode();
5901 Opc,
isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5902 if ((
int)OpIdx == VDstIdx && DataIdx != -1 &&
5903 MI.getOperand(DataIdx).isReg() &&
5904 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
5906 if ((
int)OpIdx == DataIdx) {
5907 if (VDstIdx != -1 &&
5908 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5912 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
5913 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5918 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts() &&
5945 MO = &
MI.getOperand(OpIdx);
5957 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
5963 if (!SGPRsUsed.
count(SGPR) &&
5966 if (--ConstantBusLimit <= 0)
5972 if (!LiteralLimit--)
5974 if (--ConstantBusLimit <= 0)
5993 bool Is64BitOp = Is64BitFPOp ||
6006 if (!Is64BitFPOp && (int32_t)Imm < 0)
6024 unsigned Opc =
MI.getOpcode();
6043 if (Opc == AMDGPU::V_WRITELANE_B32) {
6046 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6052 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6069 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
6071 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6083 if (Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6085 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6097 if (HasImplicitSGPR || !
MI.isCommutable()) {
6114 if (CommutedOpc == -1) {
6119 MI.setDesc(
get(CommutedOpc));
6123 bool Src0Kill = Src0.
isKill();
6127 else if (Src1.
isReg()) {
6142 unsigned Opc =
MI.getOpcode();
6150 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6151 Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
6157 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6163 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6174 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6176 SGPRsUsed.
insert(SGPRReg);
6180 for (
int Idx : VOP3Idx) {
6189 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6214 if (ConstantBusLimit > 0) {
6226 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6227 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6237 SRC = RI.getCommonSubClass(SRC, DstRC);
6240 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6244 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6246 get(TargetOpcode::COPY), NewSrcReg)
6253 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6259 for (
unsigned i = 0; i < SubRegs; ++i) {
6260 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6262 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6269 get(AMDGPU::REG_SEQUENCE), DstReg);
6270 for (
unsigned i = 0; i < SubRegs; ++i) {
6285 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6287 SBase->setReg(SGPR);
6299 if (OldSAddrIdx < 0)
6316 if (NewVAddrIdx < 0)
6323 if (OldVAddrIdx >= 0) {
6325 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6326 if (!VAddrDef || VAddrDef->
getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6337 if (OldVAddrIdx == NewVAddrIdx) {
6340 MRI.removeRegOperandFromUseList(&NewVAddr);
6341 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6345 MRI.removeRegOperandFromUseList(&NewVAddr);
6346 MRI.addRegOperandToUseList(&NewVAddr);
6348 assert(OldSAddrIdx == NewVAddrIdx);
6350 if (OldVAddrIdx >= 0) {
6352 AMDGPU::OpName::vdst_in);
6356 if (NewVDstIn != -1) {
6363 if (NewVDstIn != -1) {
6405 unsigned OpSubReg =
Op.getSubReg();
6414 Register DstReg =
MRI.createVirtualRegister(DstRC);
6424 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6427 bool ImpDef = Def->isImplicitDef();
6428 while (!ImpDef && Def && Def->isCopy()) {
6429 if (Def->getOperand(1).getReg().isPhysical())
6431 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6432 ImpDef = Def && Def->isImplicitDef();
6434 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6452 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6453 unsigned SaveExecOpc =
6454 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6455 unsigned XorTermOpc =
6456 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6458 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6459 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6465 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6466 unsigned NumSubRegs =
RegSize / 32;
6467 Register VScalarOp = ScalarOp->getReg();
6469 if (NumSubRegs == 1) {
6470 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6472 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6475 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6477 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6483 CondReg = NewCondReg;
6485 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6493 ScalarOp->setReg(CurReg);
6494 ScalarOp->setIsKill();
6498 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6499 "Unhandled register size");
6501 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6502 Register CurRegLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6503 Register CurRegHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6506 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6507 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6510 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6511 .
addReg(VScalarOp, VScalarOpUndef,
6512 TRI->getSubRegFromChannel(
Idx + 1));
6518 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6519 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6525 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6526 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6529 if (NumSubRegs <= 2)
6530 Cmp.addReg(VScalarOp);
6532 Cmp.addReg(VScalarOp, VScalarOpUndef,
6533 TRI->getSubRegFromChannel(
Idx, 2));
6537 CondReg = NewCondReg;
6539 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6547 const auto *SScalarOpRC =
6548 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6549 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6553 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6554 unsigned Channel = 0;
6555 for (
Register Piece : ReadlanePieces) {
6556 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6560 ScalarOp->setReg(SScalarOp);
6561 ScalarOp->setIsKill();
6565 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6566 MRI.setSimpleHint(SaveExec, CondReg);
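// Host-side sketch of the waterfall loop emitted above (simulation only, not
// the MachineIR-building code): repeatedly read the first active lane's value
// (v_readfirstlane), build the mask of lanes holding the same value
// (v_cmp_eq + s_and with exec), run the operation once for that uniform value,
// and remove those lanes from exec until none remain.
#include <cstdint>

static unsigned waterfallIterations(const uint32_t Vals[], unsigned NumLanes) {
  uint64_t Exec = (NumLanes >= 64) ? ~0ull : ((1ull << NumLanes) - 1);
  unsigned Trips = 0;
  while (Exec) {
    unsigned FirstLane = __builtin_ctzll(Exec); // v_readfirstlane picks a lane
    uint32_t Uniform = Vals[FirstLane];         // now a scalar ("uniform") value
    uint64_t Match = 0;
    for (unsigned L = 0; L < NumLanes; ++L)     // v_cmp_eq_u32 across lanes
      if (((Exec >> L) & 1) && Vals[L] == Uniform)
        Match |= 1ull << L;
    // <the memory operation would execute here with exec = Match>
    Exec &= ~Match;                             // drop the handled lanes
    ++Trips;
  }
  return Trips;                                 // one trip per distinct value
}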
6597 if (!Begin.isValid())
6599 if (!
End.isValid()) {
6604 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6605 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6606 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6615 std::numeric_limits<unsigned>::max()) !=
6618 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6624 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6633 for (
auto I = Begin;
I != AfterMI;
I++) {
6634 for (
auto &MO :
I->all_uses())
6635 MRI.clearKillFlags(MO.getReg());
6670 for (
auto &Succ : RemainderBB->
successors()) {
static std::tuple<unsigned, unsigned>
extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
  Register RsrcPtr =
      TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
                             AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);

  Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
  uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();

      .addImm(AMDGPU::sub0_sub1)

  return std::tuple(RsrcPtr, NewSRsrc);
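// Host-side sketch of what extractRsrcPtr does to a buffer resource: keep the
// 64-bit base pointer (sub0_sub1 above) and rebuild the remaining words from
// the default data format. The two-halves layout here is illustrative, not
// the real 128-bit SRD encoding.
#include <cstdint>
#include <utility>

static std::pair<uint64_t, uint64_t>
splitToyRsrc(uint64_t RsrcLo64, uint64_t RsrcHi64, uint64_t DefaultFormat) {
  uint64_t BasePtr = RsrcLo64;      // extracted pointer, used as a plain vaddr
  (void)RsrcHi64;                   // original stride/num_records are dropped
  return {BasePtr, DefaultFormat};  // new resource carries only the default format
}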
6769 if (
MI.getOpcode() == AMDGPU::PHI) {
6771 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
6772 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
6775 MRI.getRegClass(
MI.getOperand(i).getReg());
6790 VRC = &AMDGPU::VReg_1RegClass;
6806 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6808 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6824 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6831 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6833 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6851 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6856 if (DstRC != Src0RC) {
6865 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6873 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6874 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6875 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6876 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6877 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
6878 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
6879 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
6894 : AMDGPU::OpName::srsrc;
6899 int SampOpName =
isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6908 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6914 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6915 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6920 while (Start->getOpcode() != FrameSetupOpcode)
6923 while (
End->getOpcode() != FrameDestroyOpcode)
6927 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
6928 MI.definesRegister(
End->getOperand(1).getReg(),
nullptr))
6936 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6938 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6949 bool isSoffsetLegal =
true;
6952 if (SoffsetIdx != -1) {
6956 isSoffsetLegal =
false;
6960 bool isRsrcLegal =
true;
6963 if (RsrcIdx != -1) {
6966 isRsrcLegal =
false;
6970 if (isRsrcLegal && isSoffsetLegal)
6994 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6995 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6996 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6999 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7000 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7002 unsigned RsrcPtr, NewSRsrc;
7009 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7016 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7034 "FIXME: Need to emit flat atomics here");
7036 unsigned RsrcPtr, NewSRsrc;
7039 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7062 MIB.
addImm(CPol->getImm());
7067 MIB.
addImm(TFE->getImm());
7087 MI.removeFromParent();
7092 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7094 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7098 if (!isSoffsetLegal) {
7110 if (!isSoffsetLegal) {
7119 InstrList.insert(
MI);
7123 if (RsrcIdx != -1) {
7124 DeferredList.insert(
MI);
7129 return DeferredList.contains(
MI);
7135 while (!Worklist.
empty()) {
7149 "Deferred MachineInstr are not supposed to re-populate worklist");
7167 case AMDGPU::S_ADD_U64_PSEUDO:
7168 NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
7170 case AMDGPU::S_SUB_U64_PSEUDO:
7171 NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
7173 case AMDGPU::S_ADD_I32:
7174 case AMDGPU::S_SUB_I32: {
7178 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7186 case AMDGPU::S_MUL_U64:
7188 splitScalarSMulU64(Worklist, Inst, MDT);
7192 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7193 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7196 splitScalarSMulPseudo(Worklist, Inst, MDT);
7200 case AMDGPU::S_AND_B64:
7201 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7205 case AMDGPU::S_OR_B64:
7206 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7210 case AMDGPU::S_XOR_B64:
7211 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7215 case AMDGPU::S_NAND_B64:
7216 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7220 case AMDGPU::S_NOR_B64:
7221 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7225 case AMDGPU::S_XNOR_B64:
7227 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7229 splitScalar64BitXnor(Worklist, Inst, MDT);
7233 case AMDGPU::S_ANDN2_B64:
7234 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7238 case AMDGPU::S_ORN2_B64:
7239 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7243 case AMDGPU::S_BREV_B64:
7244 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7248 case AMDGPU::S_NOT_B64:
7249 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7253 case AMDGPU::S_BCNT1_I32_B64:
7254 splitScalar64BitBCNT(Worklist, Inst);
7258 case AMDGPU::S_BFE_I64:
7259 splitScalar64BitBFE(Worklist, Inst);
7263 case AMDGPU::S_FLBIT_I32_B64:
7264 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7267 case AMDGPU::S_FF1_I32_B64:
7268 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7272 case AMDGPU::S_LSHL_B32:
7274 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7278 case AMDGPU::S_ASHR_I32:
7280 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7284 case AMDGPU::S_LSHR_B32:
7286 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7290 case AMDGPU::S_LSHL_B64:
7293 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7294 : AMDGPU::V_LSHLREV_B64_e64;
7298 case AMDGPU::S_ASHR_I64:
7300 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7304 case AMDGPU::S_LSHR_B64:
7306 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7311 case AMDGPU::S_ABS_I32:
7312 lowerScalarAbs(Worklist, Inst);
7316 case AMDGPU::S_CBRANCH_SCC0:
7317 case AMDGPU::S_CBRANCH_SCC1: {
7320 bool IsSCC = CondReg == AMDGPU::SCC;
7323 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7326 .
addReg(IsSCC ? VCC : CondReg);
7330 case AMDGPU::S_BFE_U64:
7331 case AMDGPU::S_BFM_B64:
7334 case AMDGPU::S_PACK_LL_B32_B16:
7335 case AMDGPU::S_PACK_LH_B32_B16:
7336 case AMDGPU::S_PACK_HL_B32_B16:
7337 case AMDGPU::S_PACK_HH_B32_B16:
7338 movePackToVALU(Worklist,
MRI, Inst);
7342 case AMDGPU::S_XNOR_B32:
7343 lowerScalarXnor(Worklist, Inst);
7347 case AMDGPU::S_NAND_B32:
7348 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7352 case AMDGPU::S_NOR_B32:
7353 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7357 case AMDGPU::S_ANDN2_B32:
7358 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7362 case AMDGPU::S_ORN2_B32:
7363 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7371 case AMDGPU::S_ADD_CO_PSEUDO:
7372 case AMDGPU::S_SUB_CO_PSEUDO: {
7373 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7374 ? AMDGPU::V_ADDC_U32_e64
7375 : AMDGPU::V_SUBB_U32_e64;
7379 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7380 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7398 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7402 case AMDGPU::S_UADDO_PSEUDO:
7403 case AMDGPU::S_USUBO_PSEUDO: {
7410 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7411 ? AMDGPU::V_ADD_CO_U32_e64
7412 : AMDGPU::V_SUB_CO_U32_e64;
7415 Register DestReg =
MRI.createVirtualRegister(NewRC);
7423 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7430 case AMDGPU::S_CSELECT_B32:
7431 case AMDGPU::S_CSELECT_B64:
7432 lowerSelect(Worklist, Inst, MDT);
7435 case AMDGPU::S_CMP_EQ_I32:
7436 case AMDGPU::S_CMP_LG_I32:
7437 case AMDGPU::S_CMP_GT_I32:
7438 case AMDGPU::S_CMP_GE_I32:
7439 case AMDGPU::S_CMP_LT_I32:
7440 case AMDGPU::S_CMP_LE_I32:
7441 case AMDGPU::S_CMP_EQ_U32:
7442 case AMDGPU::S_CMP_LG_U32:
7443 case AMDGPU::S_CMP_GT_U32:
7444 case AMDGPU::S_CMP_GE_U32:
7445 case AMDGPU::S_CMP_LT_U32:
7446 case AMDGPU::S_CMP_LE_U32:
7447 case AMDGPU::S_CMP_EQ_U64:
7448 case AMDGPU::S_CMP_LG_U64:
7449 case AMDGPU::S_CMP_LT_F32:
7450 case AMDGPU::S_CMP_EQ_F32:
7451 case AMDGPU::S_CMP_LE_F32:
7452 case AMDGPU::S_CMP_GT_F32:
7453 case AMDGPU::S_CMP_LG_F32:
7454 case AMDGPU::S_CMP_GE_F32:
7455 case AMDGPU::S_CMP_O_F32:
7456 case AMDGPU::S_CMP_U_F32:
7457 case AMDGPU::S_CMP_NGE_F32:
7458 case AMDGPU::S_CMP_NLG_F32:
7459 case AMDGPU::S_CMP_NGT_F32:
7460 case AMDGPU::S_CMP_NLE_F32:
7461 case AMDGPU::S_CMP_NEQ_F32:
7462 case AMDGPU::S_CMP_NLT_F32: {
7481 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7485 case AMDGPU::S_CMP_LT_F16:
7486 case AMDGPU::S_CMP_EQ_F16:
7487 case AMDGPU::S_CMP_LE_F16:
7488 case AMDGPU::S_CMP_GT_F16:
7489 case AMDGPU::S_CMP_LG_F16:
7490 case AMDGPU::S_CMP_GE_F16:
7491 case AMDGPU::S_CMP_O_F16:
7492 case AMDGPU::S_CMP_U_F16:
7493 case AMDGPU::S_CMP_NGE_F16:
7494 case AMDGPU::S_CMP_NLG_F16:
7495 case AMDGPU::S_CMP_NGT_F16:
7496 case AMDGPU::S_CMP_NLE_F16:
7497 case AMDGPU::S_CMP_NEQ_F16:
7498 case AMDGPU::S_CMP_NLT_F16: {
7520 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7524 case AMDGPU::S_CVT_HI_F32_F16: {
7526 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7527 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7533 .
addReg(TmpReg, 0, AMDGPU::hi16)
7549 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7553 case AMDGPU::S_MINIMUM_F32:
7554 case AMDGPU::S_MAXIMUM_F32: {
7556 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7567 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7571 case AMDGPU::S_MINIMUM_F16:
7572 case AMDGPU::S_MAXIMUM_F16: {
7575 ? &AMDGPU::VGPR_16RegClass
7576 : &AMDGPU::VGPR_32RegClass);
7587 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7593 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7601 if (NewOpcode == Opcode) {
7625 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7637 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7638 MRI.replaceRegWith(DstReg, NewDstReg);
7640 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7651 AMDGPU::OpName::src0_modifiers) >= 0)
7656 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7657 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7659 NewInstr->addOperand(Src);
7662 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7665 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7667 NewInstr.addImm(
Size);
7668 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7672 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7677 "Scalar BFE is only implemented for constant width and offset");
7686 AMDGPU::OpName::src1_modifiers) >= 0)
7691 AMDGPU::OpName::src2_modifiers) >= 0)
7705 NewInstr->addOperand(
Op);
7712 if (
Op.getReg() == AMDGPU::SCC) {
7714 if (
Op.isDef() && !
Op.isDead())
7715 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7717 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7722 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7723 Register DstReg = NewInstr->getOperand(0).getReg();
7728 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7729 MRI.replaceRegWith(DstReg, NewDstReg);
7735 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
std::pair<bool, MachineBasicBlock *>
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
  unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
      AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;

  MRI.replaceRegWith(OldDstReg, ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  return std::pair(true, NewBB);

  return std::pair(false, nullptr);
7789 bool IsSCC = (CondReg == AMDGPU::SCC);
7797 MRI.replaceRegWith(Dest.
getReg(), CondReg);
7804 NewCondReg =
MRI.createVirtualRegister(TC);
7808 bool CopyFound =
false;
7812 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
7814 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7816 .
addReg(CandI.getOperand(1).getReg());
7828 ST.
isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
7838 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
7839 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7852 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
7854 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
7866 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7867 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7870 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7880 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7881 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7896 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7904 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7905 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7911 bool Src0IsSGPR = Src0.
isReg() &&
7913 bool Src1IsSGPR = Src1.
isReg() &&
7916 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7917 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7927 }
else if (Src1IsSGPR) {
7941 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7945 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7951 unsigned Opcode)
const {
7961 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7962 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7974 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7975 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7980 unsigned Opcode)
const {
7990 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7991 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8003 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8004 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8022 &AMDGPU::SGPR_32RegClass;
8025 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8028 AMDGPU::sub0, Src0SubRC);
8033 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8035 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8039 AMDGPU::sub1, Src0SubRC);
8041 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8047 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8054 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8056 Worklist.
insert(&LoHalf);
8057 Worklist.
insert(&HiHalf);
8063 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8074 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8075 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8076 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8087 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8091 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8121 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8127 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8133 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8144 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8160 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8172 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8183 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8184 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8185 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8196 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8200 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8212 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8213 ? AMDGPU::V_MUL_HI_U32_e64
8214 : AMDGPU::V_MUL_HI_I32_e64;
8229 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8237 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
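// Standalone sketch of the decomposition the splitScalarSMul* paths above
// perform: a 64 x 64 -> 64 multiply built from 32-bit multiplies, mirroring
// V_MUL_LO/V_MUL_HI plus the adds that fold the cross terms into the high half.
#include <cstdint>

static uint64_t mul64Via32(uint64_t A, uint64_t B) {
  uint32_t ALo = (uint32_t)A, AHi = (uint32_t)(A >> 32);
  uint32_t BLo = (uint32_t)B, BHi = (uint32_t)(B >> 32);

  uint64_t LoLo = (uint64_t)ALo * BLo;  // mul_lo + mul_hi of the low words
  uint32_t Lo = (uint32_t)LoLo;
  uint32_t Hi = (uint32_t)(LoLo >> 32)  // carry into the high half
                + ALo * BHi             // cross term 1 (low 32 bits only)
                + AHi * BLo;            // cross term 2 (low 32 bits only)
  return ((uint64_t)Hi << 32) | Lo;
}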
8256 &AMDGPU::SGPR_32RegClass;
8259 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8262 &AMDGPU::SGPR_32RegClass;
8265 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8268 AMDGPU::sub0, Src0SubRC);
8270 AMDGPU::sub0, Src1SubRC);
8272 AMDGPU::sub1, Src0SubRC);
8274 AMDGPU::sub1, Src1SubRC);
8279 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8281 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8286 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8291 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8298 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8300 Worklist.
insert(&LoHalf);
8301 Worklist.
insert(&HiHalf);
8304 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
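// Sketch of splitScalar64BitBinaryOp's idea: a 64-bit bitwise operation has no
// carries between bits, so it can be done as two independent 32-bit halves and
// reassembled (the REG_SEQUENCE above). Shown for AND; OR/XOR are identical.
#include <cstdint>

static uint64_t and64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t Lo = (uint32_t)A & (uint32_t)B;                 // low-half op
  uint32_t Hi = (uint32_t)(A >> 32) & (uint32_t)(B >> 32); // high-half op
  return ((uint64_t)Hi << 32) | Lo;                        // REG_SEQUENCE sub0/sub1
}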
8322 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8338 Register NewDest =
MRI.createVirtualRegister(DestRC);
8344 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8362 MRI.getRegClass(Src.getReg()) :
8363 &AMDGPU::SGPR_32RegClass;
8365 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8366 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8369 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8372 AMDGPU::sub0, SrcSubRC);
8374 AMDGPU::sub1, SrcSubRC);
8380 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8384 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
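// The BCNT split above is the same two-halves idea for population count:
// a 64-bit popcount is popcount(lo) + popcount(hi), with the second V_BCNT
// taking the first count as its accumulator operand. Standalone sketch:
#include <cstdint>

static uint32_t popcount64ViaHalves(uint64_t V) {
  uint32_t Lo = (uint32_t)V, Hi = (uint32_t)(V >> 32);
  uint32_t Cnt = (uint32_t)__builtin_popcount(Lo); // first count, accumulator 0
  Cnt = (uint32_t)__builtin_popcount(Hi) + Cnt;    // second count accumulates
  return Cnt;
}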
8403 Offset == 0 &&
"Not implemented");
8406 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8407 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8408 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8425 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8426 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8431 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8432 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8436 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8439 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8444 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8445 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
  bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
  unsigned OpcodeAdd =
      ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8471 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8473 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8480 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8481 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8482 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8483 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8490 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8496 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8498 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8500 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
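// Standalone sketch of the 64-bit count split above, shown for a leading-zero
// count of a non-zero value: count each 32-bit half, add 32 to the low half's
// count, and take the unsigned minimum (V_FFBH + V_ADD + V_MIN in the code
// above). A zero half counts as 0xffffffff ("no bit found"), so min() ignores it.
#include <cstdint>

static uint32_t clz64ViaHalves(uint64_t V) { // assumes V != 0
  uint32_t Lo = (uint32_t)V, Hi = (uint32_t)(V >> 32);
  uint32_t CntHi = Hi ? (uint32_t)__builtin_clz(Hi) : 0xffffffffu;
  uint32_t CntLo = Lo ? (uint32_t)__builtin_clz(Lo) + 32 : 0xffffffffu;
  return CntHi < CntLo ? CntHi : CntLo;
}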
8503void SIInstrInfo::addUsersToMoveToVALUWorklist(
8507 E =
MRI.use_end();
I != E;) {
8512 switch (
UseMI.getOpcode()) {
8515 case AMDGPU::SOFT_WQM:
8516 case AMDGPU::STRICT_WWM:
8517 case AMDGPU::STRICT_WQM:
8518 case AMDGPU::REG_SEQUENCE:
8520 case AMDGPU::INSERT_SUBREG:
8523 OpNo =
I.getOperandNo();
8532 }
while (
I != E &&
I->getParent() == &
UseMI);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  case AMDGPU::S_PACK_LL_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  case AMDGPU::S_PACK_LH_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  case AMDGPU::S_PACK_HL_B32_B16: {
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  case AMDGPU::S_PACK_HH_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
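// Sketch of the packing that the S_PACK_{LL,LH,HL,HH}_B32_B16 lowering above
// builds out of shifts and masks. Semantics assumed from the opcode names:
// the first letter selects which half of src0 becomes bits [15:0], the second
// letter selects which half of src1 becomes bits [31:16].
#include <cstdint>

static uint32_t lo16(uint32_t V) { return V & 0xffffu; }
static uint32_t hi16(uint32_t V) { return V >> 16; }

static uint32_t packLL(uint32_t S0, uint32_t S1) { return lo16(S0) | (lo16(S1) << 16); }
static uint32_t packLH(uint32_t S0, uint32_t S1) { return lo16(S0) | (hi16(S1) << 16); }
static uint32_t packHL(uint32_t S0, uint32_t S1) { return hi16(S0) | (lo16(S1) << 16); }
static uint32_t packHH(uint32_t S0, uint32_t S1) { return hi16(S0) | (hi16(S1) << 16); }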
8618 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8619 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8627 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
8631 Register DestReg =
MI.getOperand(0).getReg();
8633 MRI.replaceRegWith(DestReg, NewCond);
8638 MI.getOperand(SCCIdx).setReg(NewCond);
8644 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
8647 for (
auto &Copy : CopyToDelete)
8648 Copy->eraseFromParent();
8656void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8665 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8667 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8684 case AMDGPU::REG_SEQUENCE:
8685 case AMDGPU::INSERT_SUBREG:
8687 case AMDGPU::SOFT_WQM:
8688 case AMDGPU::STRICT_WWM:
8689 case AMDGPU::STRICT_WQM: {
8697 case AMDGPU::REG_SEQUENCE:
8698 case AMDGPU::INSERT_SUBREG:
8708 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8725 int OpIndices[3])
const {
8744 for (
unsigned i = 0; i < 3; ++i) {
8745 int Idx = OpIndices[i];
8782 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8783 SGPRReg = UsedSGPRs[0];
8786 if (!SGPRReg && UsedSGPRs[1]) {
8787 if (UsedSGPRs[1] == UsedSGPRs[2])
8788 SGPRReg = UsedSGPRs[1];
8795 unsigned OperandName)
const {
8800 return &
MI.getOperand(
Idx);
8817 RsrcDataFormat |= (1ULL << 56);
8822 RsrcDataFormat |= (2ULL << 59);
8825 return RsrcDataFormat;
8847 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8853 unsigned Opc =
MI.getOpcode();
8859 return get(Opc).mayLoad() &&
8864 int &FrameIndex)
const {
8872 FrameIndex =
Addr->getIndex();
8877 int &FrameIndex)
const {
8880 FrameIndex =
Addr->getIndex();
8885 int &FrameIndex)
const {
8899 int &FrameIndex)
const {
8916 while (++
I != E &&
I->isInsideBundle()) {
8917 assert(!
I->isBundle() &&
"No nested bundle!");
8925 unsigned Opc =
MI.getOpcode();
8927 unsigned DescSize =
Desc.getSize();
8932 unsigned Size = DescSize;
8947 bool HasLiteral =
false;
8948 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
8956 return HasLiteral ? DescSize + 4 : DescSize;
8966 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8970 case TargetOpcode::BUNDLE:
8972 case TargetOpcode::INLINEASM:
8973 case TargetOpcode::INLINEASM_BR: {
8975 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8979 if (
MI.isMetaInstruction())
8989 if (
MI.memoperands_empty())
9001 static const std::pair<int, const char *> TargetIndices[] = {
9039std::pair<unsigned, unsigned>
9046 static const std::pair<unsigned, const char *> TargetFlags[] = {
9061 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9075 return AMDGPU::WWM_COPY;
9077 return AMDGPU::COPY;
9089 bool IsNullOrVectorRegister =
true;
9097 return IsNullOrVectorRegister &&
9099 (Opcode == AMDGPU::IMPLICIT_DEF &&
9101 (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
9102 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9115 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
9146 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9147 case AMDGPU::SI_KILL_I1_TERMINATOR:
9156 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9157 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9158 case AMDGPU::SI_KILL_I1_PSEUDO:
9159 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9171 const unsigned OffsetBits =
9173 return (1 << OffsetBits) - 1;
9180 if (
MI.isInlineAsm())
9183 for (
auto &
Op :
MI.implicit_operands()) {
9184 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
9185 Op.setReg(AMDGPU::VCC_LO);
9198 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9216 if (Imm <= MaxImm + 64) {
9218 Overflow = Imm - MaxImm;
std::pair<int64_t, int64_t>
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;

  if (AllowNegative) {
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;

        (ImmField % 4) != 0) {
      RemainderOffset += ImmField % 4;
      ImmField -= ImmField % 4;
  } else if (COffsetVal >= 0) {
    ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
    RemainderOffset = COffsetVal - ImmField;
  }

  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
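// Standalone restatement of the split above: carve an immediate field of
// NumBits out of a byte offset and return {ImmField, RemainderOffset} such
// that ImmField + RemainderOffset == COffsetVal. AllowNegative mirrors the
// signed-immediate case; the unaligned-offset workaround visible above is
// omitted here for brevity.
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<int64_t, int64_t>
splitOffsetSketch(int64_t COffsetVal, unsigned NumBits, bool AllowNegative) {
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  if (AllowNegative) {
    // Round toward zero to a multiple of 2^NumBits; the rest goes in the field.
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;
  } else if (COffsetVal >= 0) {
    ImmField = COffsetVal & (int64_t)((1ULL << NumBits) - 1); // maskTrailingOnes
    RemainderOffset = COffsetVal - ImmField;
  }
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}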
9349 switch (ST.getGeneration()) {
9374 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9375 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9376 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9377 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9378 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9379 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9380 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9381 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9388#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
9389 case OPCODE##_dpp: \
9390 case OPCODE##_e32: \
9391 case OPCODE##_e64: \
9392 case OPCODE##_e64_dpp: \
9407 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
9408 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
9409 case AMDGPU::V_FMA_F16_gfx9_e64:
9410 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
9411 case AMDGPU::V_INTERP_P2_F16:
9412 case AMDGPU::V_MAD_F16_e64:
9413 case AMDGPU::V_MAD_U16_e64:
9414 case AMDGPU::V_MAD_I16_e64:
9449 if (
isMAI(Opcode)) {
9494 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
9495 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9496 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9508 switch (
MI.getOpcode()) {
9510 case AMDGPU::REG_SEQUENCE:
9514 case AMDGPU::INSERT_SUBREG:
9515 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9532 if (!
P.Reg.isVirtual())
9536 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9537 while (
auto *
MI = DefInst) {
9539 switch (
MI->getOpcode()) {
9541 case AMDGPU::V_MOV_B32_e32: {
9542 auto &Op1 =
MI->getOperand(1);
9547 DefInst =
MRI.getVRegDef(RSR.Reg);
9555 DefInst =
MRI.getVRegDef(RSR.Reg);
9568 assert(
MRI.isSSA() &&
"Must be run on SSA");
9570 auto *
TRI =
MRI.getTargetRegisterInfo();
9571 auto *DefBB =
DefMI.getParent();
9575 if (
UseMI.getParent() != DefBB)
9578 const int MaxInstScan = 20;
9582 auto E =
UseMI.getIterator();
9583 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
9584 if (
I->isDebugInstr())
9587 if (++NumInst > MaxInstScan)
9590 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9600 assert(
MRI.isSSA() &&
"Must be run on SSA");
9602 auto *
TRI =
MRI.getTargetRegisterInfo();
9603 auto *DefBB =
DefMI.getParent();
9605 const int MaxUseScan = 10;
9608 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9609 auto &UseInst = *
Use.getParent();
9612 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9615 if (++NumUse > MaxUseScan)
9622 const int MaxInstScan = 20;
9626 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9629 if (
I->isDebugInstr())
9632 if (++NumInst > MaxInstScan)
9645 if (Reg == VReg && --NumUse == 0)
9647 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9659 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
9662 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9671 if (InsPt !=
MBB.
end() &&
9672 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9673 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9674 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9675 InsPt->definesRegister(Src,
nullptr)) {
9679 : AMDGPU::S_MOV_B64_term),
9681 .
addReg(Src, 0, SrcSubReg)
9706 if (isFullCopyInstr(
MI)) {
9715 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9719 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9730 unsigned *PredCost)
const {
9731 if (
MI.isBundle()) {
9734 unsigned Lat = 0, Count = 0;
9735 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
9737 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
9739 return Lat + Count - 1;
9742 return SchedModel.computeInstrLatency(&
MI);
9747 unsigned opcode =
MI.getOpcode();
9748 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9749 auto IID = GI->getIntrinsicID();
9756 case Intrinsic::amdgcn_if:
9757 case Intrinsic::amdgcn_else:
9771 if (opcode == AMDGPU::G_LOAD) {
9772 if (
MI.memoperands_empty())
9776 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9777 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9785 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9786 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9787 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9800 unsigned opcode =
MI.getOpcode();
9801 if (opcode == AMDGPU::V_READLANE_B32 ||
9802 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9803 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9806 if (isCopyInstr(
MI)) {
9810 RI.getPhysRegBaseClass(srcOp.
getReg());
9818 if (
MI.isPreISelOpcode())
9833 if (
MI.memoperands_empty())
9837 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9838 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9853 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
9859 if (!Reg || !
SrcOp.readsReg())
9865 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
9902 Register &SrcReg2, int64_t &CmpMask,
9903 int64_t &CmpValue)
const {
9904 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
9907 switch (
MI.getOpcode()) {
9910 case AMDGPU::S_CMP_EQ_U32:
9911 case AMDGPU::S_CMP_EQ_I32:
9912 case AMDGPU::S_CMP_LG_U32:
9913 case AMDGPU::S_CMP_LG_I32:
9914 case AMDGPU::S_CMP_LT_U32:
9915 case AMDGPU::S_CMP_LT_I32:
9916 case AMDGPU::S_CMP_GT_U32:
9917 case AMDGPU::S_CMP_GT_I32:
9918 case AMDGPU::S_CMP_LE_U32:
9919 case AMDGPU::S_CMP_LE_I32:
9920 case AMDGPU::S_CMP_GE_U32:
9921 case AMDGPU::S_CMP_GE_I32:
9922 case AMDGPU::S_CMP_EQ_U64:
9923 case AMDGPU::S_CMP_LG_U64:
9924 SrcReg =
MI.getOperand(0).getReg();
9925 if (
MI.getOperand(1).isReg()) {
9926 if (
MI.getOperand(1).getSubReg())
9928 SrcReg2 =
MI.getOperand(1).getReg();
9930 }
else if (
MI.getOperand(1).isImm()) {
9932 CmpValue =
MI.getOperand(1).getImm();
9938 case AMDGPU::S_CMPK_EQ_U32:
9939 case AMDGPU::S_CMPK_EQ_I32:
9940 case AMDGPU::S_CMPK_LG_U32:
9941 case AMDGPU::S_CMPK_LG_I32:
9942 case AMDGPU::S_CMPK_LT_U32:
9943 case AMDGPU::S_CMPK_LT_I32:
9944 case AMDGPU::S_CMPK_GT_U32:
9945 case AMDGPU::S_CMPK_GT_I32:
9946 case AMDGPU::S_CMPK_LE_U32:
9947 case AMDGPU::S_CMPK_LE_I32:
9948 case AMDGPU::S_CMPK_GE_U32:
9949 case AMDGPU::S_CMPK_GE_I32:
9950 SrcReg =
MI.getOperand(0).getReg();
9952 CmpValue =
MI.getOperand(1).getImm();
  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
                               this](int64_t ExpectedValue, unsigned SrcSize,
                                     bool IsReversible, bool IsSigned) -> bool {
    if (!Def || Def->getParent() != CmpInstr.getParent())

    if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
        Def->getOpcode() != AMDGPU::S_AND_B64)

    const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {

      SrcOp = &Def->getOperand(2);
    else if (isMask(&Def->getOperand(2)))
      SrcOp = &Def->getOperand(1);

    assert(llvm::has_single_bit<uint64_t>(Mask) && "Invalid mask.");

    if (IsSigned && BitNo == SrcSize - 1)

    ExpectedValue <<= BitNo;

    bool IsReversedCC = false;
    if (CmpValue != ExpectedValue) {
      IsReversedCC = CmpValue == (ExpectedValue ^ Mask);

    Register DefReg = Def->getOperand(0).getReg();
    if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))

    for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
      if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
          I->killsRegister(AMDGPU::SCC, &RI))

    Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);

    if (!MRI->use_nodbg_empty(DefReg)) {

    unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
                                                     : AMDGPU::S_BITCMP1_B32
                                      : IsReversedCC ? AMDGPU::S_BITCMP0_B64
                                                     : AMDGPU::S_BITCMP1_B64;

    Def->eraseFromParent();

  switch (CmpInstr.getOpcode()) {
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
    return optimizeCmpAnd(1, 32, true, false);
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMPK_GE_U32:
    return optimizeCmpAnd(1, 32, false, false);
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMPK_GE_I32:
    return optimizeCmpAnd(1, 32, false, true);
  case AMDGPU::S_CMP_EQ_U64:
    return optimizeCmpAnd(1, 64, true, false);
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
    return optimizeCmpAnd(0, 32, true, false);
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMPK_GT_U32:
    return optimizeCmpAnd(0, 32, false, false);
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMPK_GT_I32:
    return optimizeCmpAnd(0, 32, false, true);
  case AMDGPU::S_CMP_LG_U64:
    return optimizeCmpAnd(0, 64, true, false);
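// Sketch of the detection optimizeCmpAnd performs above: an AND with a
// single-bit mask followed by a compare against ExpectedValue << BitNo is a
// bit test (s_bitcmp0/s_bitcmp1). Returns the bit number; Reversed is set when
// the compare matches the opposite polarity (ExpectedValue ^ Mask), mirroring
// the IsReversedCC logic. Uses C++20 <bit>.
#include <bit>
#include <cstdint>
#include <optional>

static std::optional<unsigned>
matchBitTest(uint64_t Mask, int64_t ExpectedValue, int64_t CmpValue,
             bool &Reversed) {
  if (!std::has_single_bit(Mask))
    return std::nullopt;                    // not a single-bit AND mask
  unsigned BitNo = std::countr_zero(Mask);
  ExpectedValue <<= BitNo;                  // compare constant scaled to the bit
  Reversed = false;
  if (CmpValue != ExpectedValue) {
    if (CmpValue != (ExpectedValue ^ (int64_t)Mask))
      return std::nullopt;                  // compares against something else
    Reversed = true;                        // e.g. "eq 0" instead of "eq mask"
  }
  return BitNo;                             // test this bit with s_bitcmp{0,1}
}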
10112 unsigned OpName)
const {
10130 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10133 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10134 : &AMDGPU::VReg_64_Align2RegClass);
10136 .
addReg(DataReg, 0,
Op.getSubReg())
10141 Op.setSubReg(AMDGPU::sub0);
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillRestoreOpcode(unsigned Size)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MachineRegisterInfo &MRI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillSaveOpcode(unsigned Size)
static bool resultDependsOnExec(const MachineInstr &MI)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static constexpr unsigned ModifierOpNames[]
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasVALUMaskWriteHazard() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
bool hasVALUReadSGPRHazard() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasNoF16PseudoScalarTransInlineConstants() const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
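When a kill-flagged instruction is rewritten (as the updateLiveVariables helper above does), both the slot index map and the kill bookkeeping have to follow the replacement. A minimal sketch, assuming LIS (LiveIntervals *), LV (LiveVariables *), Reg, MI and its replacement NewMI are supplied by the caller and that MI carried the kill of Reg:
// Keep the SlotIndexes mapping pointing at the replacement instruction.
if (LIS)
  LIS->ReplaceMachineInstrInMaps(MI, NewMI);
// Transfer the kill of Reg from the old instruction to the new one.
if (LV)
  LV->replaceKillInstruction(Reg, MI, NewMI);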
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
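A common use of this query is deciding whether a scratch status register is free at an insertion point. A hedged sketch, assuming MBB, an iterator I and the TargetRegisterInfo &TRI come from the surrounding code:
// True when SCC is known to be dead just before I, so it can be clobbered.
bool SCCIsDead =
    MBB.computeRegisterLiveness(&TRI, AMDGPU::SCC, I) ==
    MachineBasicBlock::LQR_Dead;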
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
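A sketch of splitting a block after a given instruction while keeping live-ins and, optionally, LiveIntervals up to date; MI and LIS are assumed to be in scope:
// MI stays in its original block; everything after it moves to RemainderBB.
MachineBasicBlock *RemainderBB =
    MI.getParent()->splitAt(MI, /*UpdateLiveIns=*/true, LIS);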
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo of the given opcode.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
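Because only a partially built instruction is returned, the caller supplies the source operands (and the trailing clamp bit). A hedged sketch, assuming TII, MBB, I, DL and virtual registers DestReg, Src0 and Src1 already exist:
// DestReg = Src0 + Src1; on subtargets without add-no-carry the builder
// already carries a dead carry-out def, so the operand order is the same.
TII->getAddNoCarry(MBB, I, DL, DestReg)
    .addReg(Src0)
    .addReg(Src1)
    .addImm(0); // clamp bit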
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
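A sketch of how a caller might split an over-large constant offset before folding it into a FLAT global access; the SIInstrFlags::FlatGlobal flat-variant value and the surrounding TII/Offset variables are assumptions of this example:
// ImmField goes into the instruction's offset field; Remainder must be
// added back onto the address register.
auto [ImmField, Remainder] =
    TII->splitFlatOffset(Offset, AMDGPUAS::GLOBAL_ADDRESS,
                         SIInstrFlags::FlatGlobal);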
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
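getNamedOperand pairs with getNamedOperandIdx (listed further below) so operands can be accessed by name instead of by position. A small sketch reading an instruction's offset field, assuming TII and MI are available:
// Returns nullptr when the opcode has no 'offset' operand.
const MachineOperand *Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
int64_t ByteOffset = Off ? Off->getImm() : 0;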
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by the assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, const MachineOperand *fromMO, unsigned toIdx, const MachineOperand *toMO) const
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
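Callers rely on the -1 sentinel to detect pseudo instructions that have no real encoding on the current subtarget. A small sketch, assuming TII and MI:
// -1 means the pseudo cannot be lowered to a hardware instruction here.
int MCOp = TII->pseudoToMCOpcode(MI.getOpcode());
bool HasHWEncoding = (MCOp != -1);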
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand of MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
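Most of the expansion and spill helpers listed above bottom out in this builder. A minimal sketch, assuming MBB, an insertion iterator I, a DebugLoc DL, TII and a 32-bit SGPR DestReg are provided by the caller:
// Materialize a 32-bit constant into an SGPR.
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), DestReg)
    .addImm(0x1234);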
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
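For example, with Skew left at its default of zero, alignDown rounds toward zero to the previous multiple of the alignment; since it is constexpr the behavior can be checked at compile time:
static_assert(llvm::alignDown(13u, 4u) == 12u, "13 rounds down to 12");
static_assert(llvm::alignDown(16u, 4u) == 16u, "multiples are unchanged");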
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
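The spill helpers above combine this with MachineFunction::getMachineMemOperand to describe a stack access. A hedged sketch for a 4-byte scalar slot, assuming MF and FrameIndex exist in the caller:
// Build a load memory operand for a 32-bit stack slot.
MachinePointerInfo PtrInfo =
    MachinePointerInfo::getFixedStack(MF, FrameIndex);
MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOLoad, LLT::scalar(32), Align(4));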
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.