#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc(
        "Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.memoperands_empty() &&
         all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
         });
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC, nullptr))
  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&

    if (FromCycle == nullptr)

    while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    assert(NumOps == 4 || NumOps == 5);

        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));
    if (!Load0Offset || !Load1Offset)

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      Offset = EltSize * Offset0;

    if (DataOpIdx == -1) {

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())
        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;

    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

  return Base1 == Base2;
                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
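// Report an unsupported VGPR-to-SGPR copy: a diagnostic is emitted through the
// function's LLVMContext instead of silently producing incorrect code.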
                              const char *Msg = "illegal VGPR to SGPR copy") {

  C.diagnose(IllegalCopy);

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");
    for (auto Def = MI, E = MBB.begin(); Def != E; ) {

      if (!Def->modifiesRegister(SrcReg, &RI))

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

         "VGPR used for an intermediate copy should have been reserved.");

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

    if (ImpUseSuperReg) {
      UseBuilder.addReg(ImpUseSuperReg,
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;
  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
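// What follows is the physical-register copy lowering (copyPhysReg): it first
// normalizes mismatched 16-bit/32-bit operands, then dispatches on the
// destination register class to pick a move opcode or a multi-instruction
// expansion.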
                              bool RenamableDest, bool RenamableSrc) const {

  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);
  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {

    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
    Opcode = AMDGPU::V_MOV_B64_e32;
    Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                             *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
                                     int64_t Value) const {
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      Opcode = AMDGPU::S_MOV_B32;

    int64_t IdxValue = Idx == 0 ? Value : 0;

        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
    Builder.addImm(IdxValue);
  return &AMDGPU::VGPR_32RegClass;
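// The switch below materializes a branch condition (Cond) into a register that
// can drive a select: each predicate kind (SCC_TRUE/FALSE, VCCNZ/VCCZ,
// EXECNZ/EXECZ) builds a short S_CSELECT / SAVEEXEC sequence into a fresh
// SReg of the wave-size boolean class.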
  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                               : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                               : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                               : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                               : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                               : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                               : AMDGPU::S_CSELECT_B64), SReg)
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
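// The helpers below map a vector register size (VecSize, in bits) to the
// corresponding V_INDIRECT_REG_{READ,WRITE}_GPR_IDX_B32_V* or
// *_INDIRECT_REG_WRITE_MOVREL_B32/B64_V* pseudo used for dynamic register
// indexing.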
1405 bool IsIndirectSrc)
const {
1406 if (IsIndirectSrc) {
1408 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1410 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1412 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1414 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1416 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1418 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1420 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1422 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1424 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1426 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1428 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1429 if (VecSize <= 1024)
1430 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1436 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1438 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1440 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1442 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1444 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1446 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1448 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1450 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1452 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1454 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1456 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1457 if (VecSize <= 1024)
1458 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1465 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1467 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1469 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1471 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1473 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1475 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1477 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1479 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1481 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1483 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1485 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1486 if (VecSize <= 1024)
1487 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1494 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1496 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1498 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1500 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1502 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1504 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1506 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1508 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1510 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1512 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1514 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1515 if (VecSize <= 1024)
1516 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1523 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1525 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1527 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1529 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1530 if (VecSize <= 1024)
1531 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                              bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
1557 return AMDGPU::SI_SPILL_S32_SAVE;
1559 return AMDGPU::SI_SPILL_S64_SAVE;
1561 return AMDGPU::SI_SPILL_S96_SAVE;
1563 return AMDGPU::SI_SPILL_S128_SAVE;
1565 return AMDGPU::SI_SPILL_S160_SAVE;
1567 return AMDGPU::SI_SPILL_S192_SAVE;
1569 return AMDGPU::SI_SPILL_S224_SAVE;
1571 return AMDGPU::SI_SPILL_S256_SAVE;
1573 return AMDGPU::SI_SPILL_S288_SAVE;
1575 return AMDGPU::SI_SPILL_S320_SAVE;
1577 return AMDGPU::SI_SPILL_S352_SAVE;
1579 return AMDGPU::SI_SPILL_S384_SAVE;
1581 return AMDGPU::SI_SPILL_S512_SAVE;
1583 return AMDGPU::SI_SPILL_S1024_SAVE;
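// The tables below do the same for vector spills: they map the spill size to
// the matching SI_SPILL_V*_SAVE pseudo, followed by the SI_SPILL_A* (AGPR) and
// SI_SPILL_AV* (combined VGPR/AGPR) variants.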
1592 return AMDGPU::SI_SPILL_V32_SAVE;
1594 return AMDGPU::SI_SPILL_V64_SAVE;
1596 return AMDGPU::SI_SPILL_V96_SAVE;
1598 return AMDGPU::SI_SPILL_V128_SAVE;
1600 return AMDGPU::SI_SPILL_V160_SAVE;
1602 return AMDGPU::SI_SPILL_V192_SAVE;
1604 return AMDGPU::SI_SPILL_V224_SAVE;
1606 return AMDGPU::SI_SPILL_V256_SAVE;
1608 return AMDGPU::SI_SPILL_V288_SAVE;
1610 return AMDGPU::SI_SPILL_V320_SAVE;
1612 return AMDGPU::SI_SPILL_V352_SAVE;
1614 return AMDGPU::SI_SPILL_V384_SAVE;
1616 return AMDGPU::SI_SPILL_V512_SAVE;
1618 return AMDGPU::SI_SPILL_V1024_SAVE;
1627 return AMDGPU::SI_SPILL_A32_SAVE;
1629 return AMDGPU::SI_SPILL_A64_SAVE;
1631 return AMDGPU::SI_SPILL_A96_SAVE;
1633 return AMDGPU::SI_SPILL_A128_SAVE;
1635 return AMDGPU::SI_SPILL_A160_SAVE;
1637 return AMDGPU::SI_SPILL_A192_SAVE;
1639 return AMDGPU::SI_SPILL_A224_SAVE;
1641 return AMDGPU::SI_SPILL_A256_SAVE;
1643 return AMDGPU::SI_SPILL_A288_SAVE;
1645 return AMDGPU::SI_SPILL_A320_SAVE;
1647 return AMDGPU::SI_SPILL_A352_SAVE;
1649 return AMDGPU::SI_SPILL_A384_SAVE;
1651 return AMDGPU::SI_SPILL_A512_SAVE;
1653 return AMDGPU::SI_SPILL_A1024_SAVE;
1662 return AMDGPU::SI_SPILL_AV32_SAVE;
1664 return AMDGPU::SI_SPILL_AV64_SAVE;
1666 return AMDGPU::SI_SPILL_AV96_SAVE;
1668 return AMDGPU::SI_SPILL_AV128_SAVE;
1670 return AMDGPU::SI_SPILL_AV160_SAVE;
1672 return AMDGPU::SI_SPILL_AV192_SAVE;
1674 return AMDGPU::SI_SPILL_AV224_SAVE;
1676 return AMDGPU::SI_SPILL_AV256_SAVE;
1678 return AMDGPU::SI_SPILL_AV288_SAVE;
1680 return AMDGPU::SI_SPILL_AV320_SAVE;
1682 return AMDGPU::SI_SPILL_AV352_SAVE;
1684 return AMDGPU::SI_SPILL_AV384_SAVE;
1686 return AMDGPU::SI_SPILL_AV512_SAVE;
1688 return AMDGPU::SI_SPILL_AV1024_SAVE;
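// WWM (whole wave mode) spills always use a dedicated 32-bit pseudo; the
// helper below picks the AV or plain VGPR variant depending on whether the
// register class is a vector super class.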
                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                            SpillSize, RI, *MFI);
1783 return AMDGPU::SI_SPILL_S32_RESTORE;
1785 return AMDGPU::SI_SPILL_S64_RESTORE;
1787 return AMDGPU::SI_SPILL_S96_RESTORE;
1789 return AMDGPU::SI_SPILL_S128_RESTORE;
1791 return AMDGPU::SI_SPILL_S160_RESTORE;
1793 return AMDGPU::SI_SPILL_S192_RESTORE;
1795 return AMDGPU::SI_SPILL_S224_RESTORE;
1797 return AMDGPU::SI_SPILL_S256_RESTORE;
1799 return AMDGPU::SI_SPILL_S288_RESTORE;
1801 return AMDGPU::SI_SPILL_S320_RESTORE;
1803 return AMDGPU::SI_SPILL_S352_RESTORE;
1805 return AMDGPU::SI_SPILL_S384_RESTORE;
1807 return AMDGPU::SI_SPILL_S512_RESTORE;
1809 return AMDGPU::SI_SPILL_S1024_RESTORE;
1818 return AMDGPU::SI_SPILL_V32_RESTORE;
1820 return AMDGPU::SI_SPILL_V64_RESTORE;
1822 return AMDGPU::SI_SPILL_V96_RESTORE;
1824 return AMDGPU::SI_SPILL_V128_RESTORE;
1826 return AMDGPU::SI_SPILL_V160_RESTORE;
1828 return AMDGPU::SI_SPILL_V192_RESTORE;
1830 return AMDGPU::SI_SPILL_V224_RESTORE;
1832 return AMDGPU::SI_SPILL_V256_RESTORE;
1834 return AMDGPU::SI_SPILL_V288_RESTORE;
1836 return AMDGPU::SI_SPILL_V320_RESTORE;
1838 return AMDGPU::SI_SPILL_V352_RESTORE;
1840 return AMDGPU::SI_SPILL_V384_RESTORE;
1842 return AMDGPU::SI_SPILL_V512_RESTORE;
1844 return AMDGPU::SI_SPILL_V1024_RESTORE;
1853 return AMDGPU::SI_SPILL_A32_RESTORE;
1855 return AMDGPU::SI_SPILL_A64_RESTORE;
1857 return AMDGPU::SI_SPILL_A96_RESTORE;
1859 return AMDGPU::SI_SPILL_A128_RESTORE;
1861 return AMDGPU::SI_SPILL_A160_RESTORE;
1863 return AMDGPU::SI_SPILL_A192_RESTORE;
1865 return AMDGPU::SI_SPILL_A224_RESTORE;
1867 return AMDGPU::SI_SPILL_A256_RESTORE;
1869 return AMDGPU::SI_SPILL_A288_RESTORE;
1871 return AMDGPU::SI_SPILL_A320_RESTORE;
1873 return AMDGPU::SI_SPILL_A352_RESTORE;
1875 return AMDGPU::SI_SPILL_A384_RESTORE;
1877 return AMDGPU::SI_SPILL_A512_RESTORE;
1879 return AMDGPU::SI_SPILL_A1024_RESTORE;
1888 return AMDGPU::SI_SPILL_AV32_RESTORE;
1890 return AMDGPU::SI_SPILL_AV64_RESTORE;
1892 return AMDGPU::SI_SPILL_AV96_RESTORE;
1894 return AMDGPU::SI_SPILL_AV128_RESTORE;
1896 return AMDGPU::SI_SPILL_AV160_RESTORE;
1898 return AMDGPU::SI_SPILL_AV192_RESTORE;
1900 return AMDGPU::SI_SPILL_AV224_RESTORE;
1902 return AMDGPU::SI_SPILL_AV256_RESTORE;
1904 return AMDGPU::SI_SPILL_AV288_RESTORE;
1906 return AMDGPU::SI_SPILL_AV320_RESTORE;
1908 return AMDGPU::SI_SPILL_AV352_RESTORE;
1910 return AMDGPU::SI_SPILL_AV384_RESTORE;
1912 return AMDGPU::SI_SPILL_AV512_RESTORE;
1914 return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
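// The reload path mirrors the store path: pick the matching *_RESTORE pseudo
// for the register class and spill size, assert that M0 and EXEC are never
// reloaded directly, and constrain 4-byte virtual SGPR reloads to
// SReg_32_XM0_XEXEC.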
  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);

  if (IsVectorSuperClass)

  unsigned SpillSize = TRI->getSpillSize(*RC);
      FrameInfo.getObjectAlign(FrameIndex));

    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                             SpillSize, RI, *MFI);

                                       unsigned Quantity) const {
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);
  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
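// expandPostRAPseudo: the switch below lowers SI pseudos after register
// allocation, e.g. *_term terminators back to their base opcodes, 64-bit
// immediate moves into 32-bit halves, and the indirect register-indexing
// pseudos into movrel or gpr-idx sequences.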
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
2293 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2294 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2295 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2296 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2297 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2298 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2299 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2300 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2301 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2302 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2303 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2304 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2305 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2306 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2307 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2308 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2309 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2310 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2311 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2312 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2313 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2314 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2315 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2316 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2317 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2318 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2319 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2320 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2321 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
2352 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2353 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2354 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2355 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2356 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2357 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2358 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2359 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2360 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2361 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2362 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2363 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
2396 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2397 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2398 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2399 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2400 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2401 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2402 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2403 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2404 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2405 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2406 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2407 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

        BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {
                               : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();
  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
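// expandMovDPP64 splits a V_MOV_B64_DPP_PSEUDO into two 32-bit DPP moves, one
// per sub0/sub1 half, unless the subtarget has a real 64-bit DPP move, in
// which case the pseudo is simply retargeted to V_MOV_B64_dpp.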
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

    MovDPP.addImm(MO.getImm());
    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
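// isCopyInstrImpl treats WWM_COPY like a plain COPY. The commuting helpers
// that follow swap the src0/src1 modifier immediates and, for the
// register/non-register case, exchange a register operand with an immediate
// or frame-index operand while preserving kill/undef/debug flags.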
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                        unsigned Src0OpName,
                                        unsigned Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);

                 AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
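// Long-branch support: isBranchOffsetInRange rejects targets outside the
// S_CBRANCH/S_BRANCH range, and insertIndirectBranch materializes the
// destination with S_GETPC_B64 plus a 64-bit offset add before jumping via
// S_SETPC_B64, scavenging (or reusing a reserved) SGPR pair for the address.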
                                     int64_t BrOffset) const {
  assert(BranchOp != AMDGPU::S_SETPC_B64);

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;
SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                   bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
3081 case AMDGPU::S_MOV_B64_term:
3082 case AMDGPU::S_XOR_B64_term:
3083 case AMDGPU::S_OR_B64_term:
3084 case AMDGPU::S_ANDN2_B64_term:
3085 case AMDGPU::S_AND_B64_term:
3086 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3087 case AMDGPU::S_MOV_B32_term:
3088 case AMDGPU::S_XOR_B32_term:
3089 case AMDGPU::S_OR_B32_term:
3090 case AMDGPU::S_ANDN2_B32_term:
3091 case AMDGPU::S_AND_B32_term:
3092 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3095 case AMDGPU::SI_ELSE:
3096 case AMDGPU::SI_KILL_I1_TERMINATOR:
3097 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
                               int *BytesRemoved) const {
  unsigned RemovedSize = 0;

    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
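// isFoldableCopy lists the move/copy opcodes whose single source may legally
// be propagated into users; foldImmediate below relies on it when replacing
// register uses with inline or literal constants.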
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:

    AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
    AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
    AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();

    MI.removeOperand(Idx);

  if (!MRI->hasOneNonDBGUse(Reg))
  switch (DefMI.getOpcode()) {
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t {
    int64_t Imm = ImmOp->getImm();
    switch (UseOp.getSubReg()) {
      return SignExtend64<16>(Imm);
      return SignExtend64<16>(Imm >> 16);
    case AMDGPU::sub1_lo16:
      return SignExtend64<16>(Imm >> 32);
    case AMDGPU::sub1_hi16:
      return SignExtend64<16>(Imm >> 48);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");
    bool Is16Bit = OpSize == 2;
    bool Is64Bit = OpSize == 8;

    unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
                                           : AMDGPU::V_MOV_B32_e32
                                 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
                                           : AMDGPU::S_MOV_B32;
    APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)),

      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      if (DstReg.isVirtual() &&
          UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)

      UseMI.getOperand(0).setSubReg(0);
      UseMI.getOperand(0).setReg(DstReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
                 Opc == AMDGPU::V_FMAC_F16_fake16_e64;

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

      if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
                         : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);

      if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)

      const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      Src1->ChangeToImmediate(Imm);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
      bool Src0Inlined = false;
      if (Src0->isReg()) {

        if (Def && Def->isMoveImmediate() &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
                         : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);

      if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
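// The helpers below decide whether two memory operations may be reordered:
// base operands are compared for identity, then the [offset, offset + width)
// ranges are checked for overlap before falling back to
// checkInstOffsetsDoNotOverlap.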
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())

  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
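// convertToThreeAddress uses the mapping above to rewrite two-address MAC /
// FMAC instructions into their three-address MAD / FMA forms, or to an
// alternate MFMA opcode when one exists, updating LiveIntervals and
// LiveVariables when they are available.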
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

      if (Def.isEarlyClobber() && Def.isReg() &&

      auto UpdateDefIndex = [&](LiveRange &LR) {
        auto *S = LR.find(OldIndex);
        if (S != LR.end() && S->start == OldIndex) {
          assert(S->valno && S->valno->def == OldIndex);
          S->start = NewIndex;
          S->valno->def = NewIndex;

      for (auto &SR : LI.subranges())

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_fake16_e32 is not supported and not expected to be present "

  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_fake16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                             AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&

  const auto killDef = [&]() -> void {
    if (MRI.hasOneNonDBGUse(DefReg)) {

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

                             : AMDGPU::V_FMAAK_F16)
                             : AMDGPU::V_FMAAK_F32)
                  : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);

                             : AMDGPU::V_FMAMK_F16)
                             : AMDGPU::V_FMAMK_F32)
                  : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  MIB.addImm(OpSel ? OpSel->getImm() : 0);
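// The predicates below identify scheduling and hazard boundaries: GPR-index
// mode changes, terminators and position markers, INLINEASM_BR, SCHED_BARRIER
// with a zero mask, writes to EXEC, and mode-changing or wave-visible
// instructions such as S_SETREG*, S_SETPRIO, sendmsg, EXP, DS_ORDERED_COUNT
// and traps.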
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
4225 switch (Imm.getBitWidth()) {
4245 APInt IntImm = Imm.bitcastToAPInt();
4265 assert(!MO.
isReg() &&
"isInlineConstant called on register operand!");
4274 int64_t Imm = MO.
getImm();
4275 switch (OperandType) {
4288 int32_t Trunc =
static_cast<int32_t
>(Imm);
4328 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
4333 int16_t Trunc =
static_cast<int16_t
>(Imm);
4344 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
4345 int16_t Trunc =
static_cast<int16_t
>(Imm);
4405 AMDGPU::OpName::src2))
4421 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.
hasGFX90AInsts())
4441 return Mods && Mods->
getImm();
4454 switch (
MI.getOpcode()) {
4455 default:
return false;
4457 case AMDGPU::V_ADDC_U32_e64:
4458 case AMDGPU::V_SUBB_U32_e64:
4459 case AMDGPU::V_SUBBREV_U32_e64: {
4467 case AMDGPU::V_MAC_F16_e64:
4468 case AMDGPU::V_MAC_F32_e64:
4469 case AMDGPU::V_MAC_LEGACY_F32_e64:
4470 case AMDGPU::V_FMAC_F16_e64:
4471 case AMDGPU::V_FMAC_F16_fake16_e64:
4472 case AMDGPU::V_FMAC_F32_e64:
4473 case AMDGPU::V_FMAC_F64_e64:
4474 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4480 case AMDGPU::V_CNDMASK_B32_e64:
4516 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4525 unsigned Op32)
const {
4539 Inst32.
add(
MI.getOperand(
I));
4543 int Idx =
MI.getNumExplicitDefs();
4545 int OpTy =
MI.getDesc().operands()[
Idx++].OperandType;
4583 if (MO.
getReg() == AMDGPU::SGPR_NULL || MO.
getReg() == AMDGPU::SGPR_NULL64)
4588 return MO.
getReg() == AMDGPU::M0 || MO.
getReg() == AMDGPU::VCC ||
4589 MO.
getReg() == AMDGPU::VCC_LO;
4591 return AMDGPU::SReg_32RegClass.contains(MO.
getReg()) ||
4592 AMDGPU::SReg_64RegClass.contains(MO.
getReg());
4601 switch (MO.getReg()) {
4603 case AMDGPU::VCC_LO:
4604 case AMDGPU::VCC_HI:
4606 case AMDGPU::FLAT_SCR:
4619 switch (
MI.getOpcode()) {
4620 case AMDGPU::V_READLANE_B32:
4621 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4622 case AMDGPU::V_WRITELANE_B32:
4623 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4630 if (
MI.isPreISelOpcode() ||
4631 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
4646 if (
SubReg.getReg().isPhysical())
4649 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
4661 ErrInfo =
"illegal copy from vector register to SGPR";
4679 if (!
MRI.isSSA() &&
MI.isCopy())
4680 return verifyCopy(
MI,
MRI, ErrInfo);
4682 if (SIInstrInfo::isGenericOpcode(
MI.getOpcode()))
4689 if (Src0Idx == -1) {
4699 if (!
Desc.isVariadic() &&
4700 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
4701 ErrInfo =
"Instruction has wrong number of operands.";
4705 if (
MI.isInlineAsm()) {
4718 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
4719 ErrInfo =
"inlineasm operand has incorrect register class.";
4727 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
4728 ErrInfo =
"missing memory operand from image instruction.";
4733 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
4736 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
4737 "all fp values to integers.";
4741 int RegClass =
Desc.operands()[i].RegClass;
4743 switch (
Desc.operands()[i].OperandType) {
4745 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
4746 ErrInfo =
"Illegal immediate value for operand.";
4767 ErrInfo =
"Illegal immediate value for operand.";
4774 ErrInfo =
"Expected inline constant for operand.";
4783 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
4784 ErrInfo =
"Expected immediate, but got non-immediate";
4806 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
4815 ErrInfo =
"Subtarget requires even aligned vector registers";
4820 if (RegClass != -1) {
4821 if (Reg.isVirtual())
4826 ErrInfo =
"Operand has incorrect register class.";
4835 ErrInfo =
"SDWA is not supported on this target";
4841 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
4849 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
4856 "Only reg allowed as operands in SDWA instructions on GFX9+";
4865 if (OMod !=
nullptr &&
4867 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
4872 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
4873 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
4874 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
4875 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
4878 unsigned Mods = Src0ModsMO->
getImm();
4881 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
4887 if (
isVOPC(BasicOpcode)) {
4891 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
4892 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
4898 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
4899 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
4905 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
4906 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
4913 if (DstUnused && DstUnused->isImm() &&
4916 if (!Dst.isReg() || !Dst.isTied()) {
4917 ErrInfo =
"Dst register should have tied register";
4922 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
4925 "Dst register should be tied to implicit use of preserved register";
4929 ErrInfo =
"Dst register should use same physical register as preserved";
4961 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
4962 if (RegCount > DstSize) {
4963 ErrInfo =
"Image instruction returns too many registers for dst "
4972 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
4973 unsigned ConstantBusCount = 0;
4974 bool UsesLiteral =
false;
4981 LiteralVal = &
MI.getOperand(ImmIdx);
4990 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5001 }
else if (!MO.
isFI()) {
5008 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5018 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5019 return !RI.regsOverlap(SGPRUsed, SGPR);
5029 Opcode != AMDGPU::V_WRITELANE_B32) {
5030 ErrInfo =
"VOP* instruction violates constant bus restriction";
5035 ErrInfo =
"VOP3 instruction uses literal";
5042 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5043 unsigned SGPRCount = 0;
5046 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5054 if (MO.
getReg() != SGPRUsed)
5060 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5067 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5068 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5075 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5085 ErrInfo =
"ABS not allowed in VOP3B instructions";
5098 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5105 if (
Desc.isBranch()) {
5107 ErrInfo =
"invalid branch target for SOPK instruction";
5113 if (!isUInt<16>(Imm)) {
5114 ErrInfo =
"invalid immediate for SOPK instruction";
5118 if (!isInt<16>(Imm)) {
5119 ErrInfo =
"invalid immediate for SOPK instruction";
5126 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5127 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5128 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5129 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5130 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5131 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5133 const unsigned StaticNumOps =
5134 Desc.getNumOperands() +
Desc.implicit_uses().size();
5135 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5140 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5141 ErrInfo =
"missing implicit register operands";
5147 if (!Dst->isUse()) {
5148 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5153 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5154 UseOpIdx != StaticNumOps + 1) {
5155 ErrInfo =
"movrel implicit operands should be tied";
5162 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5164 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5165 ErrInfo =
"src0 should be subreg of implicit vector use";
5173 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5174 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5180 if (
MI.mayStore() &&
5185 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5186 ErrInfo =
"scalar stores must use m0 as offset register";
5194 if (
Offset->getImm() != 0) {
5195 ErrInfo =
"subtarget does not support offsets in flat instructions";
5202 if (GDSOp && GDSOp->
getImm() != 0) {
5203 ErrInfo =
"GDS is not supported on this subtarget";
5212 AMDGPU::OpName::vaddr0);
5214 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5223 ErrInfo =
"dim is out of range";
5230 IsA16 = R128A16->
getImm() != 0;
5231 }
else if (ST.
hasA16()) {
5233 IsA16 = A16->
getImm() != 0;
5236 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5238 unsigned AddrWords =
5241 unsigned VAddrWords;
5243 VAddrWords = RsrcIdx - VAddr0Idx;
5246 unsigned LastVAddrIdx = RsrcIdx - 1;
5247 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5255 if (VAddrWords != AddrWords) {
5257 <<
" but got " << VAddrWords <<
"\n");
5258 ErrInfo =
"bad vaddr size";
5266 using namespace AMDGPU::DPP;
5268 unsigned DC = DppCt->getImm();
5269 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5270 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5271 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5272 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5273 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5274 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5275 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5276 ErrInfo = "Invalid dpp_ctrl value";
5279 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5281 ErrInfo = "Invalid dpp_ctrl value: "
5282 "wavefront shifts are not supported on GFX10+";
5285 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5287 ErrInfo = "Invalid dpp_ctrl value: "
5288 "broadcasts are not supported on GFX10+";
5291 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5293 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5294 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5296 ErrInfo = "Invalid dpp_ctrl value: "
5297 "row_newbroadcast/row_share is not supported before "
5302 ErrInfo = "Invalid dpp_ctrl value: "
5303 "row_share and row_xmask are not supported before GFX10";
5308 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5310 ErrInfo = "Invalid dpp_ctrl value: "
5311 "DP ALU dpp only support row_newbcast";
5318 uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
5319 : AMDGPU::OpName::vdata;
5328 ErrInfo = "Invalid register class: "
5329 "vdata and vdst should be both VGPR or AGPR";
5332 if (Data && Data2 &&
5334 ErrInfo = "Invalid register class: "
5335 "both data operands should be VGPR or AGPR";
5339 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5342 ErrInfo = "Invalid register class: "
5343 "agpr loads and stores not supported on this GPU";
5350 const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool {
5355 if (Reg.isPhysical())
5362 if (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
5363 MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
5364 MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
5366 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5367 ErrInfo = "Subtarget requires even aligned vector registers "
5368 "for DS_GWS instructions";
5374 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5375 ErrInfo = "Subtarget requires even aligned vector registers "
5376 "for vaddr operand of image instructions";
5382 if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
5385 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5386 ErrInfo = "Invalid register class: "
5387 "v_accvgpr_write with an SGPR is not supported on this GPU";
5392 if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5395 ErrInfo = "pseudo expects only physical SGPRs";
5407 switch (MI.getOpcode()) {
5408 default: return AMDGPU::INSTRUCTION_LIST_END;
5409 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5410 case AMDGPU::COPY:
return AMDGPU::COPY;
5411 case AMDGPU::PHI:
return AMDGPU::PHI;
5412 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5413 case AMDGPU::WQM:
return AMDGPU::WQM;
5414 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5415 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5416 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5417 case AMDGPU::S_MOV_B32: {
5419 return MI.getOperand(1).isReg() ||
5421 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5423 case AMDGPU::S_ADD_I32:
5424 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5425 case AMDGPU::S_ADDC_U32:
5426 return AMDGPU::V_ADDC_U32_e32;
5427 case AMDGPU::S_SUB_I32:
5428 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5431 case AMDGPU::S_ADD_U32:
5432 return AMDGPU::V_ADD_CO_U32_e32;
5433 case AMDGPU::S_SUB_U32:
5434 return AMDGPU::V_SUB_CO_U32_e32;
5435 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5436 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5437 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5438 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5439 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5440 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5441 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5442 case AMDGPU::S_XNOR_B32:
5443 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5444 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5445 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5446 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5447 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5448 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5449 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5450 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5451 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5452 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5453 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5454 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5455 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5456 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5457 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5458 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5459 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5460 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5461 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5462 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5463 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5464 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5465 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5466 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5467 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5468 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5469 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5470 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5471 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5472 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5473 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5474 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5475 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5476 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5477 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5478 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5479 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5480 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5481 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5482 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5483 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5484 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5485 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5486 case AMDGPU::S_CVT_F32_F16:
5487 case AMDGPU::S_CVT_HI_F32_F16:
5489 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5490 case AMDGPU::S_CVT_F16_F32:
5492 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5493 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5494 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5495 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5496 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5497 case AMDGPU::S_CEIL_F16:
5499 : AMDGPU::V_CEIL_F16_fake16_e64;
5500 case AMDGPU::S_FLOOR_F16:
5502 : AMDGPU::V_FLOOR_F16_fake16_e64;
5503 case AMDGPU::S_TRUNC_F16:
5504 return AMDGPU::V_TRUNC_F16_fake16_e64;
5505 case AMDGPU::S_RNDNE_F16:
5506 return AMDGPU::V_RNDNE_F16_fake16_e64;
5507 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5508 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5509 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5510 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5511 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5512 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5513 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5514 case AMDGPU::S_ADD_F16:
return AMDGPU::V_ADD_F16_fake16_e64;
5515 case AMDGPU::S_SUB_F16:
return AMDGPU::V_SUB_F16_fake16_e64;
5516 case AMDGPU::S_MIN_F16:
return AMDGPU::V_MIN_F16_fake16_e64;
5517 case AMDGPU::S_MAX_F16:
return AMDGPU::V_MAX_F16_fake16_e64;
5518 case AMDGPU::S_MINIMUM_F16:
return AMDGPU::V_MINIMUM_F16_e64;
5519 case AMDGPU::S_MAXIMUM_F16:
return AMDGPU::V_MAXIMUM_F16_e64;
5520 case AMDGPU::S_MUL_F16:
return AMDGPU::V_MUL_F16_fake16_e64;
5521 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5522 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5523 case AMDGPU::S_FMAC_F16:
return AMDGPU::V_FMAC_F16_fake16_e64;
5524 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5525 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5526 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5527 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5528 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5529 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5530 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5531 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5532 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5533 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5534 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5535 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5536 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5537 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5538 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5539 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5540 case AMDGPU::S_CMP_LT_F16:
5542 : AMDGPU::V_CMP_LT_F16_fake16_e64;
5543 case AMDGPU::S_CMP_EQ_F16:
5545 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
5546 case AMDGPU::S_CMP_LE_F16:
5548 : AMDGPU::V_CMP_LE_F16_fake16_e64;
5549 case AMDGPU::S_CMP_GT_F16:
5551 : AMDGPU::V_CMP_GT_F16_fake16_e64;
5552 case AMDGPU::S_CMP_LG_F16:
5554 : AMDGPU::V_CMP_LG_F16_fake16_e64;
5555 case AMDGPU::S_CMP_GE_F16:
5557 : AMDGPU::V_CMP_GE_F16_fake16_e64;
5558 case AMDGPU::S_CMP_O_F16:
5560 : AMDGPU::V_CMP_O_F16_fake16_e64;
5561 case AMDGPU::S_CMP_U_F16:
5563 : AMDGPU::V_CMP_U_F16_fake16_e64;
5564 case AMDGPU::S_CMP_NGE_F16:
5566 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
5567 case AMDGPU::S_CMP_NLG_F16:
5569 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
5570 case AMDGPU::S_CMP_NGT_F16:
5572 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
5573 case AMDGPU::S_CMP_NLE_F16:
5575 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
5576 case AMDGPU::S_CMP_NEQ_F16:
5578 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
5579 case AMDGPU::S_CMP_NLT_F16:
5581 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
5582 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5583 case AMDGPU::V_S_EXP_F16_e64:
return AMDGPU::V_EXP_F16_fake16_e64;
5584 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5585 case AMDGPU::V_S_LOG_F16_e64:
return AMDGPU::V_LOG_F16_fake16_e64;
5586 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5587 case AMDGPU::V_S_RCP_F16_e64:
return AMDGPU::V_RCP_F16_fake16_e64;
5588 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5589 case AMDGPU::V_S_RSQ_F16_e64:
return AMDGPU::V_RSQ_F16_fake16_e64;
5590 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5591 case AMDGPU::V_S_SQRT_F16_e64:
return AMDGPU::V_SQRT_F16_fake16_e64;
5594 "Unexpected scalar opcode without corresponding vector one!");
5607 bool IsWave32 = ST.isWave32();
5612 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5613 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5622 const unsigned OrSaveExec =
5623 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5636 unsigned ExecMov =
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5638 auto ExecRestoreMI =
5648 bool IsAllocatable) {
5649 if ((IsAllocatable || !ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
5654 case AMDGPU::AV_32RegClassID:
5655 RCID = AMDGPU::VGPR_32RegClassID;
5657 case AMDGPU::AV_64RegClassID:
5658 RCID = AMDGPU::VReg_64RegClassID;
5660 case AMDGPU::AV_96RegClassID:
5661 RCID = AMDGPU::VReg_96RegClassID;
5663 case AMDGPU::AV_128RegClassID:
5664 RCID = AMDGPU::VReg_128RegClassID;
5666 case AMDGPU::AV_160RegClassID:
5667 RCID = AMDGPU::VReg_160RegClassID;
5669 case AMDGPU::AV_512RegClassID:
5670 RCID = AMDGPU::VReg_512RegClassID;
5686 auto RegClass = TID.
operands()[OpNum].RegClass;
5687 bool IsAllocatable =
false;
5697 AMDGPU::OpName::vdst);
5700 : AMDGPU::OpName::vdata);
5701 if (DataIdx != -1) {
5703 TID.
Opcode, AMDGPU::OpName::data1);
5711 unsigned OpNo)
const {
5714 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
5715 Desc.operands()[OpNo].RegClass == -1) {
5718 if (Reg.isVirtual())
5719 return MRI.getRegClass(Reg);
5720 return RI.getPhysRegBaseClass(Reg);
5723 unsigned RCID =
Desc.operands()[OpNo].RegClass;
5732 unsigned RCID =
get(
MI.getOpcode()).operands()[OpIdx].RegClass;
5734 unsigned Size = RI.getRegSizeInBits(*RC);
5735 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
5736 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
5737 : AMDGPU::V_MOV_B32_e32;
5739 Opcode = AMDGPU::COPY;
5741 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
5755 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
5761 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
5772 if (SubIdx == AMDGPU::sub0)
5774 if (SubIdx == AMDGPU::sub1)
5786void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
5802 if (Reg.isPhysical())
5813 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
5840 MO = &
MI.getOperand(OpIdx);
5852 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
5858 if (!SGPRsUsed.
count(SGPR) &&
5861 if (--ConstantBusLimit <= 0)
5867 if (!LiteralLimit--)
5869 if (--ConstantBusLimit <= 0)
5887 unsigned Opc =
MI.getOpcode();
5895 isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5896 if ((
int)OpIdx == VDstIdx && DataIdx != -1 &&
5897 MI.getOperand(DataIdx).isReg() &&
5898 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
5900 if ((
int)OpIdx == DataIdx) {
5901 if (VDstIdx != -1 &&
5902 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5906 AMDGPU::OpName::data1);
5907 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
5908 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5911 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts() &&
5921 bool Is64BitOp = Is64BitFPOp ||
5934 if (!Is64BitFPOp && (int32_t)Imm < 0)
5952 unsigned Opc =
MI.getOpcode();
5971 if (Opc == AMDGPU::V_WRITELANE_B32) {
5974 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5980 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5997 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
5999 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6011 if (Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6013 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6025 if (HasImplicitSGPR || !
MI.isCommutable()) {
6042 if (CommutedOpc == -1) {
6047 MI.setDesc(
get(CommutedOpc));
6051 bool Src0Kill = Src0.
isKill();
6055 else if (Src1.
isReg()) {
6070 unsigned Opc =
MI.getOpcode();
6078 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6079 Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
6085 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6091 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6102 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6104 SGPRsUsed.
insert(SGPRReg);
6108 for (
int Idx : VOP3Idx) {
6117 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6142 if (ConstantBusLimit > 0) {
6154 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6155 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6165 SRC = RI.getCommonSubClass(SRC, DstRC);
6168 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6172 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6174 get(TargetOpcode::COPY), NewSrcReg)
6181 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6187 for (
unsigned i = 0; i < SubRegs; ++i) {
6188 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6190 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6197 get(AMDGPU::REG_SEQUENCE), DstReg);
6198 for (
unsigned i = 0; i < SubRegs; ++i) {
6213 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6215 SBase->setReg(SGPR);
6227 if (OldSAddrIdx < 0)
6244 if (NewVAddrIdx < 0)
6251 if (OldVAddrIdx >= 0) {
6253 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6254 if (!VAddrDef || VAddrDef->
getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6265 if (OldVAddrIdx == NewVAddrIdx) {
6268 MRI.removeRegOperandFromUseList(&NewVAddr);
6269 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6273 MRI.removeRegOperandFromUseList(&NewVAddr);
6274 MRI.addRegOperandToUseList(&NewVAddr);
6276 assert(OldSAddrIdx == NewVAddrIdx);
6278 if (OldVAddrIdx >= 0) {
6280 AMDGPU::OpName::vdst_in);
6284 if (NewVDstIn != -1) {
6291 if (NewVDstIn != -1) {
6333 unsigned OpSubReg =
Op.getSubReg();
6342 Register DstReg =
MRI.createVirtualRegister(DstRC);
6352 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6355 bool ImpDef = Def->isImplicitDef();
6356 while (!ImpDef && Def && Def->isCopy()) {
6357 if (Def->getOperand(1).getReg().isPhysical())
6359 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6360 ImpDef = Def && Def->isImplicitDef();
6362 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6380 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6381 unsigned SaveExecOpc =
6382 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6383 unsigned XorTermOpc =
6384 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6386 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6387 const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
6393 unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
6394 unsigned NumSubRegs = RegSize / 32;
6395 Register VScalarOp = ScalarOp->getReg();
6397 if (NumSubRegs == 1) {
6398 Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6400 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6403 Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
6405 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6411 CondReg = NewCondReg;
6413 Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
6421 ScalarOp->setReg(CurReg);
6422 ScalarOp->setIsKill();
6426 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6427 "Unhandled register size");
6429 for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
6430 Register CurRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6431 Register CurRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6434 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6435 .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));
6438 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6439 .addReg(VScalarOp, VScalarOpUndef,
6440 TRI->getSubRegFromChannel(Idx + 1));
6446 Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6447 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6453 Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
6454 auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6457 if (NumSubRegs <= 2)
6458 Cmp.addReg(VScalarOp);
6460 Cmp.addReg(VScalarOp, VScalarOpUndef,
6461 TRI->getSubRegFromChannel(Idx, 2));
6465 CondReg = NewCondReg;
6467 Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
6475 const auto *SScalarOpRC =
6476 TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
6477 Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
6481 auto Merge = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6482 unsigned Channel = 0;
6483 for (Register Piece : ReadlanePieces) {
6484 Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));
6488 ScalarOp->setReg(SScalarOp);
6489 ScalarOp->setIsKill();
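// --- Illustrative sketch (not part of SIInstrInfo.cpp) ---------------------
// The loop being built above is a "waterfall" loop: read the scalar value held
// by the first active lane, compare every lane against it, run the operation
// for the matching lanes, then clear those lanes from EXEC and repeat. A
// standalone C++ simulation of that control flow (the lane values and the
// 64-lane wave size are assumptions of the sketch; __builtin_ctzll is a
// GCC/Clang builtin):
#include <cstdint>
#include <cstdio>
#include <vector>

static void waterfall(const std::vector<uint32_t> &LaneValues) {
  uint64_t Exec = LaneValues.size() >= 64 ? ~0ull
                                          : ((1ull << LaneValues.size()) - 1);
  while (Exec) {
    unsigned FirstLane = __builtin_ctzll(Exec);      // v_readfirstlane_b32
    uint32_t Uniform = LaneValues[FirstLane];
    uint64_t SameValue = 0;
    for (unsigned L = 0; L < LaneValues.size(); ++L) // v_cmp_eq_*
      if (((Exec >> L) & 1) && LaneValues[L] == Uniform)
        SameValue |= 1ull << L;
    std::printf("issue op with uniform value %u for lanes 0x%llx\n", Uniform,
                (unsigned long long)SameValue);
    Exec &= ~SameValue;                              // drop the handled lanes
  }
}

int main() { waterfall({7, 7, 3, 7, 3, 9}); }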
6493 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6494 MRI.setSimpleHint(SaveExec, CondReg);
6525 if (!Begin.isValid())
6527 if (!
End.isValid()) {
6532 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6533 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6534 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6543 std::numeric_limits<unsigned>::max()) !=
6546 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6552 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6561 for (
auto I = Begin;
I != AfterMI;
I++) {
6562 for (
auto &MO :
I->all_uses())
6563 MRI.clearKillFlags(MO.getReg());
6598 for (
auto &Succ : RemainderBB->
successors()) {
6621 static std::tuple<unsigned, unsigned>
6629 TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
6630 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
6633 Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6634 Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6635 Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6636 Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6637 uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
6654 .addImm(AMDGPU::sub0_sub1)
6660 return std::tuple(RsrcPtr, NewSRsrc);
6697 if (
MI.getOpcode() == AMDGPU::PHI) {
6699 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
6700 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
6703 MRI.getRegClass(
MI.getOperand(i).getReg());
6718 VRC = &AMDGPU::VReg_1RegClass;
6734 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6736 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6752 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6759 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6761 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6779 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6784 if (DstRC != Src0RC) {
6793 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6801 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6802 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6803 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6804 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6805 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
6806 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
6807 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
6822 : AMDGPU::OpName::srsrc;
6827 int SampOpName =
isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6836 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6842 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6843 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6848 while (Start->getOpcode() != FrameSetupOpcode)
6851 while (
End->getOpcode() != FrameDestroyOpcode)
6855 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
6856 MI.definesRegister(
End->getOperand(1).getReg(),
nullptr))
6864 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6866 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6877 bool isSoffsetLegal =
true;
6880 if (SoffsetIdx != -1) {
6884 isSoffsetLegal =
false;
6888 bool isRsrcLegal =
true;
6891 if (RsrcIdx != -1) {
6894 isRsrcLegal =
false;
6898 if (isRsrcLegal && isSoffsetLegal)
6922 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6923 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6924 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6927 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
6928 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
6930 unsigned RsrcPtr, NewSRsrc;
6937 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6944 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6962 "FIXME: Need to emit flat atomics here");
6964 unsigned RsrcPtr, NewSRsrc;
6967 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6990 MIB.
addImm(CPol->getImm());
6995 MIB.
addImm(TFE->getImm());
7015 MI.removeFromParent();
7020 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7022 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7026 if (!isSoffsetLegal) {
7038 if (!isSoffsetLegal) {
7047 InstrList.insert(
MI);
7051 if (RsrcIdx != -1) {
7052 DeferredList.insert(
MI);
7057 return DeferredList.contains(
MI);
7063 while (!Worklist.
empty()) {
7077 "Deferred MachineInstr are not supposed to re-populate worklist");
7095 case AMDGPU::S_ADD_U64_PSEUDO:
7096 NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
7098 case AMDGPU::S_SUB_U64_PSEUDO:
7099 NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
7101 case AMDGPU::S_ADD_I32:
7102 case AMDGPU::S_SUB_I32: {
7106 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7114 case AMDGPU::S_MUL_U64:
7116 splitScalarSMulU64(Worklist, Inst, MDT);
7120 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7121 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7124 splitScalarSMulPseudo(Worklist, Inst, MDT);
7128 case AMDGPU::S_AND_B64:
7129 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7133 case AMDGPU::S_OR_B64:
7134 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7138 case AMDGPU::S_XOR_B64:
7139 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7143 case AMDGPU::S_NAND_B64:
7144 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7148 case AMDGPU::S_NOR_B64:
7149 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7153 case AMDGPU::S_XNOR_B64:
7155 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7157 splitScalar64BitXnor(Worklist, Inst, MDT);
7161 case AMDGPU::S_ANDN2_B64:
7162 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7166 case AMDGPU::S_ORN2_B64:
7167 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7171 case AMDGPU::S_BREV_B64:
7172 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7176 case AMDGPU::S_NOT_B64:
7177 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7181 case AMDGPU::S_BCNT1_I32_B64:
7182 splitScalar64BitBCNT(Worklist, Inst);
7186 case AMDGPU::S_BFE_I64:
7187 splitScalar64BitBFE(Worklist, Inst);
7191 case AMDGPU::S_FLBIT_I32_B64:
7192 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7195 case AMDGPU::S_FF1_I32_B64:
7196 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7200 case AMDGPU::S_LSHL_B32:
7202 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7206 case AMDGPU::S_ASHR_I32:
7208 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7212 case AMDGPU::S_LSHR_B32:
7214 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7218 case AMDGPU::S_LSHL_B64:
7221 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7222 : AMDGPU::V_LSHLREV_B64_e64;
7226 case AMDGPU::S_ASHR_I64:
7228 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7232 case AMDGPU::S_LSHR_B64:
7234 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7239 case AMDGPU::S_ABS_I32:
7240 lowerScalarAbs(Worklist, Inst);
7244 case AMDGPU::S_CBRANCH_SCC0:
7245 case AMDGPU::S_CBRANCH_SCC1: {
7248 bool IsSCC = CondReg == AMDGPU::SCC;
7251 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7254 .
addReg(IsSCC ? VCC : CondReg);
7258 case AMDGPU::S_BFE_U64:
7259 case AMDGPU::S_BFM_B64:
7262 case AMDGPU::S_PACK_LL_B32_B16:
7263 case AMDGPU::S_PACK_LH_B32_B16:
7264 case AMDGPU::S_PACK_HL_B32_B16:
7265 case AMDGPU::S_PACK_HH_B32_B16:
7266 movePackToVALU(Worklist,
MRI, Inst);
7270 case AMDGPU::S_XNOR_B32:
7271 lowerScalarXnor(Worklist, Inst);
7275 case AMDGPU::S_NAND_B32:
7276 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7280 case AMDGPU::S_NOR_B32:
7281 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7285 case AMDGPU::S_ANDN2_B32:
7286 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7290 case AMDGPU::S_ORN2_B32:
7291 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7299 case AMDGPU::S_ADD_CO_PSEUDO:
7300 case AMDGPU::S_SUB_CO_PSEUDO: {
7301 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7302 ? AMDGPU::V_ADDC_U32_e64
7303 : AMDGPU::V_SUBB_U32_e64;
7307 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7308 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7326 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7330 case AMDGPU::S_UADDO_PSEUDO:
7331 case AMDGPU::S_USUBO_PSEUDO: {
7338 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7339 ? AMDGPU::V_ADD_CO_U32_e64
7340 : AMDGPU::V_SUB_CO_U32_e64;
7343 Register DestReg =
MRI.createVirtualRegister(NewRC);
7351 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7358 case AMDGPU::S_CSELECT_B32:
7359 case AMDGPU::S_CSELECT_B64:
7360 lowerSelect(Worklist, Inst, MDT);
7363 case AMDGPU::S_CMP_EQ_I32:
7364 case AMDGPU::S_CMP_LG_I32:
7365 case AMDGPU::S_CMP_GT_I32:
7366 case AMDGPU::S_CMP_GE_I32:
7367 case AMDGPU::S_CMP_LT_I32:
7368 case AMDGPU::S_CMP_LE_I32:
7369 case AMDGPU::S_CMP_EQ_U32:
7370 case AMDGPU::S_CMP_LG_U32:
7371 case AMDGPU::S_CMP_GT_U32:
7372 case AMDGPU::S_CMP_GE_U32:
7373 case AMDGPU::S_CMP_LT_U32:
7374 case AMDGPU::S_CMP_LE_U32:
7375 case AMDGPU::S_CMP_EQ_U64:
7376 case AMDGPU::S_CMP_LG_U64:
7377 case AMDGPU::S_CMP_LT_F32:
7378 case AMDGPU::S_CMP_EQ_F32:
7379 case AMDGPU::S_CMP_LE_F32:
7380 case AMDGPU::S_CMP_GT_F32:
7381 case AMDGPU::S_CMP_LG_F32:
7382 case AMDGPU::S_CMP_GE_F32:
7383 case AMDGPU::S_CMP_O_F32:
7384 case AMDGPU::S_CMP_U_F32:
7385 case AMDGPU::S_CMP_NGE_F32:
7386 case AMDGPU::S_CMP_NLG_F32:
7387 case AMDGPU::S_CMP_NGT_F32:
7388 case AMDGPU::S_CMP_NLE_F32:
7389 case AMDGPU::S_CMP_NEQ_F32:
7390 case AMDGPU::S_CMP_NLT_F32: {
7409 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7413 case AMDGPU::S_CMP_LT_F16:
7414 case AMDGPU::S_CMP_EQ_F16:
7415 case AMDGPU::S_CMP_LE_F16:
7416 case AMDGPU::S_CMP_GT_F16:
7417 case AMDGPU::S_CMP_LG_F16:
7418 case AMDGPU::S_CMP_GE_F16:
7419 case AMDGPU::S_CMP_O_F16:
7420 case AMDGPU::S_CMP_U_F16:
7421 case AMDGPU::S_CMP_NGE_F16:
7422 case AMDGPU::S_CMP_NLG_F16:
7423 case AMDGPU::S_CMP_NGT_F16:
7424 case AMDGPU::S_CMP_NLE_F16:
7425 case AMDGPU::S_CMP_NEQ_F16:
7426 case AMDGPU::S_CMP_NLT_F16: {
7448 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7452 case AMDGPU::S_CVT_HI_F32_F16: {
7454 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7455 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7461 .
addReg(TmpReg, 0, AMDGPU::hi16)
7477 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7481 case AMDGPU::S_MINIMUM_F32:
7482 case AMDGPU::S_MAXIMUM_F32:
7483 case AMDGPU::S_MINIMUM_F16:
7484 case AMDGPU::S_MAXIMUM_F16: {
7486 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7497 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7503 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7511 if (NewOpcode == Opcode) {
7535 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7547 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7548 MRI.replaceRegWith(DstReg, NewDstReg);
7550 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7561 AMDGPU::OpName::src0_modifiers) >= 0)
7566 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7567 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7569 NewInstr->addOperand(Src);
7572 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7575 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7577 NewInstr.addImm(
Size);
7578 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7582 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7587 "Scalar BFE is only implemented for constant width and offset");
7596 AMDGPU::OpName::src1_modifiers) >= 0)
7601 AMDGPU::OpName::src2_modifiers) >= 0)
7615 NewInstr->addOperand(
Op);
7622 if (
Op.getReg() == AMDGPU::SCC) {
7624 if (
Op.isDef() && !
Op.isDead())
7625 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7627 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7632 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7633 Register DstReg = NewInstr->getOperand(0).getReg();
7638 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7639 MRI.replaceRegWith(DstReg, NewDstReg);
7645 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7649std::pair<bool, MachineBasicBlock *>
7661 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7664 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
7666 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
7667 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
7675 MRI.replaceRegWith(OldDstReg, ResultReg);
7678 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7679 return std::pair(
true, NewBB);
7682 return std::pair(
false,
nullptr);
7699 bool IsSCC = (CondReg == AMDGPU::SCC);
7707 MRI.replaceRegWith(Dest.
getReg(), CondReg);
7714 NewCondReg =
MRI.createVirtualRegister(TC);
7718 bool CopyFound =
false;
7722 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
7724 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7726 .
addReg(CandI.getOperand(1).getReg());
7738 ST.
isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
7748 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
7749 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7762 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
7764 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
7776 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7777 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7780 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7790 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7791 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7806 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7814 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7815 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7821 bool Src0IsSGPR = Src0.
isReg() &&
7823 bool Src1IsSGPR = Src1.
isReg() &&
7826 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7827 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7837 }
else if (Src1IsSGPR) {
7851 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7855 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7861 unsigned Opcode)
const {
7871 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7872 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7884 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7885 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7890 unsigned Opcode)
const {
7900 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7901 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7913 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7914 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7932 &AMDGPU::SGPR_32RegClass;
7935 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7938 AMDGPU::sub0, Src0SubRC);
7943 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7945 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
7949 AMDGPU::sub1, Src0SubRC);
7951 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
7957 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
7964 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7966 Worklist.
insert(&LoHalf);
7967 Worklist.
insert(&HiHalf);
7973 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7984 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7985 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7986 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7997 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8001 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8031 Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8037 Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8043 Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8054 Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8070 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8082 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8093 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8094 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8095 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8106 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8110 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8122 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8123 ? AMDGPU::V_MUL_HI_U32_e64
8124 : AMDGPU::V_MUL_HI_I32_e64;
8139 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8147 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8166 &AMDGPU::SGPR_32RegClass;
8169 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8172 &AMDGPU::SGPR_32RegClass;
8175 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8178 AMDGPU::sub0, Src0SubRC);
8180 AMDGPU::sub0, Src1SubRC);
8182 AMDGPU::sub1, Src0SubRC);
8184 AMDGPU::sub1, Src1SubRC);
8189 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8191 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8196 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8201 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8208 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8210 Worklist.
insert(&LoHalf);
8211 Worklist.
insert(&HiHalf);
8214 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8232 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8248 Register NewDest =
MRI.createVirtualRegister(DestRC);
8254 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8272 MRI.getRegClass(Src.getReg()) :
8273 &AMDGPU::SGPR_32RegClass;
8275 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8276 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8279 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8282 AMDGPU::sub0, SrcSubRC);
8284 AMDGPU::sub1, SrcSubRC);
8290 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8294 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8313 Offset == 0 &&
"Not implemented");
8316 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8317 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8318 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8335 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8336 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8341 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8342 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8346 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8349 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8354 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8355 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8376 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8377 unsigned OpcodeAdd =
8378 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8381 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8383 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8390 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8391 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8392 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8393 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8400 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8406 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8408 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8410 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
8413void SIInstrInfo::addUsersToMoveToVALUWorklist(
8417 E =
MRI.use_end();
I != E;) {
8422 switch (
UseMI.getOpcode()) {
8425 case AMDGPU::SOFT_WQM:
8426 case AMDGPU::STRICT_WWM:
8427 case AMDGPU::STRICT_WQM:
8428 case AMDGPU::REG_SEQUENCE:
8430 case AMDGPU::INSERT_SUBREG:
8433 OpNo =
I.getOperandNo();
8442 }
while (
I != E &&
I->getParent() == &
UseMI);
8452 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8459 case AMDGPU::S_PACK_LL_B32_B16: {
8460 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8461 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8478 case AMDGPU::S_PACK_LH_B32_B16: {
8479 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8488 case AMDGPU::S_PACK_HL_B32_B16: {
8489 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8499 case AMDGPU::S_PACK_HH_B32_B16: {
8500 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8501 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8518 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8519 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8528 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8529 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8537 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
8541 Register DestReg =
MI.getOperand(0).getReg();
8543 MRI.replaceRegWith(DestReg, NewCond);
8548 MI.getOperand(SCCIdx).setReg(NewCond);
8554 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
8557 for (
auto &Copy : CopyToDelete)
8558 Copy->eraseFromParent();
8566void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8575 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8577 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8594 case AMDGPU::REG_SEQUENCE:
8595 case AMDGPU::INSERT_SUBREG:
8597 case AMDGPU::SOFT_WQM:
8598 case AMDGPU::STRICT_WWM:
8599 case AMDGPU::STRICT_WQM: {
8607 case AMDGPU::REG_SEQUENCE:
8608 case AMDGPU::INSERT_SUBREG:
8618 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8635 int OpIndices[3])
const {
8654 for (
unsigned i = 0; i < 3; ++i) {
8655 int Idx = OpIndices[i];
8692 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8693 SGPRReg = UsedSGPRs[0];
8696 if (!SGPRReg && UsedSGPRs[1]) {
8697 if (UsedSGPRs[1] == UsedSGPRs[2])
8698 SGPRReg = UsedSGPRs[1];
8705 unsigned OperandName)
const {
8710 return &
MI.getOperand(
Idx);
8727 RsrcDataFormat |= (1ULL << 56);
8732 RsrcDataFormat |= (2ULL << 59);
8735 return RsrcDataFormat;
8757 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8763 unsigned Opc =
MI.getOpcode();
8769 return get(Opc).mayLoad() &&
8774 int &FrameIndex)
const {
8782 FrameIndex =
Addr->getIndex();
8787 int &FrameIndex)
const {
8790 FrameIndex =
Addr->getIndex();
8795 int &FrameIndex)
const {
8809 int &FrameIndex)
const {
8826 while (++
I != E &&
I->isInsideBundle()) {
8827 assert(!
I->isBundle() &&
"No nested bundle!");
8835 unsigned Opc =
MI.getOpcode();
8837 unsigned DescSize =
Desc.getSize();
8842 unsigned Size = DescSize;
8857 bool HasLiteral =
false;
8858 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
8866 return HasLiteral ? DescSize + 4 : DescSize;
8876 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8880 case TargetOpcode::BUNDLE:
8882 case TargetOpcode::INLINEASM:
8883 case TargetOpcode::INLINEASM_BR: {
8885 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8889 if (
MI.isMetaInstruction())
8899 if (
MI.memoperands_empty())
8911 static const std::pair<int, const char *> TargetIndices[] = {
8949std::pair<unsigned, unsigned>
8956 static const std::pair<unsigned, const char *> TargetFlags[] = {
8971 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8985 return AMDGPU::WWM_COPY;
8987 return AMDGPU::COPY;
8999 bool IsNullOrVectorRegister =
true;
9007 return IsNullOrVectorRegister &&
9009 (Opcode == AMDGPU::IMPLICIT_DEF &&
9011 (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
9012 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9025 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
9056 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9057 case AMDGPU::SI_KILL_I1_TERMINATOR:
9066 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9067 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9068 case AMDGPU::SI_KILL_I1_PSEUDO:
9069 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9081 const unsigned OffsetBits =
9083 return (1 << OffsetBits) - 1;
9090 if (
MI.isInlineAsm())
9093 for (
auto &
Op :
MI.implicit_operands()) {
9094 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
9095 Op.setReg(AMDGPU::VCC_LO);
9108 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9126 if (Imm <= MaxImm + 64) {
9128 Overflow = Imm - MaxImm;
9218 std::pair<int64_t, int64_t>
9221 int64_t RemainderOffset = COffsetVal;
9222 int64_t ImmField = 0;
9227 if (AllowNegative) {
9229 int64_t D = 1LL << NumBits;
9230 RemainderOffset = (COffsetVal / D) * D;
9231 ImmField = COffsetVal - RemainderOffset;
9235 (ImmField % 4) != 0) {
9237 RemainderOffset += ImmField % 4;
9238 ImmField -= ImmField % 4;
9240 } else if (COffsetVal >= 0) {
9241 ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
9242 RemainderOffset = COffsetVal - ImmField;
9246 assert(RemainderOffset + ImmField == COffsetVal);
9247 return {ImmField, RemainderOffset};
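// --- Illustrative sketch (not part of SIInstrInfo.cpp) ---------------------
// The routine above splits a constant offset into an immediate field that fits
// the instruction encoding plus a remainder to be added separately. The same
// arithmetic as a standalone helper (NumBits and AllowNegative are parameters
// of the sketch; the extra %4 adjustment shown above is omitted):
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<int64_t, int64_t> splitOffset(int64_t COffsetVal,
                                               unsigned NumBits,
                                               bool AllowNegative) {
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  if (AllowNegative) {
    // Signed immediate: truncate toward zero to a multiple of 2^NumBits.
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;
  } else if (COffsetVal >= 0) {
    // Unsigned immediate: keep the low NumBits bits in the field.
    ImmField = COffsetVal & ((1LL << NumBits) - 1);
    RemainderOffset = COffsetVal - ImmField;
  }
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}

int main() {
  auto [Imm, Rem] = splitOffset(0x12345, 12, /*AllowNegative=*/false);
  assert(Imm == 0x345 && Rem == 0x12000);
}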
9259 switch (ST.getGeneration()) {
9284 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9285 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9286 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9287 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9288 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9289 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9290 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9291 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9298#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
9299 case OPCODE##_dpp: \
9300 case OPCODE##_e32: \
9301 case OPCODE##_e64: \
9302 case OPCODE##_e64_dpp: \
9317 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
9318 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
9319 case AMDGPU::V_FMA_F16_gfx9_e64:
9320 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
9321 case AMDGPU::V_INTERP_P2_F16:
9322 case AMDGPU::V_MAD_F16_e64:
9323 case AMDGPU::V_MAD_U16_e64:
9324 case AMDGPU::V_MAD_I16_e64:
9359 if (
isMAI(Opcode)) {
9404 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
9405 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9406 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9418 switch (
MI.getOpcode()) {
9420 case AMDGPU::REG_SEQUENCE:
9424 case AMDGPU::INSERT_SUBREG:
9425 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9442 if (!
P.Reg.isVirtual())
9446 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9447 while (
auto *
MI = DefInst) {
9449 switch (
MI->getOpcode()) {
9451 case AMDGPU::V_MOV_B32_e32: {
9452 auto &Op1 =
MI->getOperand(1);
9457 DefInst =
MRI.getVRegDef(RSR.Reg);
9465 DefInst =
MRI.getVRegDef(RSR.Reg);
9478 assert(
MRI.isSSA() &&
"Must be run on SSA");
9480 auto *
TRI =
MRI.getTargetRegisterInfo();
9481 auto *DefBB =
DefMI.getParent();
9485 if (
UseMI.getParent() != DefBB)
9488 const int MaxInstScan = 20;
9492 auto E =
UseMI.getIterator();
9493 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
9494 if (
I->isDebugInstr())
9497 if (++NumInst > MaxInstScan)
9500 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9510 assert(
MRI.isSSA() &&
"Must be run on SSA");
9512 auto *
TRI =
MRI.getTargetRegisterInfo();
9513 auto *DefBB =
DefMI.getParent();
9515 const int MaxUseScan = 10;
9518 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9519 auto &UseInst = *
Use.getParent();
9522 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9525 if (++NumUse > MaxUseScan)
9532 const int MaxInstScan = 20;
9536 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9539 if (
I->isDebugInstr())
9542 if (++NumInst > MaxInstScan)
9555 if (Reg == VReg && --NumUse == 0)
9557 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9569 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
9572 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9581 if (InsPt !=
MBB.
end() &&
9582 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9583 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9584 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9585 InsPt->definesRegister(Src,
nullptr)) {
9589 : AMDGPU::S_MOV_B64_term),
9591 .
addReg(Src, 0, SrcSubReg)
9616 if (isFullCopyInstr(
MI)) {
9625 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9629 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9640 unsigned *PredCost) const {
9641 if (MI.isBundle()) {
9644 unsigned Lat = 0, Count = 0;
9645 for (++I; I != E && I->isBundledWithPred(); ++I) {
9647 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
9649 return Lat + Count - 1;
9652 return SchedModel.computeInstrLatency(&MI);
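// --- Illustrative sketch (not part of SIInstrInfo.cpp) ---------------------
// A standalone restatement of the bundle-latency arithmetic visible above:
// take the maximum member latency and add one cycle per additional member
// (the member latencies here are hypothetical inputs, not SchedModel queries).
#include <algorithm>
#include <cassert>
#include <vector>

static unsigned bundleLatency(const std::vector<unsigned> &MemberLatencies) {
  unsigned Lat = 0, Count = 0;
  for (unsigned L : MemberLatencies) {
    ++Count;
    Lat = std::max(Lat, L);
  }
  return Count ? Lat + Count - 1 : 0;
}

int main() { assert(bundleLatency({4, 1, 2}) == 6); } // max(4,1,2) + 3 - 1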
9657 unsigned opcode =
MI.getOpcode();
9658 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9659 auto IID = GI->getIntrinsicID();
9666 case Intrinsic::amdgcn_if:
9667 case Intrinsic::amdgcn_else:
9681 if (opcode == AMDGPU::G_LOAD) {
9682 if (
MI.memoperands_empty())
9686 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9687 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9695 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9696 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9697 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9710 unsigned opcode =
MI.getOpcode();
9711 if (opcode == AMDGPU::V_READLANE_B32 ||
9712 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9713 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9716 if (isCopyInstr(
MI)) {
9720 RI.getPhysRegBaseClass(srcOp.
getReg());
9728 if (
MI.isPreISelOpcode())
9743 if (
MI.memoperands_empty())
9747 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9748 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9763 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
9769 if (!Reg || !
SrcOp.readsReg())
9775 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
9812 Register &SrcReg2, int64_t &CmpMask,
9813 int64_t &CmpValue)
const {
9814 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
9817 switch (
MI.getOpcode()) {
9820 case AMDGPU::S_CMP_EQ_U32:
9821 case AMDGPU::S_CMP_EQ_I32:
9822 case AMDGPU::S_CMP_LG_U32:
9823 case AMDGPU::S_CMP_LG_I32:
9824 case AMDGPU::S_CMP_LT_U32:
9825 case AMDGPU::S_CMP_LT_I32:
9826 case AMDGPU::S_CMP_GT_U32:
9827 case AMDGPU::S_CMP_GT_I32:
9828 case AMDGPU::S_CMP_LE_U32:
9829 case AMDGPU::S_CMP_LE_I32:
9830 case AMDGPU::S_CMP_GE_U32:
9831 case AMDGPU::S_CMP_GE_I32:
9832 case AMDGPU::S_CMP_EQ_U64:
9833 case AMDGPU::S_CMP_LG_U64:
9834 SrcReg =
MI.getOperand(0).getReg();
9835 if (
MI.getOperand(1).isReg()) {
9836 if (
MI.getOperand(1).getSubReg())
9838 SrcReg2 =
MI.getOperand(1).getReg();
9840 }
else if (
MI.getOperand(1).isImm()) {
9842 CmpValue =
MI.getOperand(1).getImm();
9848 case AMDGPU::S_CMPK_EQ_U32:
9849 case AMDGPU::S_CMPK_EQ_I32:
9850 case AMDGPU::S_CMPK_LG_U32:
9851 case AMDGPU::S_CMPK_LG_I32:
9852 case AMDGPU::S_CMPK_LT_U32:
9853 case AMDGPU::S_CMPK_LT_I32:
9854 case AMDGPU::S_CMPK_GT_U32:
9855 case AMDGPU::S_CMPK_GT_I32:
9856 case AMDGPU::S_CMPK_LE_U32:
9857 case AMDGPU::S_CMPK_LE_I32:
9858 case AMDGPU::S_CMPK_GE_U32:
9859 case AMDGPU::S_CMPK_GE_I32:
9860 SrcReg =
MI.getOperand(0).getReg();
9862 CmpValue =
MI.getOperand(1).getImm();
9880 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
9881 this](int64_t ExpectedValue, unsigned SrcSize,
9882 bool IsReversible, bool IsSigned) -> bool {
9907 if (!Def || Def->getParent() != CmpInstr.getParent())
9910 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
9911 Def->getOpcode() != AMDGPU::S_AND_B64)
9915 const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
9926 SrcOp = &Def->getOperand(2);
9927 else if (isMask(&Def->getOperand(2)))
9928 SrcOp = &Def->getOperand(1);
9934 assert(llvm::has_single_bit<uint64_t>(Mask) && "Invalid mask.");
9936 if (IsSigned && BitNo == SrcSize - 1)
9939 ExpectedValue <<= BitNo;
9941 bool IsReversedCC = false;
9942 if (CmpValue != ExpectedValue) {
9945 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
9950 Register DefReg = Def->getOperand(0).getReg();
9951 if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
9954 for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
9956 if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
9957 I->killsRegister(AMDGPU::SCC, &RI))
9962 Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
9966 if (!MRI->use_nodbg_empty(DefReg)) {
9974 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
9975 : AMDGPU::S_BITCMP1_B32
9976 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
9977 : AMDGPU::S_BITCMP1_B64;
9982 Def->eraseFromParent();
9990 case AMDGPU::S_CMP_EQ_U32:
9991 case AMDGPU::S_CMP_EQ_I32:
9992 case AMDGPU::S_CMPK_EQ_U32:
9993 case AMDGPU::S_CMPK_EQ_I32:
9994 return optimizeCmpAnd(1, 32, true, false);
9995 case AMDGPU::S_CMP_GE_U32:
9996 case AMDGPU::S_CMPK_GE_U32:
9997 return optimizeCmpAnd(1, 32, false, false);
9998 case AMDGPU::S_CMP_GE_I32:
9999 case AMDGPU::S_CMPK_GE_I32:
10000 return optimizeCmpAnd(1, 32, false, true);
10001 case AMDGPU::S_CMP_EQ_U64:
10002 return optimizeCmpAnd(1, 64, true, false);
10003 case AMDGPU::S_CMP_LG_U32:
10004 case AMDGPU::S_CMP_LG_I32:
10005 case AMDGPU::S_CMPK_LG_U32:
10006 case AMDGPU::S_CMPK_LG_I32:
10007 return optimizeCmpAnd(0, 32, true, false);
10008 case AMDGPU::S_CMP_GT_U32:
10009 case AMDGPU::S_CMPK_GT_U32:
10010 return optimizeCmpAnd(0, 32, false, false);
10011 case AMDGPU::S_CMP_GT_I32:
10012 case AMDGPU::S_CMPK_GT_I32:
10013 return optimizeCmpAnd(0, 32, false, true);
10014 case AMDGPU::S_CMP_LG_U64:
10015 return optimizeCmpAnd(0, 64, true, false);
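// --- Illustrative sketch (not part of SIInstrInfo.cpp) ---------------------
// optimizeCmpAnd above replaces a compare of an AND-with-single-bit-mask
// result against 0 or against the mask itself with a single bit test
// (S_BITCMP0/S_BITCMP1). The decision logic reduced to plain arithmetic
// (names are the sketch's own; __builtin_ctzll is a GCC/Clang builtin):
#include <cassert>
#include <cstdint>

static bool hasSingleBit(uint64_t M) { return M && (M & (M - 1)) == 0; }

// True iff ((Src & Mask) == Expected) can be answered by testing one bit.
static bool foldsToBitTest(uint64_t Mask, uint64_t Expected, unsigned &BitNo,
                           bool &TestForSet) {
  if (!hasSingleBit(Mask) || (Expected != 0 && Expected != Mask))
    return false;
  BitNo = __builtin_ctzll(Mask); // position of the single mask bit
  TestForSet = (Expected == Mask);
  return true;
}

int main() {
  unsigned BitNo;
  bool Set;
  assert(foldsToBitTest(0x10, 0x10, BitNo, Set) && BitNo == 4 && Set);
  assert(foldsToBitTest(0x10, 0x00, BitNo, Set) && BitNo == 4 && !Set);
  assert(!foldsToBitTest(0x18, 0x08, BitNo, Set)); // not a single-bit mask
}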
10022 unsigned OpName)
const {
10040 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10043 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10044 : &AMDGPU::VReg_64_Align2RegClass);
10046 .
addReg(DataReg, 0,
Op.getSubReg())
10051 Op.setSubReg(AMDGPU::sub0);
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasVALUMaskWriteHazard() const
bool needsAlignedVGPRs() const
Return whether operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
bool hasVALUReadSGPRHazard() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasNoF16PseudoScalarTransInlineConstants() const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have a successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
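A minimal sketch (not taken from the source) of querying an opcode's static MCInstrDesc through the accessors above; TII and the opcode choice are assumptions:
  // Inspect the static description of an opcode; the opcode here is only illustrative.
  const MCInstrDesc &Desc = TII->get(AMDGPU::V_ADD_U32_e32);
  unsigned NumDeclaredOps = Desc.getNumOperands();   // declared operand count
  unsigned NumDefs = Desc.getNumDefs();              // register definitions
  bool ReadsMem = Desc.mayLoad(), WritesMem = Desc.mayStore();
  ArrayRef<MCPhysReg> ImpUses = Desc.implicit_uses(); // implicitly read physical registers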
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
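The MCExpr/MCSymbol entries above compose as follows; a rough sketch (assuming Ctx, DestSym, SrcSym and OffsetSym exist in the surrounding code) of describing ((DestSym - SrcSym) >> 2) symbolically and binding it to a symbol:
  // Build the symbolic expression ((DestSym - SrcSym) >> 2) and attach it to OffsetSym.
  const MCExpr *Dest = MCSymbolRefExpr::create(DestSym, Ctx);
  const MCExpr *Src  = MCSymbolRefExpr::create(SrcSym, Ctx);
  const MCExpr *Diff = MCBinaryExpr::createSub(Dest, Src, Ctx);
  const MCExpr *Shifted =
      MCBinaryExpr::createAShr(Diff, MCConstantExpr::create(2, Ctx), Ctx);
  OffsetSym->setVariableValue(Shifted);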
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Dead
Register is known to be fully dead.
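As an illustration of the liveness query above, a sketch (assumed names MBB, I, TRI) of checking that SCC is dead before clobbering it at an insertion point:
  // Only insert an SCC-defining instruction if SCC is not live just before I.
  if (MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, I) ==
      MachineBasicBlock::LQR_Dead) {
    // Safe to define SCC here without saving and restoring it.
  }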
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
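A short, illustrative use of the builder methods above; the names MBB, I, DL, TII, SDst, VDst, SrcReg and KillSrc are assumptions, not from the source:
  // Materialize an immediate into a scalar register.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SDst)
      .addImm(42);
  // Move a register into a VGPR, propagating the kill flag of the source.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), VDst)
      .addReg(SrcReg, getKillRegState(KillSrc));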
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
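A sketch of mutating and creating operands with the accessors above; MO, KnownConstant, Imm and NewReg are assumed names, not taken from the source:
  // Rewrite a register use in place once its value is known, otherwise retarget it.
  if (MO.isReg() && !MO.isDef()) {
    if (KnownConstant)
      MO.ChangeToImmediate(Imm);   // the operand becomes a MO_Immediate
    else {
      MO.setReg(NewReg);           // keep the operand, point it at a replacement register
      MO.setIsKill(false);
    }
  }
  MachineOperand ImmOp = MachineOperand::CreateImm(0); // building a fresh immediate operand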
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
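A rough sketch of the scavenging flow implied by the entries above, assuming MBB, I and RS are in scope:
  // Track liveness backwards from the block end and find a free 32-bit SGPR at I.
  RS.enterBasicBlockEnd(MBB);
  Register Tmp = RS.scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, I,
                                              /*RestoreAfter=*/false, /*SPAdj=*/0,
                                              /*AllowSpill=*/true);
  RS.setRegUsed(Tmp); // later queries now see the scavenged register as taken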
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of operand OpNo on the given instruction opcode.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
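Sketch of the intended use; the choice of SIInstrFlags::FLAT as the FlatVariant and the names COffsetVal and TII are assumptions:
  // Keep only the part of the offset that fits the encoding; the remainder must be
  // folded back into the address computation.
  std::pair<int64_t, int64_t> Split =
      TII->splitFlatOffset(COffsetVal, AMDGPUAS::FLAT_ADDRESS, SIInstrFlags::FLAT);
  int64_t ImmField  = Split.first;   // goes into the instruction's offset field
  int64_t Remainder = Split.second;  // added to the address register instead
  assert(TII->isLegalFLATOffset(ImmField, AMDGPUAS::FLAT_ADDRESS, SIInstrFlags::FLAT));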
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
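A small sketch of looking an operand up by name instead of by index; MI and TII are assumed to be in scope, and the null check covers encodings without that operand:
  // Read the immediate offset operand of a memory instruction, if it has one.
  if (const MachineOperand *Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset)) {
    int64_t ImmOffset = Off->getImm();
    (void)ImmOffset;
  }
  // Alternatively, fetch the index directly; -1 means the operand is absent.
  int OffIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);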
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description.
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check whether MO would be a legal operand if it were the OpIdx operand of MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
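One reading of that note stated as arithmetic (an interpretation, not verified against the encoder): with N offset bits and a forced-zero MSB, the usable unsigned range is [0, 2^(N-1) - 1]. ST and Offset are assumed names:
  unsigned N = AMDGPU::getNumFlatOffsetBits(ST); // ST: the subtarget, assumed in scope
  uint64_t MaxOffset = maxUIntN(N - 1);          // e.g. N == 12 gives 2047
  bool Legal = uint64_t(Offset) <= MaxOffset;    // Offset is an assumed candidate value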
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the inline values intended for floating-point constants.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
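A small sketch of how these integer helpers compose; Imm and Bytes are assumed values:
  uint32_t Lo = Lo_32(Imm);                 // low half of a 64-bit immediate
  uint32_t Hi = Hi_32(Imm);                 // high half, e.g. for a pair of 32-bit moves
  unsigned DWords = divideCeil(Bytes, 4);   // round a byte count up to whole dwords
  uint64_t Aligned = alignDown(Bytes, 16);  // largest 16-byte-aligned value <= Bytes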
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
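A sketch of building a memory operand for a spill slot from these pieces; FrameIndex and MBB are assumed names, and the result would typically be attached to the new instruction with addMemOperand:
  // Describe a 4-byte store to a fixed stack slot so scheduling and alias analysis
  // can reason about it.
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, LLT::scalar(32),
      MFI.getObjectAlign(FrameIndex));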
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
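Closing sketch tying this pair type to the helpers listed earlier (getRegSubRegPair, getVRegSubRegDef); MO and MRI are assumed names:
  // Resolve the defining instruction of a reg:subreg use, skipping trivial copies.
  TargetInstrInfo::RegSubRegPair P = getRegSubRegPair(MO);
  if (P.Reg.isVirtual()) {
    if (MachineInstr *Def = getVRegSubRegDef(P, MRI)) {
      // Def produces the value read through MO, with copies and sub-register
      // forwarding already skipped.
      (void)Def;
    }
  }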