34#include "llvm/IR/IntrinsicsAMDGPU.h"
41#define DEBUG_TYPE "si-instr-info"
43#define GET_INSTRINFO_CTOR_DTOR
44#include "AMDGPUGenInstrInfo.inc"
47#define GET_D16ImageDimIntrinsics_IMPL
48#define GET_ImageDimIntrinsicTable_IMPL
49#define GET_RsrcIntrinsics_IMPL
50#include "AMDGPUGenSearchableTables.inc"
58 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
61 "amdgpu-fix-16-bit-physreg-copies",
62 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
78 unsigned N =
Node->getNumOperands();
79 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
91 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0,
OpName);
92 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1,
OpName);
94 if (Op0Idx == -1 && Op1Idx == -1)
98 if ((Op0Idx == -1 && Op1Idx != -1) ||
99 (Op1Idx == -1 && Op0Idx != -1))
120 return !
MI.memoperands_empty() &&
122 return MMO->isLoad() && MMO->isInvariant();
144 if (!
MI.hasImplicitDef() &&
145 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
146 !
MI.mayRaiseFPException())
154bool SIInstrInfo::resultDependsOnExec(
const MachineInstr &
MI)
const {
158 if (
MI.isConvergent())
185 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
190 for (
auto Op :
MI.uses()) {
191 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
197 if (FromCycle ==
nullptr)
203 while (FromCycle && !FromCycle->
contains(ToCycle)) {
223 int64_t &Offset1)
const {
231 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
235 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
251 int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
252 int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
253 if (Offset0Idx == -1 || Offset1Idx == -1)
260 Offset0Idx -=
get(Opc0).NumDefs;
261 Offset1Idx -=
get(Opc1).NumDefs;
291 if (!Load0Offset || !Load1Offset)
308 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
309 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
311 if (OffIdx0 == -1 || OffIdx1 == -1)
317 OffIdx0 -=
get(Opc0).NumDefs;
318 OffIdx1 -=
get(Opc1).NumDefs;
337 case AMDGPU::DS_READ2ST64_B32:
338 case AMDGPU::DS_READ2ST64_B64:
339 case AMDGPU::DS_WRITE2ST64_B32:
340 case AMDGPU::DS_WRITE2ST64_B64:
355 OffsetIsScalable =
false;
372 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
374 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
375 if (
Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
388 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
389 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
390 if (Offset0 + 1 != Offset1)
401 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
409 Offset = EltSize * Offset0;
411 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
412 if (DataOpIdx == -1) {
413 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
415 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
431 if (BaseOp && !BaseOp->
isFI())
439 if (SOffset->
isReg())
445 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
447 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
456 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
457 int SRsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RsrcOpName);
459 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
460 if (VAddr0Idx >= 0) {
462 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
469 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
484 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
501 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
503 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
520 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
528 if (MO1->getAddrSpace() != MO2->getAddrSpace())
531 const auto *Base1 = MO1->getValue();
532 const auto *Base2 = MO2->getValue();
533 if (!Base1 || !Base2)
541 return Base1 == Base2;
545 int64_t Offset1,
bool OffsetIsScalable1,
547 int64_t Offset2,
bool OffsetIsScalable2,
548 unsigned ClusterSize,
549 unsigned NumBytes)
const {
562 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
581 const unsigned LoadSize = NumBytes / ClusterSize;
582 const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
583 return NumDWords <= MaxMemoryClusterDWords;
597 int64_t Offset0, int64_t Offset1,
598 unsigned NumLoads)
const {
599 assert(Offset1 > Offset0 &&
600 "Second offset should be larger than first offset!");
605 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
612 const char *Msg =
"illegal VGPR to SGPR copy") {
633 assert((
TII.getSubtarget().hasMAIInsts() &&
634 !
TII.getSubtarget().hasGFX90AInsts()) &&
635 "Expected GFX908 subtarget.");
638 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
639 "Source register of the copy should be either an SGPR or an AGPR.");
642 "Destination register of the copy should be an AGPR.");
651 for (
auto Def =
MI,
E =
MBB.begin(); Def !=
E; ) {
654 if (!Def->modifiesRegister(SrcReg, &RI))
657 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
658 Def->getOperand(0).getReg() != SrcReg)
665 bool SafeToPropagate =
true;
668 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
669 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
670 SafeToPropagate =
false;
672 if (!SafeToPropagate)
675 for (
auto I = Def;
I !=
MI; ++
I)
676 I->clearRegisterKills(DefOp.
getReg(), &RI);
685 if (ImpUseSuperReg) {
686 Builder.addReg(ImpUseSuperReg,
694 RS.enterBasicBlockEnd(
MBB);
695 RS.backward(std::next(
MI));
704 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
707 assert(
MBB.getParent()->getRegInfo().isReserved(Tmp) &&
708 "VGPR used for an intermediate copy should have been reserved.");
713 Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
723 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
724 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
725 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
732 if (ImpUseSuperReg) {
733 UseBuilder.
addReg(ImpUseSuperReg,
754 for (
unsigned Idx = 0; Idx < BaseIndices.
size(); ++Idx) {
755 int16_t SubIdx = BaseIndices[Idx];
756 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
757 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
758 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
759 unsigned Opcode = AMDGPU::S_MOV_B32;
762 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
763 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
764 if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.
size())) {
768 DestSubReg = RI.getSubReg(DestReg, SubIdx);
769 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
770 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
771 Opcode = AMDGPU::S_MOV_B64;
786 assert(FirstMI && LastMI);
794 LastMI->addRegisterKilled(SrcReg, &RI);
800 Register SrcReg,
bool KillSrc,
bool RenamableDest,
801 bool RenamableSrc)
const {
803 unsigned Size = RI.getRegSizeInBits(*RC);
805 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
811 if (((
Size == 16) != (SrcSize == 16))) {
813 assert(ST.useRealTrue16Insts());
815 MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
818 if (DestReg == SrcReg) {
824 RC = RI.getPhysRegBaseClass(DestReg);
825 Size = RI.getRegSizeInBits(*RC);
826 SrcRC = RI.getPhysRegBaseClass(SrcReg);
827 SrcSize = RI.getRegSizeInBits(*SrcRC);
831 if (RC == &AMDGPU::VGPR_32RegClass) {
833 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
834 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
835 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
836 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
842 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
843 RC == &AMDGPU::SReg_32RegClass) {
844 if (SrcReg == AMDGPU::SCC) {
851 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
852 if (DestReg == AMDGPU::VCC_LO) {
870 if (RC == &AMDGPU::SReg_64RegClass) {
871 if (SrcReg == AMDGPU::SCC) {
878 if (!AMDGPU::SReg_64_EncodableRegClass.
contains(SrcReg)) {
879 if (DestReg == AMDGPU::VCC) {
897 if (DestReg == AMDGPU::SCC) {
900 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
904 assert(ST.hasScalarCompareEq64());
918 if (RC == &AMDGPU::AGPR_32RegClass) {
919 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
920 (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
926 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg) && ST.hasGFX90AInsts()) {
935 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
942 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
943 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
945 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
946 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
947 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
948 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
951 MCRegister NewDestReg = RI.get32BitRegister(DestReg);
952 MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
965 if (IsAGPRDst || IsAGPRSrc) {
966 if (!DstLow || !SrcLow) {
968 "Cannot use hi16 subreg with an AGPR!");
975 if (ST.useRealTrue16Insts()) {
981 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
982 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
994 if (IsSGPRSrc && !ST.hasSDWAScalar()) {
995 if (!DstLow || !SrcLow) {
997 "Cannot use hi16 subreg on VI!");
1020 if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
1021 if (ST.hasVMovB64Inst()) {
1026 if (ST.hasPkMovB32()) {
1042 const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
1043 if (RI.isSGPRClass(RC)) {
1044 if (!RI.isSGPRClass(SrcRC)) {
1048 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1054 unsigned EltSize = 4;
1055 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1056 if (RI.isAGPRClass(RC)) {
1057 if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
1058 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1059 else if (RI.hasVGPRs(SrcRC) ||
1060 (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
1061 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1063 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1064 }
else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
1065 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1066 }
else if ((
Size % 64 == 0) && RI.hasVGPRs(RC) &&
1067 (RI.isProperlyAlignedRC(*RC) &&
1068 (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
1070 if (ST.hasVMovB64Inst()) {
1071 Opcode = AMDGPU::V_MOV_B64_e32;
1073 }
else if (ST.hasPkMovB32()) {
1074 Opcode = AMDGPU::V_PK_MOV_B32;
1084 std::unique_ptr<RegScavenger> RS;
1085 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1086 RS = std::make_unique<RegScavenger>();
1092 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1093 const bool CanKillSuperReg = KillSrc && !Overlap;
1095 for (
unsigned Idx = 0; Idx < SubIndices.
size(); ++Idx) {
1098 SubIdx = SubIndices[Idx];
1100 SubIdx = SubIndices[SubIndices.
size() - Idx - 1];
1101 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1102 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1103 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1105 bool IsFirstSubreg = Idx == 0;
1106 bool UseKill = CanKillSuperReg && Idx == SubIndices.
size() - 1;
1108 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1112 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1113 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1159 return &AMDGPU::VGPR_32RegClass;
1172 "Not a VGPR32 reg");
1174 if (
Cond.size() == 1) {
1184 }
else if (
Cond.size() == 2) {
1185 assert(
Cond[0].isImm() &&
"Cond[0] is not an immediate");
1187 case SIInstrInfo::SCC_TRUE: {
1198 case SIInstrInfo::SCC_FALSE: {
1209 case SIInstrInfo::VCCNZ: {
1223 case SIInstrInfo::VCCZ: {
1237 case SIInstrInfo::EXECNZ: {
1250 case SIInstrInfo::EXECZ: {
1300 int64_t &ImmVal)
const {
1301 switch (
MI.getOpcode()) {
1302 case AMDGPU::V_MOV_B32_e32:
1303 case AMDGPU::S_MOV_B32:
1304 case AMDGPU::S_MOVK_I32:
1305 case AMDGPU::S_MOV_B64:
1306 case AMDGPU::V_MOV_B64_e32:
1307 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1308 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1309 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1310 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1311 case AMDGPU::V_MOV_B64_PSEUDO:
1312 case AMDGPU::V_MOV_B16_t16_e32: {
1316 return MI.getOperand(0).getReg() == Reg;
1321 case AMDGPU::V_MOV_B16_t16_e64: {
1323 if (Src0.
isImm() && !
MI.getOperand(1).getImm()) {
1325 return MI.getOperand(0).getReg() == Reg;
1330 case AMDGPU::S_BREV_B32:
1331 case AMDGPU::V_BFREV_B32_e32:
1332 case AMDGPU::V_BFREV_B32_e64: {
1336 return MI.getOperand(0).getReg() == Reg;
1341 case AMDGPU::S_NOT_B32:
1342 case AMDGPU::V_NOT_B32_e32:
1343 case AMDGPU::V_NOT_B32_e64: {
1346 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1347 return MI.getOperand(0).getReg() == Reg;
1357std::optional<int64_t>
1362 if (!
Op.isReg() || !
Op.getReg().isVirtual())
1363 return std::nullopt;
1366 if (Def && Def->isMoveImmediate()) {
1372 return std::nullopt;
1377 if (RI.isAGPRClass(DstRC))
1378 return AMDGPU::COPY;
1379 if (RI.getRegSizeInBits(*DstRC) == 16) {
1382 return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1384 if (RI.getRegSizeInBits(*DstRC) == 32)
1385 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1386 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
1387 return AMDGPU::S_MOV_B64;
1388 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
1389 return AMDGPU::V_MOV_B64_PSEUDO;
1390 return AMDGPU::COPY;
1395 bool IsIndirectSrc)
const {
1396 if (IsIndirectSrc) {
1398 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1400 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1402 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1404 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1406 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1408 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
1410 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
1412 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1414 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1416 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1418 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1420 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1422 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1423 if (VecSize <= 1024)
1424 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1430 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1432 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1434 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1436 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1438 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1440 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
1442 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
1444 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1446 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1448 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1450 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1452 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1454 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1455 if (VecSize <= 1024)
1456 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1463 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1465 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1467 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1469 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1471 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1473 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1475 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1477 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1479 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1481 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1483 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1485 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1487 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1488 if (VecSize <= 1024)
1489 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1496 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1498 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1500 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1502 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1504 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1506 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1508 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1510 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1512 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1514 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1516 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1518 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1520 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1521 if (VecSize <= 1024)
1522 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1529 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1531 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1533 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1535 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1536 if (VecSize <= 1024)
1537 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1544 bool IsSGPR)
const {
1556 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1563 return NeedsCFI ? AMDGPU::SI_SPILL_S32_CFI_SAVE : AMDGPU::SI_SPILL_S32_SAVE;
1565 return NeedsCFI ? AMDGPU::SI_SPILL_S64_CFI_SAVE : AMDGPU::SI_SPILL_S64_SAVE;
1567 return NeedsCFI ? AMDGPU::SI_SPILL_S96_CFI_SAVE : AMDGPU::SI_SPILL_S96_SAVE;
1569 return NeedsCFI ? AMDGPU::SI_SPILL_S128_CFI_SAVE
1570 : AMDGPU::SI_SPILL_S128_SAVE;
1572 return NeedsCFI ? AMDGPU::SI_SPILL_S160_CFI_SAVE
1573 : AMDGPU::SI_SPILL_S160_SAVE;
1575 return NeedsCFI ? AMDGPU::SI_SPILL_S192_CFI_SAVE
1576 : AMDGPU::SI_SPILL_S192_SAVE;
1578 return NeedsCFI ? AMDGPU::SI_SPILL_S224_CFI_SAVE
1579 : AMDGPU::SI_SPILL_S224_SAVE;
1581 return AMDGPU::SI_SPILL_S256_SAVE;
1583 return AMDGPU::SI_SPILL_S288_SAVE;
1585 return AMDGPU::SI_SPILL_S320_SAVE;
1587 return AMDGPU::SI_SPILL_S352_SAVE;
1589 return AMDGPU::SI_SPILL_S384_SAVE;
1591 return NeedsCFI ? AMDGPU::SI_SPILL_S512_CFI_SAVE
1592 : AMDGPU::SI_SPILL_S512_SAVE;
1594 return NeedsCFI ? AMDGPU::SI_SPILL_S1024_CFI_SAVE
1595 : AMDGPU::SI_SPILL_S1024_SAVE;
1604 return AMDGPU::SI_SPILL_V16_SAVE;
1606 return NeedsCFI ? AMDGPU::SI_SPILL_V32_CFI_SAVE : AMDGPU::SI_SPILL_V32_SAVE;
1608 return NeedsCFI ? AMDGPU::SI_SPILL_V64_CFI_SAVE : AMDGPU::SI_SPILL_V64_SAVE;
1610 return NeedsCFI ? AMDGPU::SI_SPILL_V96_CFI_SAVE : AMDGPU::SI_SPILL_V96_SAVE;
1612 return NeedsCFI ? AMDGPU::SI_SPILL_V128_CFI_SAVE
1613 : AMDGPU::SI_SPILL_V128_SAVE;
1615 return NeedsCFI ? AMDGPU::SI_SPILL_V160_CFI_SAVE
1616 : AMDGPU::SI_SPILL_V160_SAVE;
1618 return NeedsCFI ? AMDGPU::SI_SPILL_V192_CFI_SAVE
1619 : AMDGPU::SI_SPILL_V192_SAVE;
1621 return NeedsCFI ? AMDGPU::SI_SPILL_V224_CFI_SAVE
1622 : AMDGPU::SI_SPILL_V224_SAVE;
1624 return NeedsCFI ? AMDGPU::SI_SPILL_V256_CFI_SAVE
1625 : AMDGPU::SI_SPILL_V256_SAVE;
1627 return NeedsCFI ? AMDGPU::SI_SPILL_V288_CFI_SAVE
1628 : AMDGPU::SI_SPILL_V288_SAVE;
1630 return NeedsCFI ? AMDGPU::SI_SPILL_V320_CFI_SAVE
1631 : AMDGPU::SI_SPILL_V320_SAVE;
1633 return NeedsCFI ? AMDGPU::SI_SPILL_V352_CFI_SAVE
1634 : AMDGPU::SI_SPILL_V352_SAVE;
1636 return NeedsCFI ? AMDGPU::SI_SPILL_V384_CFI_SAVE
1637 : AMDGPU::SI_SPILL_V384_SAVE;
1639 return NeedsCFI ? AMDGPU::SI_SPILL_V512_CFI_SAVE
1640 : AMDGPU::SI_SPILL_V512_SAVE;
1642 return NeedsCFI ? AMDGPU::SI_SPILL_V1024_CFI_SAVE
1643 : AMDGPU::SI_SPILL_V1024_SAVE;
1652 return NeedsCFI ? AMDGPU::SI_SPILL_AV32_CFI_SAVE
1653 : AMDGPU::SI_SPILL_AV32_SAVE;
1655 return NeedsCFI ? AMDGPU::SI_SPILL_AV64_CFI_SAVE
1656 : AMDGPU::SI_SPILL_AV64_SAVE;
1658 return NeedsCFI ? AMDGPU::SI_SPILL_AV96_CFI_SAVE
1659 : AMDGPU::SI_SPILL_AV96_SAVE;
1661 return NeedsCFI ? AMDGPU::SI_SPILL_AV128_CFI_SAVE
1662 : AMDGPU::SI_SPILL_AV128_SAVE;
1664 return NeedsCFI ? AMDGPU::SI_SPILL_AV160_CFI_SAVE
1665 : AMDGPU::SI_SPILL_AV160_SAVE;
1667 return NeedsCFI ? AMDGPU::SI_SPILL_AV192_CFI_SAVE
1668 : AMDGPU::SI_SPILL_AV192_SAVE;
1670 return NeedsCFI ? AMDGPU::SI_SPILL_AV224_CFI_SAVE
1671 : AMDGPU::SI_SPILL_AV224_SAVE;
1673 return NeedsCFI ? AMDGPU::SI_SPILL_AV256_CFI_SAVE
1674 : AMDGPU::SI_SPILL_AV256_SAVE;
1676 return AMDGPU::SI_SPILL_AV288_SAVE;
1678 return AMDGPU::SI_SPILL_AV320_SAVE;
1680 return AMDGPU::SI_SPILL_AV352_SAVE;
1682 return AMDGPU::SI_SPILL_AV384_SAVE;
1684 return NeedsCFI ? AMDGPU::SI_SPILL_AV512_CFI_SAVE
1685 : AMDGPU::SI_SPILL_AV512_SAVE;
1687 return NeedsCFI ? AMDGPU::SI_SPILL_AV1024_CFI_SAVE
1688 : AMDGPU::SI_SPILL_AV1024_SAVE;
1695 bool IsVectorSuperClass) {
1700 if (IsVectorSuperClass)
1701 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1703 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1709 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1716 if (ST.hasMAIInsts())
1722void SIInstrInfo::storeRegToStackSlotImpl(
1735 FrameInfo.getObjectAlign(FrameIndex));
1736 unsigned SpillSize = RI.getSpillSize(*RC);
1742 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1743 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1744 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1753 if (SrcReg.
isVirtual() && SpillSize == 4) {
1767 SpillSize, *MFI, NeedsCFI);
1782 storeRegToStackSlotImpl(
MBB,
MI, SrcReg, isKill, FrameIndex, RC, VReg, Flags,
1791 storeRegToStackSlotImpl(
MBB,
MI, SrcReg, isKill, FrameIndex, RC,
Register(),
1798 return AMDGPU::SI_SPILL_S32_RESTORE;
1800 return AMDGPU::SI_SPILL_S64_RESTORE;
1802 return AMDGPU::SI_SPILL_S96_RESTORE;
1804 return AMDGPU::SI_SPILL_S128_RESTORE;
1806 return AMDGPU::SI_SPILL_S160_RESTORE;
1808 return AMDGPU::SI_SPILL_S192_RESTORE;
1810 return AMDGPU::SI_SPILL_S224_RESTORE;
1812 return AMDGPU::SI_SPILL_S256_RESTORE;
1814 return AMDGPU::SI_SPILL_S288_RESTORE;
1816 return AMDGPU::SI_SPILL_S320_RESTORE;
1818 return AMDGPU::SI_SPILL_S352_RESTORE;
1820 return AMDGPU::SI_SPILL_S384_RESTORE;
1822 return AMDGPU::SI_SPILL_S512_RESTORE;
1824 return AMDGPU::SI_SPILL_S1024_RESTORE;
1833 return AMDGPU::SI_SPILL_V16_RESTORE;
1835 return AMDGPU::SI_SPILL_V32_RESTORE;
1837 return AMDGPU::SI_SPILL_V64_RESTORE;
1839 return AMDGPU::SI_SPILL_V96_RESTORE;
1841 return AMDGPU::SI_SPILL_V128_RESTORE;
1843 return AMDGPU::SI_SPILL_V160_RESTORE;
1845 return AMDGPU::SI_SPILL_V192_RESTORE;
1847 return AMDGPU::SI_SPILL_V224_RESTORE;
1849 return AMDGPU::SI_SPILL_V256_RESTORE;
1851 return AMDGPU::SI_SPILL_V288_RESTORE;
1853 return AMDGPU::SI_SPILL_V320_RESTORE;
1855 return AMDGPU::SI_SPILL_V352_RESTORE;
1857 return AMDGPU::SI_SPILL_V384_RESTORE;
1859 return AMDGPU::SI_SPILL_V512_RESTORE;
1861 return AMDGPU::SI_SPILL_V1024_RESTORE;
1870 return AMDGPU::SI_SPILL_AV32_RESTORE;
1872 return AMDGPU::SI_SPILL_AV64_RESTORE;
1874 return AMDGPU::SI_SPILL_AV96_RESTORE;
1876 return AMDGPU::SI_SPILL_AV128_RESTORE;
1878 return AMDGPU::SI_SPILL_AV160_RESTORE;
1880 return AMDGPU::SI_SPILL_AV192_RESTORE;
1882 return AMDGPU::SI_SPILL_AV224_RESTORE;
1884 return AMDGPU::SI_SPILL_AV256_RESTORE;
1886 return AMDGPU::SI_SPILL_AV288_RESTORE;
1888 return AMDGPU::SI_SPILL_AV320_RESTORE;
1890 return AMDGPU::SI_SPILL_AV352_RESTORE;
1892 return AMDGPU::SI_SPILL_AV384_RESTORE;
1894 return AMDGPU::SI_SPILL_AV512_RESTORE;
1896 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1903 bool IsVectorSuperClass) {
1908 if (IsVectorSuperClass)
1909 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1911 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1917 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1924 if (ST.hasMAIInsts())
1927 assert(!RI.isAGPRClass(RC));
1941 unsigned SpillSize = RI.getSpillSize(*RC);
1948 FrameInfo.getObjectAlign(FrameIndex));
1950 if (RI.isSGPRClass(RC)) {
1953 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1954 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1955 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1960 if (DestReg.
isVirtual() && SpillSize == 4) {
1989 unsigned Quantity)
const {
1991 unsigned MaxSNopCount = 1u << ST.getSNopBits();
1992 while (Quantity > 0) {
1993 unsigned Arg = std::min(Quantity, MaxSNopCount);
2000 auto *MF =
MBB.getParent();
2003 assert(Info->isEntryFunction());
2005 if (
MBB.succ_empty()) {
2006 bool HasNoTerminator =
MBB.getFirstTerminator() ==
MBB.end();
2007 if (HasNoTerminator) {
2008 if (Info->returnsVoid()) {
2022 constexpr unsigned DoorbellIDMask = 0x3ff;
2023 constexpr unsigned ECQueueWaveAbort = 0x400;
2028 if (!
MBB.succ_empty() || std::next(
MI.getIterator()) !=
MBB.end()) {
2029 MBB.splitAt(
MI,
false);
2033 MBB.addSuccessor(TrapBB);
2043 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
2047 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_AND_B32), DoorbellRegMasked)
2052 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_OR_B32), SetWaveAbortBit)
2053 .
addUse(DoorbellRegMasked)
2054 .
addImm(ECQueueWaveAbort);
2055 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2056 .
addUse(SetWaveAbortBit);
2059 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2070 return MBB.getNextNode();
2074 switch (
MI.getOpcode()) {
2076 if (
MI.isMetaInstruction())
2081 return MI.getOperand(0).getImm() + 1;
2091 switch (
MI.getOpcode()) {
2093 case AMDGPU::S_MOV_B64_term:
2096 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2099 case AMDGPU::S_MOV_B32_term:
2102 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2105 case AMDGPU::S_XOR_B64_term:
2108 MI.setDesc(
get(AMDGPU::S_XOR_B64));
2111 case AMDGPU::S_XOR_B32_term:
2114 MI.setDesc(
get(AMDGPU::S_XOR_B32));
2116 case AMDGPU::S_OR_B64_term:
2119 MI.setDesc(
get(AMDGPU::S_OR_B64));
2121 case AMDGPU::S_OR_B32_term:
2124 MI.setDesc(
get(AMDGPU::S_OR_B32));
2127 case AMDGPU::S_ANDN2_B64_term:
2130 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
2133 case AMDGPU::S_ANDN2_B32_term:
2136 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
2139 case AMDGPU::S_AND_B64_term:
2142 MI.setDesc(
get(AMDGPU::S_AND_B64));
2145 case AMDGPU::S_AND_B32_term:
2148 MI.setDesc(
get(AMDGPU::S_AND_B32));
2151 case AMDGPU::S_AND_SAVEEXEC_B64_term:
2154 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
2157 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2160 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2163 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2164 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2167 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2168 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2170 case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2174 get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2177 case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
2180 int64_t Imm =
MI.getOperand(1).getImm();
2182 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2183 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2188 MI.eraseFromParent();
2194 case AMDGPU::V_MOV_B64_PSEUDO: {
2196 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2197 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2205 if (ST.hasVMovB64Inst() && Mov64RC->
contains(Dst)) {
2206 MI.setDesc(Mov64Desc);
2211 if (
SrcOp.isImm()) {
2213 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2214 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2238 if (ST.hasPkMovB32() &&
2257 MI.eraseFromParent();
2260 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2264 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2268 if (ST.has64BitLiterals()) {
2269 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2275 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2280 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2281 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2283 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2284 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2289 MI.eraseFromParent();
2292 case AMDGPU::V_SET_INACTIVE_B32: {
2296 .
add(
MI.getOperand(3))
2297 .
add(
MI.getOperand(4))
2298 .
add(
MI.getOperand(1))
2299 .
add(
MI.getOperand(2))
2300 .
add(
MI.getOperand(5));
2301 MI.eraseFromParent();
2304 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2305 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2306 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2307 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2308 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2309 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2310 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2311 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2312 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2313 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2314 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2315 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2316 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2317 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2318 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2319 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2320 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2321 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2322 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2323 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2324 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2325 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2326 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2327 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2328 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2329 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2330 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2331 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2332 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2333 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2334 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2335 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2336 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2340 if (RI.hasVGPRs(EltRC)) {
2341 Opc = AMDGPU::V_MOVRELD_B32_e32;
2343 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2344 : AMDGPU::S_MOVRELD_B32;
2349 bool IsUndef =
MI.getOperand(1).isUndef();
2350 unsigned SubReg =
MI.getOperand(3).getImm();
2351 assert(VecReg ==
MI.getOperand(1).getReg());
2356 .
add(
MI.getOperand(2))
2360 const int ImpDefIdx =
2362 const int ImpUseIdx = ImpDefIdx + 1;
2364 MI.eraseFromParent();
2367 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2368 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2369 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2370 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2371 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2372 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
2373 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
2374 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2375 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2376 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2377 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2378 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2379 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2380 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2381 assert(ST.useVGPRIndexMode());
2383 bool IsUndef =
MI.getOperand(1).isUndef();
2392 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2396 .
add(
MI.getOperand(2))
2400 const int ImpDefIdx =
2402 const int ImpUseIdx = ImpDefIdx + 1;
2409 MI.eraseFromParent();
2412 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2413 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2414 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2415 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2416 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2417 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
2418 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
2419 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2420 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2421 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2422 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2423 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2424 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2425 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2426 assert(ST.useVGPRIndexMode());
2429 bool IsUndef =
MI.getOperand(1).isUndef();
2433 .
add(
MI.getOperand(2))
2446 MI.eraseFromParent();
2449 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2452 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2453 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2472 if (ST.hasGetPCZeroExtension()) {
2476 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2483 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2493 MI.eraseFromParent();
2496 case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
2506 Op.setOffset(
Op.getOffset() + 4);
2508 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(
Op));
2512 MI.eraseFromParent();
2515 case AMDGPU::ENTER_STRICT_WWM: {
2521 case AMDGPU::ENTER_STRICT_WQM: {
2528 MI.eraseFromParent();
2531 case AMDGPU::EXIT_STRICT_WWM:
2532 case AMDGPU::EXIT_STRICT_WQM: {
2538 case AMDGPU::SI_RETURN: {
2552 MI.eraseFromParent();
2556 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2557 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2558 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2561 case AMDGPU::S_GETPC_B64_pseudo:
2562 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2563 if (ST.hasGetPCZeroExtension()) {
2565 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2574 case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
2575 assert(ST.hasBF16PackedInsts());
2576 MI.setDesc(
get(AMDGPU::V_PK_MAX_NUM_BF16));
2587 case AMDGPU::GET_STACK_BASE:
2590 if (ST.getFrameLowering()->mayReserveScratchForCWSR(*
MBB.getParent())) {
2597 Register DestReg =
MI.getOperand(0).getReg();
2607 MI.getOperand(
MI.getNumExplicitOperands()).setIsDead(
false);
2608 MI.getOperand(
MI.getNumExplicitOperands()).setIsUse();
2609 MI.setDesc(
get(AMDGPU::S_CMOVK_I32));
2612 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2615 MI.getNumExplicitOperands());
2633 case AMDGPU::S_MOV_B64:
2634 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2643 if (UsedLanes.
all())
2648 unsigned LoSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub0);
2649 unsigned HiSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub1);
2651 bool NeedLo = (UsedLanes & RI.getSubRegIndexLaneMask(LoSubReg)).any();
2652 bool NeedHi = (UsedLanes & RI.getSubRegIndexLaneMask(HiSubReg)).any();
2654 if (NeedLo && NeedHi)
2658 int32_t Imm32 = NeedLo ?
Lo_32(Imm64) :
Hi_32(Imm64);
2660 unsigned UseSubReg = NeedLo ? LoSubReg : HiSubReg;
2669 case AMDGPU::S_LOAD_DWORDX16_IMM:
2670 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2683 for (
auto &CandMO :
I->operands()) {
2684 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2692 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2696 unsigned SubregSize = RI.getSubRegIdxSize(UseMO->
getSubReg());
2702 unsigned NewOpcode = -1;
2703 if (SubregSize == 256)
2704 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2705 else if (SubregSize == 128)
2706 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2716 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2721 MI->getOperand(0).setReg(DestReg);
2722 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2726 OffsetMO->
setImm(FinalOffset);
2732 MI->setMemRefs(*MF, NewMMOs);
2745std::pair<MachineInstr*, MachineInstr*>
2747 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2749 if (ST.hasVMovB64Inst() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2752 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2753 return std::pair(&
MI,
nullptr);
2764 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2766 if (Dst.isPhysical()) {
2767 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2774 for (
unsigned I = 1;
I <= 2; ++
I) {
2777 if (
SrcOp.isImm()) {
2779 Imm.ashrInPlace(Part * 32);
2780 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2784 if (Src.isPhysical())
2785 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2792 MovDPP.addImm(MO.getImm());
2794 Split[Part] = MovDPP;
2798 if (Dst.isVirtual())
2805 MI.eraseFromParent();
2806 return std::pair(Split[0], Split[1]);
2809std::optional<DestSourcePair>
2811 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2814 return std::nullopt;
2818 AMDGPU::OpName Src0OpName,
2820 AMDGPU::OpName Src1OpName)
const {
2827 "All commutable instructions have both src0 and src1 modifiers");
2829 int Src0ModsVal = Src0Mods->
getImm();
2830 int Src1ModsVal = Src1Mods->
getImm();
2832 Src1Mods->
setImm(Src0ModsVal);
2833 Src0Mods->
setImm(Src1ModsVal);
2842 bool IsKill = RegOp.
isKill();
2844 bool IsUndef = RegOp.
isUndef();
2845 bool IsDebug = RegOp.
isDebug();
2847 if (NonRegOp.
isImm())
2849 else if (NonRegOp.
isFI())
2870 int64_t NonRegVal = NonRegOp1.
getImm();
2873 NonRegOp2.
setImm(NonRegVal);
2880 unsigned OpIdx1)
const {
2885 unsigned Opc =
MI.getOpcode();
2886 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2896 if ((
int)OpIdx0 == Src0Idx && !MO0.
isReg() &&
2899 if ((
int)OpIdx1 == Src0Idx && !MO1.
isReg() &&
2904 if ((
int)OpIdx1 != Src0Idx && MO0.
isReg()) {
2910 if ((
int)OpIdx0 != Src0Idx && MO1.
isReg()) {
2925 unsigned Src1Idx)
const {
2926 assert(!NewMI &&
"this should never be used");
2928 unsigned Opc =
MI.getOpcode();
2930 if (CommutedOpcode == -1)
2933 if (Src0Idx > Src1Idx)
2936 assert(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) ==
2937 static_cast<int>(Src0Idx) &&
2938 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1) ==
2939 static_cast<int>(Src1Idx) &&
2940 "inconsistency with findCommutedOpIndices");
2965 Src1, AMDGPU::OpName::src1_modifiers);
2968 AMDGPU::OpName::src1_sel);
2980 unsigned &SrcOpIdx0,
2981 unsigned &SrcOpIdx1)
const {
2986 unsigned &SrcOpIdx0,
2987 unsigned &SrcOpIdx1)
const {
2988 if (!
Desc.isCommutable())
2991 unsigned Opc =
Desc.getOpcode();
2992 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2996 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
3000 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
3004 int64_t BrOffset)
const {
3021 return MI.getOperand(0).getMBB();
3026 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
3027 MI.getOpcode() == AMDGPU::SI_LOOP)
3039 "new block should be inserted for expanding unconditional branch");
3042 "restore block should be inserted for restoring clobbered registers");
3050 if (ST.useAddPC64Inst()) {
3052 MCCtx.createTempSymbol(
"offset",
true);
3056 MCCtx.createTempSymbol(
"post_addpc",
true);
3057 AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
3061 Offset->setVariableValue(OffsetExpr);
3065 assert(RS &&
"RegScavenger required for long branching");
3073 const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
3074 ST.hasVALUReadSGPRHazard();
3075 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
3076 if (FlushSGPRWrites)
3084 ApplyHazardWorkarounds();
3087 MCCtx.createTempSymbol(
"post_getpc",
true);
3091 MCCtx.createTempSymbol(
"offset_lo",
true);
3093 MCCtx.createTempSymbol(
"offset_hi",
true);
3096 .
addReg(PCReg, {}, AMDGPU::sub0)
3100 .
addReg(PCReg, {}, AMDGPU::sub1)
3102 ApplyHazardWorkarounds();
3143 if (LongBranchReservedReg) {
3144 RS->enterBasicBlock(
MBB);
3145 Scav = LongBranchReservedReg;
3147 RS->enterBasicBlockEnd(
MBB);
3148 Scav = RS->scavengeRegisterBackwards(
3153 RS->setRegUsed(Scav);
3161 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3178unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
3180 case SIInstrInfo::SCC_TRUE:
3181 return AMDGPU::S_CBRANCH_SCC1;
3182 case SIInstrInfo::SCC_FALSE:
3183 return AMDGPU::S_CBRANCH_SCC0;
3184 case SIInstrInfo::VCCNZ:
3185 return AMDGPU::S_CBRANCH_VCCNZ;
3186 case SIInstrInfo::VCCZ:
3187 return AMDGPU::S_CBRANCH_VCCZ;
3188 case SIInstrInfo::EXECNZ:
3189 return AMDGPU::S_CBRANCH_EXECNZ;
3190 case SIInstrInfo::EXECZ:
3191 return AMDGPU::S_CBRANCH_EXECZ;
3197SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3199 case AMDGPU::S_CBRANCH_SCC0:
3201 case AMDGPU::S_CBRANCH_SCC1:
3203 case AMDGPU::S_CBRANCH_VCCNZ:
3205 case AMDGPU::S_CBRANCH_VCCZ:
3207 case AMDGPU::S_CBRANCH_EXECNZ:
3209 case AMDGPU::S_CBRANCH_EXECZ:
3221 bool AllowModify)
const {
3222 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3224 TBB =
I->getOperand(0).getMBB();
3228 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3229 if (Pred == INVALID_BR)
3234 Cond.push_back(
I->getOperand(1));
3238 if (
I ==
MBB.end()) {
3244 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3246 FBB =
I->getOperand(0).getMBB();
3256 bool AllowModify)
const {
3264 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3265 switch (
I->getOpcode()) {
3266 case AMDGPU::S_MOV_B64_term:
3267 case AMDGPU::S_XOR_B64_term:
3268 case AMDGPU::S_OR_B64_term:
3269 case AMDGPU::S_ANDN2_B64_term:
3270 case AMDGPU::S_AND_B64_term:
3271 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3272 case AMDGPU::S_MOV_B32_term:
3273 case AMDGPU::S_XOR_B32_term:
3274 case AMDGPU::S_OR_B32_term:
3275 case AMDGPU::S_ANDN2_B32_term:
3276 case AMDGPU::S_AND_B32_term:
3277 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3280 case AMDGPU::SI_ELSE:
3281 case AMDGPU::SI_KILL_I1_TERMINATOR:
3282 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3299 int *BytesRemoved)
const {
3301 unsigned RemovedSize = 0;
3304 if (
MI.isBranch() ||
MI.isReturn()) {
3306 MI.eraseFromParent();
3312 *BytesRemoved = RemovedSize;
3329 int *BytesAdded)
const {
3330 if (!FBB &&
Cond.empty()) {
3334 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3341 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].
getImm()));
3353 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3371 *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
3378 if (
Cond.size() != 2) {
3382 if (
Cond[0].isImm()) {
3393 Register FalseReg,
int &CondCycles,
3394 int &TrueCycles,
int &FalseCycles)
const {
3404 CondCycles = TrueCycles = FalseCycles = NumInsts;
3407 return RI.hasVGPRs(RC) && NumInsts <= 6;
3421 if (NumInsts % 2 == 0)
3424 CondCycles = TrueCycles = FalseCycles = NumInsts;
3425 return RI.isSGPRClass(RC);
3436 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3437 if (Pred == VCCZ || Pred == SCC_FALSE) {
3438 Pred =
static_cast<BranchPredicate
>(-Pred);
3444 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3446 if (DstSize == 32) {
3448 if (Pred == SCC_TRUE) {
3463 if (DstSize == 64 && Pred == SCC_TRUE) {
3473 static const int16_t Sub0_15[] = {
3474 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3475 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3476 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3477 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3480 static const int16_t Sub0_15_64[] = {
3481 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3482 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3483 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3484 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3487 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3489 const int16_t *SubIndices = Sub0_15;
3490 int NElts = DstSize / 32;
3494 if (Pred == SCC_TRUE) {
3496 SelOp = AMDGPU::S_CSELECT_B32;
3497 EltRC = &AMDGPU::SGPR_32RegClass;
3499 SelOp = AMDGPU::S_CSELECT_B64;
3500 EltRC = &AMDGPU::SGPR_64RegClass;
3501 SubIndices = Sub0_15_64;
3507 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3512 for (
int Idx = 0; Idx != NElts; ++Idx) {
3516 unsigned SubIdx = SubIndices[Idx];
3519 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3521 .
addReg(FalseReg, {}, SubIdx)
3522 .addReg(TrueReg, {}, SubIdx);
3525 .
addReg(TrueReg, {}, SubIdx)
3526 .addReg(FalseReg, {}, SubIdx);
3539 if (
MI.isBranch() ||
MI.isCall() ||
MI.isReturn() ||
MI.isIndirectBranch())
3542 switch (
MI.getOpcode()) {
3543 case AMDGPU::S_ENDPGM:
3544 case AMDGPU::S_ENDPGM_SAVED:
3545 case AMDGPU::S_TRAP:
3546 case AMDGPU::S_GETREG_B32:
3547 case AMDGPU::S_SETREG_B32:
3548 case AMDGPU::S_SETREG_B32_mode:
3549 case AMDGPU::S_SETREG_IMM32_B32:
3550 case AMDGPU::S_SETREG_IMM32_B32_mode:
3551 case AMDGPU::S_SENDMSG:
3552 case AMDGPU::S_SENDMSGHALT:
3553 case AMDGPU::S_SENDMSG_RTN_B32:
3554 case AMDGPU::S_SENDMSG_RTN_B64:
3555 case AMDGPU::S_BARRIER_WAIT:
3556 case AMDGPU::S_BARRIER_SIGNAL_M0:
3557 case AMDGPU::S_BARRIER_SIGNAL_IMM:
3558 case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0:
3559 case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM:
3567 switch (
MI.getOpcode()) {
3568 case AMDGPU::V_MOV_B16_t16_e32:
3569 case AMDGPU::V_MOV_B16_t16_e64:
3570 case AMDGPU::V_MOV_B32_e32:
3571 case AMDGPU::V_MOV_B32_e64:
3572 case AMDGPU::V_MOV_B64_PSEUDO:
3573 case AMDGPU::V_MOV_B64_e32:
3574 case AMDGPU::V_MOV_B64_e64:
3575 case AMDGPU::S_MOV_B32:
3576 case AMDGPU::S_MOV_B64:
3577 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3579 case AMDGPU::WWM_COPY:
3580 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3581 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3582 case AMDGPU::V_ACCVGPR_MOV_B32:
3583 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3584 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3592 switch (
MI.getOpcode()) {
3593 case AMDGPU::V_MOV_B16_t16_e32:
3594 case AMDGPU::V_MOV_B16_t16_e64:
3596 case AMDGPU::V_MOV_B32_e32:
3597 case AMDGPU::V_MOV_B32_e64:
3598 case AMDGPU::V_MOV_B64_PSEUDO:
3599 case AMDGPU::V_MOV_B64_e32:
3600 case AMDGPU::V_MOV_B64_e64:
3601 case AMDGPU::S_MOV_B32:
3602 case AMDGPU::S_MOV_B64:
3603 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3605 case AMDGPU::WWM_COPY:
3606 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3607 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3608 case AMDGPU::V_ACCVGPR_MOV_B32:
3609 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3610 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3618 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3619 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3620 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3623 unsigned Opc =
MI.getOpcode();
3625 int Idx = AMDGPU::getNamedOperandIdx(
Opc, Name);
3627 MI.removeOperand(Idx);
3633 MI.setDesc(NewDesc);
3639 unsigned NumOps =
Desc.getNumOperands() +
Desc.implicit_uses().size() +
3640 Desc.implicit_defs().size();
3642 for (
unsigned I =
MI.getNumOperands() - 1;
I >=
NumOps; --
I)
3643 MI.removeOperand(
I);
3647 unsigned SubRegIndex) {
3648 switch (SubRegIndex) {
3649 case AMDGPU::NoSubRegister:
3659 case AMDGPU::sub1_lo16:
3661 case AMDGPU::sub1_hi16:
3664 return std::nullopt;
3672 case AMDGPU::V_MAC_F16_e32:
3673 case AMDGPU::V_MAC_F16_e64:
3674 case AMDGPU::V_MAD_F16_e64:
3675 return AMDGPU::V_MADAK_F16;
3676 case AMDGPU::V_MAC_F32_e32:
3677 case AMDGPU::V_MAC_F32_e64:
3678 case AMDGPU::V_MAD_F32_e64:
3679 return AMDGPU::V_MADAK_F32;
3680 case AMDGPU::V_FMAC_F32_e32:
3681 case AMDGPU::V_FMAC_F32_e64:
3682 case AMDGPU::V_FMA_F32_e64:
3683 return AMDGPU::V_FMAAK_F32;
3684 case AMDGPU::V_FMAC_F16_e32:
3685 case AMDGPU::V_FMAC_F16_e64:
3686 case AMDGPU::V_FMAC_F16_t16_e64:
3687 case AMDGPU::V_FMAC_F16_fake16_e64:
3688 case AMDGPU::V_FMAC_F16_t16_e32:
3689 case AMDGPU::V_FMAC_F16_fake16_e32:
3690 case AMDGPU::V_FMA_F16_e64:
3691 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3692 ? AMDGPU::V_FMAAK_F16_t16
3693 : AMDGPU::V_FMAAK_F16_fake16
3694 : AMDGPU::V_FMAAK_F16;
3695 case AMDGPU::V_FMAC_F64_e32:
3696 case AMDGPU::V_FMAC_F64_e64:
3697 case AMDGPU::V_FMA_F64_e64:
3698 return AMDGPU::V_FMAAK_F64;
3706 case AMDGPU::V_MAC_F16_e32:
3707 case AMDGPU::V_MAC_F16_e64:
3708 case AMDGPU::V_MAD_F16_e64:
3709 return AMDGPU::V_MADMK_F16;
3710 case AMDGPU::V_MAC_F32_e32:
3711 case AMDGPU::V_MAC_F32_e64:
3712 case AMDGPU::V_MAD_F32_e64:
3713 return AMDGPU::V_MADMK_F32;
3714 case AMDGPU::V_FMAC_F32_e32:
3715 case AMDGPU::V_FMAC_F32_e64:
3716 case AMDGPU::V_FMA_F32_e64:
3717 return AMDGPU::V_FMAMK_F32;
3718 case AMDGPU::V_FMAC_F16_e32:
3719 case AMDGPU::V_FMAC_F16_e64:
3720 case AMDGPU::V_FMAC_F16_t16_e64:
3721 case AMDGPU::V_FMAC_F16_fake16_e64:
3722 case AMDGPU::V_FMAC_F16_t16_e32:
3723 case AMDGPU::V_FMAC_F16_fake16_e32:
3724 case AMDGPU::V_FMA_F16_e64:
3725 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3726 ? AMDGPU::V_FMAMK_F16_t16
3727 : AMDGPU::V_FMAMK_F16_fake16
3728 : AMDGPU::V_FMAMK_F16;
3729 case AMDGPU::V_FMAC_F64_e32:
3730 case AMDGPU::V_FMAC_F64_e64:
3731 case AMDGPU::V_FMA_F64_e64:
3732 return AMDGPU::V_FMAMK_F64;
3746 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3749 if (
Opc == AMDGPU::COPY) {
3750 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3757 if (HasMultipleUses) {
3760 unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->
getRegClass(Reg));
3763 if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
3771 if (ImmDefSize == 32 &&
3776 bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
3777 RI.getSubRegIdxSize(UseSubReg) == 16;
3780 if (RI.hasVGPRs(DstRC))
3783 if (DstReg.
isVirtual() && UseSubReg != AMDGPU::lo16)
3789 unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
3796 for (
unsigned MovOp :
3797 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
3798 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
3806 MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
3810 if (MovDstPhysReg) {
3814 RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
3821 if (MovDstPhysReg) {
3822 if (!MovDstRC->
contains(MovDstPhysReg))
3838 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
3846 if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
3850 UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
3852 UseMI.getOperand(0).setReg(MovDstPhysReg);
3857 UseMI.setDesc(NewMCID);
3858 UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
3859 UseMI.addImplicitDefUseOperands(*MF);
3863 if (HasMultipleUses)
3866 if (
Opc == AMDGPU::V_MAD_F32_e64 ||
Opc == AMDGPU::V_MAC_F32_e64 ||
3867 Opc == AMDGPU::V_MAD_F16_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3868 Opc == AMDGPU::V_FMA_F32_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 ||
3869 Opc == AMDGPU::V_FMA_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 ||
3870 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3871 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMA_F64_e64 ||
3872 Opc == AMDGPU::V_FMAC_F64_e64) {
3881 int Src0Idx = getNamedOperandIdx(
UseMI.getOpcode(), AMDGPU::OpName::src0);
3892 auto CopyRegOperandToNarrowerRC =
3895 if (!
MI.getOperand(OpNo).isReg())
3899 if (RI.getCommonSubClass(RC, NewRC) != NewRC)
3902 BuildMI(*
MI.getParent(),
MI.getIterator(),
MI.getDebugLoc(),
3903 get(AMDGPU::COPY), Tmp)
3905 MI.getOperand(OpNo).setReg(Tmp);
3906 MI.getOperand(OpNo).setIsKill();
3913 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3914 if (!RegSrc->
isReg())
3917 ST.getConstantBusLimit(
Opc) < 2)
3932 if (Def && Def->isMoveImmediate() &&
3947 unsigned SrcSubReg = RegSrc->
getSubReg();
3952 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3953 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3954 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3955 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3956 UseMI.untieRegOperand(
3957 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3964 if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3965 NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
3969 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3970 UseMI.getOperand(0).getReg())
3972 UseMI.getOperand(0).setReg(Tmp);
3973 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3974 CopyRegOperandToNarrowerRC(
UseMI, 3, NewRC);
3979 DefMI.eraseFromParent();
3986 if (ST.getConstantBusLimit(
Opc) < 2) {
3989 bool Src0Inlined =
false;
3990 if (Src0->
isReg()) {
3995 if (Def && Def->isMoveImmediate() &&
4000 }
else if (ST.getConstantBusLimit(
Opc) <= 1 &&
4001 RI.isSGPRReg(*MRI, Src0->
getReg())) {
4007 if (Src1->
isReg() && !Src0Inlined) {
4010 if (Def && Def->isMoveImmediate() &&
4014 else if (RI.isSGPRReg(*MRI, Src1->
getReg()))
4027 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
4028 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
4029 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
4030 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
4031 UseMI.untieRegOperand(
4032 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
4034 const std::optional<int64_t> SubRegImm =
4044 if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
4045 NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
4049 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
4050 UseMI.getOperand(0).getReg())
4052 UseMI.getOperand(0).setReg(Tmp);
4053 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
4054 CopyRegOperandToNarrowerRC(
UseMI, 2, NewRC);
4064 DefMI.eraseFromParent();
4076 if (BaseOps1.
size() != BaseOps2.
size())
4078 for (
size_t I = 0,
E = BaseOps1.
size();
I <
E; ++
I) {
4079 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
4087 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
4088 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
4089 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
4091 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
4094bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
4097 int64_t Offset0, Offset1;
4100 bool Offset0IsScalable, Offset1IsScalable;
4114 LocationSize Width0 = MIa.
memoperands().front()->getSize();
4115 LocationSize Width1 = MIb.
memoperands().front()->getSize();
4122 "MIa must load from or modify a memory location");
4124 "MIb must load from or modify a memory location");
4146 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4153 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4163 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4177 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4188 if (
Reg.isPhysical())
4192 Imm = Def->getOperand(1).getImm();
4212 unsigned NumOps =
MI.getNumOperands();
4215 if (
Op.isReg() &&
Op.isKill())
4223 case AMDGPU::V_MAC_F16_e32:
4224 case AMDGPU::V_MAC_F16_e64:
4225 return AMDGPU::V_MAD_F16_e64;
4226 case AMDGPU::V_MAC_F32_e32:
4227 case AMDGPU::V_MAC_F32_e64:
4228 return AMDGPU::V_MAD_F32_e64;
4229 case AMDGPU::V_MAC_LEGACY_F32_e32:
4230 case AMDGPU::V_MAC_LEGACY_F32_e64:
4231 return AMDGPU::V_MAD_LEGACY_F32_e64;
4232 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4233 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4234 return AMDGPU::V_FMA_LEGACY_F32_e64;
4235 case AMDGPU::V_FMAC_F16_e32:
4236 case AMDGPU::V_FMAC_F16_e64:
4237 case AMDGPU::V_FMAC_F16_t16_e64:
4238 case AMDGPU::V_FMAC_F16_fake16_e64:
4239 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4240 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4241 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4242 : AMDGPU::V_FMA_F16_gfx9_e64;
4243 case AMDGPU::V_FMAC_F32_e32:
4244 case AMDGPU::V_FMAC_F32_e64:
4245 return AMDGPU::V_FMA_F32_e64;
4246 case AMDGPU::V_FMAC_F64_e32:
4247 case AMDGPU::V_FMAC_F64_e64:
4248 return AMDGPU::V_FMA_F64_e64;
4268 if (
MI.isBundle()) {
4271 if (
MI.getBundleSize() != 1)
4273 CandidateMI =
MI.getNextNode();
4277 MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4281 if (
MI.isBundle()) {
4286 MI.untieRegOperand(MO.getOperandNo());
4294 if (Def.isEarlyClobber() && Def.isReg() &&
4299 auto UpdateDefIndex = [&](
LiveRange &LR) {
4300 auto *S = LR.find(OldIndex);
4301 if (S != LR.end() && S->start == OldIndex) {
4302 assert(S->valno && S->valno->def == OldIndex);
4303 S->start = NewIndex;
4304 S->valno->def = NewIndex;
4308 for (
auto &SR : LI.subranges())
4314 if (U.RemoveMIUse) {
4317 Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
4321 U.RemoveMIUse->setDesc(
get(AMDGPU::IMPLICIT_DEF));
4322 U.RemoveMIUse->getOperand(0).setIsDead(
true);
4323 for (
unsigned I = U.RemoveMIUse->getNumOperands() - 1;
I != 0; --
I)
4324 U.RemoveMIUse->removeOperand(
I);
4329 if (
MI.isBundle()) {
4333 if (MO.isReg() && MO.getReg() == DefReg) {
4334 assert(MO.getSubReg() == 0 &&
4335 "tied sub-registers in bundles currently not supported");
4336 MI.removeOperand(MO.getOperandNo());
4353 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4354 MIOp.setIsUndef(
true);
4355 MIOp.setReg(DummyReg);
4359 if (
MI.isBundle()) {
4363 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4364 MIOp.setIsUndef(
true);
4365 MIOp.setReg(DummyReg);
4378 return MI.isBundle() ? &
MI : NewMI;
4383 ThreeAddressUpdates &U)
const {
4385 unsigned Opc =
MI.getOpcode();
4389 if (NewMFMAOpc != -1) {
4392 for (
unsigned I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I)
4393 MIB.
add(
MI.getOperand(
I));
4401 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I)
4406 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4407 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4408 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4412 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4413 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4414 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4415 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4416 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4417 bool Src0Literal =
false;
4422 case AMDGPU::V_MAC_F16_e64:
4423 case AMDGPU::V_FMAC_F16_e64:
4424 case AMDGPU::V_FMAC_F16_t16_e64:
4425 case AMDGPU::V_FMAC_F16_fake16_e64:
4426 case AMDGPU::V_MAC_F32_e64:
4427 case AMDGPU::V_MAC_LEGACY_F32_e64:
4428 case AMDGPU::V_FMAC_F32_e64:
4429 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4430 case AMDGPU::V_FMAC_F64_e64:
4432 case AMDGPU::V_MAC_F16_e32:
4433 case AMDGPU::V_FMAC_F16_e32:
4434 case AMDGPU::V_MAC_F32_e32:
4435 case AMDGPU::V_MAC_LEGACY_F32_e32:
4436 case AMDGPU::V_FMAC_F32_e32:
4437 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4438 case AMDGPU::V_FMAC_F64_e32: {
4439 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4440 AMDGPU::OpName::src0);
4441 const MachineOperand *Src0 = &
MI.getOperand(Src0Idx);
4452 MachineInstrBuilder MIB;
4455 const MachineOperand *Src0Mods =
4458 const MachineOperand *Src1Mods =
4461 const MachineOperand *Src2Mods =
4467 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4468 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4470 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4472 MachineInstr *
DefMI;
4508 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4524 if (Src0Literal && !ST.hasVOP3Literal())
4552 switch (
MI.getOpcode()) {
4553 case AMDGPU::S_SET_GPR_IDX_ON:
4554 case AMDGPU::S_SET_GPR_IDX_MODE:
4555 case AMDGPU::S_SET_GPR_IDX_OFF:
4573 if (
MI.isTerminator() ||
MI.isPosition())
4577 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4580 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4586 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4587 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4588 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4589 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4590 MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4595 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4596 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4597 Opcode == AMDGPU::DS_SUB_GS_REG_RTN ||
isGWS(Opcode);
4611 if (
MI.getMF()->getFunction().hasFnAttribute(
"amdgpu-no-flat-scratch-init"))
4616 if (
MI.memoperands_empty())
4621 unsigned AS = Memop->getAddrSpace();
4622 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4623 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4624 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4625 *MD, AMDGPUAS::PRIVATE_ADDRESS);
4640 if (
MI.memoperands_empty())
4649 unsigned AS = Memop->getAddrSpace();
4666 if (ST.isTgSplitEnabled())
4671 if (
MI.memoperands_empty())
4676 unsigned AS = Memop->getAddrSpace();
4692 unsigned Opcode =
MI.getOpcode();
4707 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4708 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4709 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT ||
4710 Opcode == AMDGPU::S_SETHALT)
4713 if (
MI.isCall() ||
MI.isInlineAsm())
4729 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4730 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4731 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4732 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4740 if (
MI.isMetaInstruction())
4744 if (
MI.isCopyLike()) {
4745 if (!RI.isSGPRReg(MRI,
MI.getOperand(0).getReg()))
4749 return MI.readsRegister(AMDGPU::EXEC, &RI);
4760 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4764 switch (Imm.getBitWidth()) {
4770 ST.hasInv2PiInlineImm());
4773 ST.hasInv2PiInlineImm());
4775 return ST.has16BitInsts() &&
4777 ST.hasInv2PiInlineImm());
4784 APInt IntImm = Imm.bitcastToAPInt();
4786 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4794 return ST.has16BitInsts() &&
4797 return ST.has16BitInsts() &&
4807 switch (OperandType) {
4817 int32_t Trunc =
static_cast<int32_t
>(Imm);
4861 int16_t Trunc =
static_cast<int16_t
>(Imm);
4862 return ST.has16BitInsts() &&
4871 int16_t Trunc =
static_cast<int16_t
>(Imm);
4872 return ST.has16BitInsts() &&
4923 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4929 return ST.hasVOP3Literal();
4933 int64_t ImmVal)
const {
4936 if (
isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4937 OpNo == (
unsigned)AMDGPU::getNamedOperandIdx(InstDesc.
getOpcode(),
4938 AMDGPU::OpName::src2))
4940 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4952 "unexpected imm-like operand kind");
4965 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4983 AMDGPU::OpName
OpName)
const {
4985 return Mods && Mods->
getImm();
4998 switch (
MI.getOpcode()) {
4999 default:
return false;
5001 case AMDGPU::V_ADDC_U32_e64:
5002 case AMDGPU::V_SUBB_U32_e64:
5003 case AMDGPU::V_SUBBREV_U32_e64: {
5006 if (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()))
5011 case AMDGPU::V_MAC_F16_e64:
5012 case AMDGPU::V_MAC_F32_e64:
5013 case AMDGPU::V_MAC_LEGACY_F32_e64:
5014 case AMDGPU::V_FMAC_F16_e64:
5015 case AMDGPU::V_FMAC_F16_t16_e64:
5016 case AMDGPU::V_FMAC_F16_fake16_e64:
5017 case AMDGPU::V_FMAC_F32_e64:
5018 case AMDGPU::V_FMAC_F64_e64:
5019 case AMDGPU::V_FMAC_LEGACY_F32_e64:
5020 if (!Src2->
isReg() || !RI.isVGPR(MRI, Src2->
getReg()) ||
5025 case AMDGPU::V_CNDMASK_B32_e64:
5031 if (Src1 && (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()) ||
5061 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
5070 unsigned Op32)
const {
5084 Inst32.
add(
MI.getOperand(
I));
5088 int Idx =
MI.getNumExplicitDefs();
5090 int OpTy =
MI.getDesc().operands()[Idx++].OperandType;
5095 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
5117 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
5125 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
5128 return AMDGPU::SReg_32RegClass.contains(Reg) ||
5129 AMDGPU::SReg_64RegClass.contains(Reg);
5157 switch (MO.getReg()) {
5159 case AMDGPU::VCC_LO:
5160 case AMDGPU::VCC_HI:
5162 case AMDGPU::FLAT_SCR:
5175 switch (
MI.getOpcode()) {
5176 case AMDGPU::V_READLANE_B32:
5177 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
5178 case AMDGPU::V_WRITELANE_B32:
5179 case AMDGPU::SI_SPILL_S32_TO_VGPR:
5186 if (
MI.isPreISelOpcode() ||
5187 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
5205 return SubReg.
getSubReg() != AMDGPU::NoSubRegister &&
5216 if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5217 ErrInfo =
"illegal copy from vector register to SGPR";
5235 if (!MRI.
isSSA() &&
MI.isCopy())
5236 return verifyCopy(
MI, MRI, ErrInfo);
5238 if (SIInstrInfo::isGenericOpcode(Opcode))
5241 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5242 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5243 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5245 if (Src0Idx == -1) {
5247 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5248 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5249 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5250 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5255 if (!
Desc.isVariadic() &&
5256 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
5257 ErrInfo =
"Instruction has wrong number of operands.";
5261 if (
MI.isInlineAsm()) {
5274 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
5275 ErrInfo =
"inlineasm operand has incorrect register class.";
5283 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
5284 ErrInfo =
"missing memory operand from image instruction.";
5289 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5292 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5293 "all fp values to integers.";
5298 int16_t RegClass = getOpRegClassID(OpInfo);
5300 switch (OpInfo.OperandType) {
5302 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5303 ErrInfo =
"Illegal immediate value for operand.";
5336 ErrInfo =
"Illegal immediate value for operand.";
5345 if (ST.has64BitLiterals() &&
Desc.getSize() != 4 && MO.
isImm() &&
5348 OpInfo.OperandType ==
5350 ErrInfo =
"illegal 64-bit immediate value for operand.";
5357 ErrInfo =
"Expected inline constant for operand.";
5371 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5372 ErrInfo =
"Expected immediate, but got non-immediate";
5381 if (OpInfo.isGenericType())
5396 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
5397 Opcode != AMDGPU::V_MOV_B64_PSEUDO && !
isSpill(
MI)) {
5399 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5401 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5402 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5409 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5410 ErrInfo =
"Subtarget requires even aligned vector registers";
5415 if (RegClass != -1) {
5416 if (Reg.isVirtual())
5421 ErrInfo =
"Operand has incorrect register class.";
5429 if (!ST.hasSDWA()) {
5430 ErrInfo =
"SDWA is not supported on this target";
5434 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5435 AMDGPU::OpName::dst_sel}) {
5439 int64_t Imm = MO->
getImm();
5441 ErrInfo =
"Invalid SDWA selection";
5446 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5448 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5453 if (!ST.hasSDWAScalar()) {
5455 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.
getReg()))) {
5456 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5463 "Only reg allowed as operands in SDWA instructions on GFX9+";
5469 if (!ST.hasSDWAOmod()) {
5472 if (OMod !=
nullptr &&
5474 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5479 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5480 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5481 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5482 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5485 unsigned Mods = Src0ModsMO->
getImm();
5488 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5494 if (
isVOPC(BasicOpcode)) {
5495 if (!ST.hasSDWASdst() && DstIdx != -1) {
5498 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5499 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5502 }
else if (!ST.hasSDWAOutModsVOPC()) {
5505 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5506 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5512 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5513 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5520 if (DstUnused && DstUnused->isImm() &&
5523 if (!Dst.isReg() || !Dst.isTied()) {
5524 ErrInfo =
"Dst register should have tied register";
5529 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5532 "Dst register should be tied to implicit use of preserved register";
5536 ErrInfo =
"Dst register should use same physical register as preserved";
5542 if (
isDPP(
MI) && !ST.hasDPPSrc1SGPR() && Src1Idx != -1) {
5544 if (Src1MO.
isReg() && RI.isSGPRReg(MRI, Src1MO.
getReg())) {
5545 ErrInfo =
"DPP src1 cannot be SGPR on this subtarget";
5551 if (
isImage(Opcode) && !
MI.mayStore()) {
5563 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5571 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5575 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5576 if (RegCount > DstSize) {
5577 ErrInfo =
"Image instruction returns too many registers for dst "
5586 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5587 unsigned ConstantBusCount = 0;
5588 bool UsesLiteral =
false;
5591 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5595 LiteralVal = &
MI.getOperand(ImmIdx);
5604 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5615 }
else if (!MO.
isFI()) {
5622 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5632 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5633 return !RI.regsOverlap(SGPRUsed, SGPR);
5642 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5643 Opcode != AMDGPU::V_WRITELANE_B32) {
5644 ErrInfo =
"VOP* instruction violates constant bus restriction";
5648 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5649 ErrInfo =
"VOP3 instruction uses literal";
5656 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5657 unsigned SGPRCount = 0;
5660 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5668 if (MO.
getReg() != SGPRUsed)
5673 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5674 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5681 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5682 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5689 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5699 ErrInfo =
"ABS not allowed in VOP3B instructions";
5712 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5719 if (
Desc.isBranch()) {
5721 ErrInfo =
"invalid branch target for SOPK instruction";
5728 ErrInfo =
"invalid immediate for SOPK instruction";
5733 ErrInfo =
"invalid immediate for SOPK instruction";
5740 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5741 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5742 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5743 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5744 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5745 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5747 const unsigned StaticNumOps =
5748 Desc.getNumOperands() +
Desc.implicit_uses().size();
5749 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5755 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5756 ErrInfo =
"missing implicit register operands";
5762 if (!Dst->isUse()) {
5763 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5768 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5769 UseOpIdx != StaticNumOps + 1) {
5770 ErrInfo =
"movrel implicit operands should be tied";
5777 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5779 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5780 ErrInfo =
"src0 should be subreg of implicit vector use";
5788 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5789 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5795 if (
MI.mayStore() &&
5800 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5801 ErrInfo =
"scalar stores must use m0 as offset register";
5807 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5809 if (
Offset->getImm() != 0) {
5810 ErrInfo =
"subtarget does not support offsets in flat instructions";
5815 if (
isDS(
MI) && !ST.hasGDS()) {
5817 if (GDSOp && GDSOp->
getImm() != 0) {
5818 ErrInfo =
"GDS is not supported on this subtarget";
5826 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5827 AMDGPU::OpName::vaddr0);
5828 AMDGPU::OpName RSrcOpName =
5829 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5830 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5838 ErrInfo =
"dim is out of range";
5843 if (ST.hasR128A16()) {
5845 IsA16 = R128A16->
getImm() != 0;
5846 }
else if (ST.hasA16()) {
5848 IsA16 = A16->
getImm() != 0;
5851 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5853 unsigned AddrWords =
5856 unsigned VAddrWords;
5858 VAddrWords = RsrcIdx - VAddr0Idx;
5859 if (ST.hasPartialNSAEncoding() &&
5861 unsigned LastVAddrIdx = RsrcIdx - 1;
5862 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5870 if (VAddrWords != AddrWords) {
5872 <<
" but got " << VAddrWords <<
"\n");
5873 ErrInfo =
"bad vaddr size";
5883 unsigned DC = DppCt->
getImm();
5884 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5885 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5886 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5887 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5888 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5889 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5890 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5891 ErrInfo =
"Invalid dpp_ctrl value";
5894 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5895 !ST.hasDPPWavefrontShifts()) {
5896 ErrInfo =
"Invalid dpp_ctrl value: "
5897 "wavefront shifts are not supported on GFX10+";
5900 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5901 !ST.hasDPPBroadcasts()) {
5902 ErrInfo =
"Invalid dpp_ctrl value: "
5903 "broadcasts are not supported on GFX10+";
5906 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5908 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5909 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5910 !ST.hasGFX90AInsts()) {
5911 ErrInfo =
"Invalid dpp_ctrl value: "
5912 "row_newbroadcast/row_share is not supported before "
5916 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5917 ErrInfo =
"Invalid dpp_ctrl value: "
5918 "row_share and row_xmask are not supported before GFX10";
5923 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5926 ErrInfo =
"Invalid dpp_ctrl value: "
5927 "DP ALU dpp only support row_newbcast";
5934 AMDGPU::OpName DataName =
5935 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5941 if (ST.hasGFX90AInsts()) {
5942 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5943 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI,
Data->getReg()))) {
5944 ErrInfo =
"Invalid register class: "
5945 "vdata and vdst should be both VGPR or AGPR";
5948 if (
Data && Data2 &&
5949 (RI.isAGPR(MRI,
Data->getReg()) != RI.isAGPR(MRI, Data2->
getReg()))) {
5950 ErrInfo =
"Invalid register class: "
5951 "both data operands should be VGPR or AGPR";
5955 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5956 (
Data && RI.isAGPR(MRI,
Data->getReg())) ||
5957 (Data2 && RI.isAGPR(MRI, Data2->
getReg()))) {
5958 ErrInfo =
"Invalid register class: "
5959 "agpr loads and stores not supported on this GPU";
5965 if (ST.needsAlignedVGPRs()) {
5966 const auto isAlignedReg = [&
MI, &MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5971 if (Reg.isPhysical())
5972 return !(RI.getHWRegIndex(Reg) & 1);
5974 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5975 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5978 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5979 Opcode == AMDGPU::DS_GWS_BARRIER) {
5981 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5982 ErrInfo =
"Subtarget requires even aligned vector registers "
5983 "for DS_GWS instructions";
5989 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5990 ErrInfo =
"Subtarget requires even aligned vector registers "
5991 "for vaddr operand of image instructions";
5997 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5999 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
6000 ErrInfo =
"Invalid register class: "
6001 "v_accvgpr_write with an SGPR is not supported on this GPU";
6006 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
6009 ErrInfo =
"pseudo expects only physical SGPRs";
6016 if (!ST.hasScaleOffset()) {
6017 ErrInfo =
"Subtarget does not support offset scaling";
6021 ErrInfo =
"Instruction does not support offset scaling";
6030 for (
unsigned I = 0;
I < 3; ++
I) {
6036 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
6037 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
6039 if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
6040 &AMDGPU::SReg_64RegClass) ||
6041 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
6042 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
6051 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
6053 return MI.getOperand(1).isReg() || RI.isAGPR(MRI,
MI.getOperand(0).getReg())
6055 : AMDGPU::V_MOV_B32_e32;
6065 default:
return AMDGPU::INSTRUCTION_LIST_END;
6066 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
6067 case AMDGPU::COPY:
return AMDGPU::COPY;
6068 case AMDGPU::PHI:
return AMDGPU::PHI;
6069 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
6070 case AMDGPU::WQM:
return AMDGPU::WQM;
6071 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
6072 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
6073 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
6074 case AMDGPU::S_ADD_I32:
6075 return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
6076 case AMDGPU::S_ADDC_U32:
6077 return AMDGPU::V_ADDC_U32_e32;
6078 case AMDGPU::S_SUB_I32:
6079 return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
6082 case AMDGPU::S_ADD_U32:
6083 return AMDGPU::V_ADD_CO_U32_e32;
6084 case AMDGPU::S_SUB_U32:
6085 return AMDGPU::V_SUB_CO_U32_e32;
6086 case AMDGPU::S_ADD_U64_PSEUDO:
6087 return AMDGPU::V_ADD_U64_PSEUDO;
6088 case AMDGPU::S_SUB_U64_PSEUDO:
6089 return AMDGPU::V_SUB_U64_PSEUDO;
6090 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
6091 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
6092 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
6093 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
6094 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
6095 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
6096 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
6097 case AMDGPU::S_XNOR_B32:
6098 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
6099 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
6100 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
6101 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
6102 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
6103 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
6104 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
6105 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
6106 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
6107 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
6108 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
6109 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
6110 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
6111 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
6112 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
6113 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
6114 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
6115 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
6116 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
6117 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
6118 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
6119 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
6120 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
6121 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
6122 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
6123 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
6124 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
6125 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
6126 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
6127 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
6128 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
6129 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
6130 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
6131 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
6132 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
6133 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
6134 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
6135 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
6136 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
6137 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
6138 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
6139 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
6140 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
6141 case AMDGPU::S_CVT_F32_F16:
6142 case AMDGPU::S_CVT_HI_F32_F16:
6143 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
6144 : AMDGPU::V_CVT_F32_F16_fake16_e64;
6145 case AMDGPU::S_CVT_F16_F32:
6146 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
6147 : AMDGPU::V_CVT_F16_F32_fake16_e64;
6148 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
6149 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
6150 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
6151 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
6152 case AMDGPU::S_CEIL_F16:
6153 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
6154 : AMDGPU::V_CEIL_F16_fake16_e64;
6155 case AMDGPU::S_FLOOR_F16:
6156 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6157 : AMDGPU::V_FLOOR_F16_fake16_e64;
6158 case AMDGPU::S_TRUNC_F16:
6159 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6160 : AMDGPU::V_TRUNC_F16_fake16_e64;
6161 case AMDGPU::S_RNDNE_F16:
6162 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6163 : AMDGPU::V_RNDNE_F16_fake16_e64;
6164 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6165 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6166 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6167 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6168 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6169 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6170 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6171 case AMDGPU::S_ADD_F16:
6172 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6173 : AMDGPU::V_ADD_F16_fake16_e64;
6174 case AMDGPU::S_SUB_F16:
6175 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6176 : AMDGPU::V_SUB_F16_fake16_e64;
6177 case AMDGPU::S_MIN_F16:
6178 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6179 : AMDGPU::V_MIN_F16_fake16_e64;
6180 case AMDGPU::S_MAX_F16:
6181 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6182 : AMDGPU::V_MAX_F16_fake16_e64;
6183 case AMDGPU::S_MINIMUM_F16:
6184 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6185 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6186 case AMDGPU::S_MAXIMUM_F16:
6187 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6188 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6189 case AMDGPU::S_MUL_F16:
6190 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6191 : AMDGPU::V_MUL_F16_fake16_e64;
6192 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6193 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6194 case AMDGPU::S_FMAC_F16:
6195 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6196 : AMDGPU::V_FMAC_F16_fake16_e64;
6197 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6198 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6199 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6200 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6201 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6202 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6203 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6204 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6205 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6206 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6207 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6208 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6209 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6210 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6211 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6212 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6213 case AMDGPU::S_CMP_LT_F16:
6214 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6215 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6216 case AMDGPU::S_CMP_EQ_F16:
6217 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6218 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6219 case AMDGPU::S_CMP_LE_F16:
6220 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6221 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6222 case AMDGPU::S_CMP_GT_F16:
6223 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6224 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6225 case AMDGPU::S_CMP_LG_F16:
6226 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6227 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6228 case AMDGPU::S_CMP_GE_F16:
6229 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6230 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6231 case AMDGPU::S_CMP_O_F16:
6232 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6233 : AMDGPU::V_CMP_O_F16_fake16_e64;
6234 case AMDGPU::S_CMP_U_F16:
6235 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6236 : AMDGPU::V_CMP_U_F16_fake16_e64;
6237 case AMDGPU::S_CMP_NGE_F16:
6238 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6239 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6240 case AMDGPU::S_CMP_NLG_F16:
6241 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6242 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6243 case AMDGPU::S_CMP_NGT_F16:
6244 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6245 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6246 case AMDGPU::S_CMP_NLE_F16:
6247 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6248 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6249 case AMDGPU::S_CMP_NEQ_F16:
6250 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6251 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6252 case AMDGPU::S_CMP_NLT_F16:
6253 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6254 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6255 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6256 case AMDGPU::V_S_EXP_F16_e64:
6257 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6258 : AMDGPU::V_EXP_F16_fake16_e64;
6259 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6260 case AMDGPU::V_S_LOG_F16_e64:
6261 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6262 : AMDGPU::V_LOG_F16_fake16_e64;
6263 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6264 case AMDGPU::V_S_RCP_F16_e64:
6265 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6266 : AMDGPU::V_RCP_F16_fake16_e64;
6267 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6268 case AMDGPU::V_S_RSQ_F16_e64:
6269 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6270 : AMDGPU::V_RSQ_F16_fake16_e64;
6271 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6272 case AMDGPU::V_S_SQRT_F16_e64:
6273 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6274 : AMDGPU::V_SQRT_F16_fake16_e64;
6277 "Unexpected scalar opcode without corresponding vector one!");
6326 "Not a whole wave func");
6329 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6330 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6337 unsigned OpNo)
const {
6339 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6340 Desc.operands()[OpNo].RegClass == -1) {
6343 if (Reg.isVirtual()) {
6347 return RI.getPhysRegBaseClass(Reg);
6350 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6351 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6359 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6361 unsigned Size = RI.getRegSizeInBits(*RC);
6362 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6363 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6364 : AMDGPU::V_MOV_B32_e32;
6366 Opcode = AMDGPU::COPY;
6367 else if (RI.isSGPRClass(RC))
6368 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6382 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6388 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6399 if (SubIdx == AMDGPU::sub0)
6401 if (SubIdx == AMDGPU::sub1)
6413void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6429 if (Reg.isPhysical())
6439 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6442 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6449 unsigned Opc =
MI.getOpcode();
6455 constexpr AMDGPU::OpName OpNames[] = {
6456 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6459 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6460 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6470 bool IsAGPR = RI.isAGPR(MRI, MO.
getReg());
6471 if (IsAGPR && !ST.hasMAIInsts())
6477 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6478 const int DataIdx = AMDGPU::getNamedOperandIdx(
6479 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6480 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6481 MI.getOperand(DataIdx).isReg() &&
6482 RI.isAGPR(MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6484 if ((
int)
OpIdx == DataIdx) {
6485 if (VDstIdx != -1 &&
6486 RI.isAGPR(MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6489 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6490 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6491 RI.isAGPR(MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6496 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6497 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6498 RI.isSGPRReg(MRI, MO.
getReg()))
6501 if (ST.hasFlatScratchHiInB64InstHazard() &&
6508 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6511 if (!ST.hasDPPSrc1SGPR() &&
isDPP(
MI) && RI.isSGPRReg(MRI, MO.
getReg()) &&
6512 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1))
6532 constexpr unsigned NumOps = 3;
6533 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6534 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6535 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6536 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6541 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6544 MO = &
MI.getOperand(SrcIdx);
6547 if (!MO->
isReg() || !RI.isSGPRReg(MRI, MO->
getReg()))
6551 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6555 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6559 return !OpSel && !OpSelHi;
6568 int64_t RegClass = getOpRegClassID(OpInfo);
6570 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6579 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6580 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6584 if (!LiteralLimit--)
6594 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6602 if (--ConstantBusLimit <= 0)
6614 if (!LiteralLimit--)
6616 if (--ConstantBusLimit <= 0)
6622 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6626 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6628 !
Op.isIdenticalTo(*MO))
6638 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6653 bool Is64BitOp = Is64BitFPOp ||
6661 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6670 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6688 bool IsGFX950Only = ST.hasGFX950Insts();
6689 bool IsGFX940Only = ST.hasGFX940Insts();
6691 if (!IsGFX950Only && !IsGFX940Only)
6709 unsigned Opcode =
MI.getOpcode();
6711 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6712 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6713 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6714 case AMDGPU::V_MQSAD_U32_U8_e64:
6715 case AMDGPU::V_PK_ADD_F16:
6716 case AMDGPU::V_PK_ADD_F32:
6717 case AMDGPU::V_PK_ADD_I16:
6718 case AMDGPU::V_PK_ADD_U16:
6719 case AMDGPU::V_PK_ASHRREV_I16:
6720 case AMDGPU::V_PK_FMA_F16:
6721 case AMDGPU::V_PK_FMA_F32:
6722 case AMDGPU::V_PK_FMAC_F16_e32:
6723 case AMDGPU::V_PK_FMAC_F16_e64:
6724 case AMDGPU::V_PK_LSHLREV_B16:
6725 case AMDGPU::V_PK_LSHRREV_B16:
6726 case AMDGPU::V_PK_MAD_I16:
6727 case AMDGPU::V_PK_MAD_U16:
6728 case AMDGPU::V_PK_MAX_F16:
6729 case AMDGPU::V_PK_MAX_I16:
6730 case AMDGPU::V_PK_MAX_U16:
6731 case AMDGPU::V_PK_MIN_F16:
6732 case AMDGPU::V_PK_MIN_I16:
6733 case AMDGPU::V_PK_MIN_U16:
6734 case AMDGPU::V_PK_MOV_B32:
6735 case AMDGPU::V_PK_MUL_F16:
6736 case AMDGPU::V_PK_MUL_F32:
6737 case AMDGPU::V_PK_MUL_LO_U16:
6738 case AMDGPU::V_PK_SUB_I16:
6739 case AMDGPU::V_PK_SUB_U16:
6740 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6749 unsigned Opc =
MI.getOpcode();
6752 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6755 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6761 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6762 RI.isSGPRReg(MRI, Src0.
getReg()))
6768 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6770 if (Src0.
isReg() && RI.isVGPR(MRI, Src0.
getReg())) {
6776 if (Src1.
isReg() && RI.isVGPR(MRI, Src1.
getReg())) {
6787 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6788 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6789 if (!RI.isVGPR(MRI,
MI.getOperand(Src2Idx).getReg()))
6801 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6802 RI.isVGPR(MRI, Src1.
getReg())) {
6815 if (HasImplicitSGPR || !
MI.isCommutable()) {
6832 if (CommutedOpc == -1) {
6837 MI.setDesc(
get(CommutedOpc));
6841 bool Src0Kill = Src0.
isKill();
6845 else if (Src1.
isReg()) {
6860 unsigned Opc =
MI.getOpcode();
6863 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6864 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6865 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6868 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6869 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6870 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6871 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6872 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6873 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6874 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6884 if (VOP3Idx[2] != -1) {
6896 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6897 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6899 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6901 SGPRsUsed.
insert(SGPRReg);
6905 for (
int Idx : VOP3Idx) {
6914 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6926 if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.
getReg())))
6933 if (ConstantBusLimit > 0) {
6945 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6946 !RI.isVGPR(MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6953 for (
unsigned I = 0;
I < 3; ++
I) {
6966 SRC = RI.getCommonSubClass(SRC, DstRC);
6969 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6971 if (RI.hasAGPRs(VRC)) {
6972 VRC = RI.getEquivalentVGPRClass(VRC);
6975 get(TargetOpcode::COPY), NewSrcReg)
6982 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6988 for (
unsigned i = 0; i < SubRegs; ++i) {
6991 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6992 .
addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
6998 get(AMDGPU::REG_SEQUENCE), DstReg);
6999 for (
unsigned i = 0; i < SubRegs; ++i) {
7001 MIB.
addImm(RI.getSubRegFromChannel(i));
7014 if (SBase && !RI.isSGPRClass(MRI.
getRegClass(SBase->getReg()))) {
7016 SBase->setReg(SGPR);
7019 if (SOff && !RI.isSGPRReg(MRI, SOff->
getReg())) {
7027 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
7028 if (OldSAddrIdx < 0)
7041 if (RI.isSGPRReg(MRI, SAddr.
getReg()))
7044 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
7045 if (NewVAddrIdx < 0)
7048 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
7052 if (OldVAddrIdx >= 0) {
7066 if (OldVAddrIdx == NewVAddrIdx) {
7077 assert(OldSAddrIdx == NewVAddrIdx);
7079 if (OldVAddrIdx >= 0) {
7080 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
7081 AMDGPU::OpName::vdst_in);
7085 if (NewVDstIn != -1) {
7086 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
7092 if (NewVDstIn != -1) {
7093 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
7134 unsigned OpSubReg =
Op.getSubReg();
7137 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
7153 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7156 bool ImpDef = Def->isImplicitDef();
7157 while (!ImpDef && Def && Def->isCopy()) {
7158 if (Def->getOperand(1).getReg().isPhysical())
7161 ImpDef = Def && Def->isImplicitDef();
7163 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7179 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7183 for (
auto [Idx, ScalarOp] :
enumerate(ScalarOps)) {
7184 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
7185 unsigned NumSubRegs =
RegSize / 32;
7186 Register VScalarOp = ScalarOp->getReg();
7189 TII.getRegClass(
TII.get(AMDGPU::V_READFIRSTLANE_B32), 1);
7191 if (NumSubRegs == 1) {
7194 TRI->getCommonSubClass(VScalarOpRC, RFLSrcRC);
7195 Common != VScalarOpRC) {
7202 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7207 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7213 CondReg = NewCondReg;
7223 if (PhySGPRs.empty() || !PhySGPRs[Idx].isValid())
7224 ScalarOp->setReg(CurReg);
7227 BuildMI(*ScalarOp->getParent()->getParent(), ScalarOp->getParent(),
DL,
7228 TII.get(AMDGPU::COPY), PhySGPRs[Idx])
7230 ScalarOp->setReg(PhySGPRs[Idx]);
7232 ScalarOp->setIsKill();
7236 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7237 "Unhandled register size");
7239 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7246 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7247 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
7250 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7251 .
addReg(VScalarOp, VScalarOpUndef,
7252 TRI->getSubRegFromChannel(Idx + 1));
7259 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7269 if (NumSubRegs <= 2)
7270 Cmp.addReg(VScalarOp);
7272 Cmp.addReg(VScalarOp, VScalarOpUndef,
7273 TRI->getSubRegFromChannel(Idx, 2));
7277 CondReg = NewCondReg;
7287 const auto *SScalarOpRC =
7293 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7294 unsigned Channel = 0;
7295 for (
Register Piece : ReadlanePieces) {
7296 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
7300 if (PhySGPRs.empty() || !PhySGPRs[Idx].isValid())
7301 ScalarOp->setReg(SScalarOp);
7303 BuildMI(*ScalarOp->getParent()->getParent(), ScalarOp->getParent(),
DL,
7304 TII.get(AMDGPU::COPY), PhySGPRs[Idx])
7306 ScalarOp->setReg(PhySGPRs[Idx]);
7308 ScalarOp->setIsKill();
7340 assert((PhySGPRs.empty() || PhySGPRs.size() == ScalarOps.
size()) &&
7341 "Physical SGPRs must be empty or match the number of scalar operands");
7347 if (!Begin.isValid())
7349 if (!End.isValid()) {
7355 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7364 std::numeric_limits<unsigned>::max()) !=
7382 for (
auto I = Begin;
I != AfterMI;
I++) {
7383 for (
auto &MO :
I->all_uses())
7419 for (
auto &Succ : RemainderBB->
successors()) {
7444static std::tuple<unsigned, unsigned>
7452 TII.buildExtractSubReg(
MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7453 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7460 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7477 .
addImm(AMDGPU::sub0_sub1)
7483 return std::tuple(RsrcPtr, NewSRsrc);
7520 if (
MI.getOpcode() == AMDGPU::PHI) {
7522 assert(!RI.isSGPRClass(VRC));
7525 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7527 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7543 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7546 if (RI.hasVGPRs(DstRC)) {
7550 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7552 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7570 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7575 if (DstRC != Src0RC) {
7584 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7586 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7592 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7593 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7594 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7595 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7596 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7597 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7598 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7600 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7613 ? AMDGPU::OpName::rsrc
7614 : AMDGPU::OpName::srsrc;
7619 AMDGPU::OpName SampOpName =
7620 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7629 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7637 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7641 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7651 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d2 ||
7652 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d4 ||
7653 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d2 ||
7654 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d4) {
7656 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7663 bool isSoffsetLegal =
true;
7665 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7666 if (SoffsetIdx != -1) {
7670 isSoffsetLegal =
false;
7674 bool isRsrcLegal =
true;
7676 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7677 if (RsrcIdx != -1) {
7679 if (Rsrc->
isReg() && !RI.isSGPRReg(MRI, Rsrc->
getReg()))
7680 isRsrcLegal =
false;
7684 if (isRsrcLegal && isSoffsetLegal)
7712 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7716 unsigned RsrcPtr, NewSRsrc;
7723 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7724 .addReg(VAddr->
getReg(), {}, AMDGPU::sub0)
7730 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7731 .addReg(VAddr->
getReg(), {}, AMDGPU::sub1)
7744 }
else if (!VAddr && ST.hasAddr64()) {
7748 "FIXME: Need to emit flat atomics here");
7750 unsigned RsrcPtr, NewSRsrc;
7776 MIB.
addImm(CPol->getImm());
7781 MIB.
addImm(TFE->getImm());
7801 MI.removeFromParent();
7806 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7807 .addImm(AMDGPU::sub0)
7808 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7809 .addImm(AMDGPU::sub1);
7812 if (!isSoffsetLegal) {
7823 if (!isSoffsetLegal) {
7835 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7836 if (RsrcIdx != -1) {
7837 DeferredList.insert(
MI);
7842 return DeferredList.contains(
MI);
7852 if (!ST.useRealTrue16Insts())
7855 unsigned Opcode =
MI.getOpcode();
7859 OpIdx >=
get(Opcode).getNumOperands() ||
7860 get(Opcode).operands()[
OpIdx].RegClass == -1)
7864 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7868 if (!RI.isVGPRClass(CurrRC))
7871 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7873 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7874 Op.setSubReg(AMDGPU::lo16);
7875 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7885 Op.setReg(NewDstReg);
7898 assert(
MI->getOpcode() == AMDGPU::SI_CALL_ISEL &&
7899 "This only handle waterfall for SI_CALL_ISEL");
7906 while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
7909 while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
7914 while (End !=
MBB.end() && End->isCopy() &&
7915 MI->definesRegister(End->getOperand(1).getReg(), &RI))
7925 while (!Worklist.
empty()) {
7931 moveToVALUImpl(Worklist, MDT, Inst, WaterFalls, V2SPhyCopiesToErase);
7937 moveToVALUImpl(Worklist, MDT, *Inst, WaterFalls, V2SPhyCopiesToErase);
7939 "Deferred MachineInstr are not supposed to re-populate worklist");
7942 for (std::pair<MachineInstr *, V2PhysSCopyInfo> &Entry : WaterFalls) {
7943 if (Entry.first->getOpcode() == AMDGPU::SI_CALL_ISEL)
7945 Entry.second.SGPRs);
7948 for (std::pair<MachineInstr *, bool> Entry : V2SPhyCopiesToErase)
7950 Entry.first->eraseFromParent();
7958 if (SubRegIndices.
size() <= 1) {
7961 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
7968 for (int16_t Indice : SubRegIndices) {
7971 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
7978 get(AMDGPU::REG_SEQUENCE), DstReg);
7979 for (
unsigned i = 0; i < SubRegIndices.size(); ++i) {
7981 MIB.
addImm(RI.getSubRegFromChannel(i));
7991 if (DstReg == AMDGPU::M0) {
8004 if (
I->getOpcode() == AMDGPU::SI_CALL_ISEL) {
8006 for (
unsigned i = 0; i <
UseMI->getNumOperands(); ++i) {
8007 if (
UseMI->getOperand(i).isReg() &&
8008 UseMI->getOperand(i).getReg() == DstReg) {
8012 V2SCopyInfo.MOs.push_back(MO);
8013 V2SCopyInfo.SGPRs.push_back(DstReg);
8017 }
else if (
I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG &&
8018 I->getOperand(0).isReg() &&
8019 I->getOperand(0).getReg() == DstReg) {
8022 }
else if (
I->readsRegister(DstReg, &RI)) {
8024 V2SPhyCopiesToErase[&Inst] =
false;
8026 if (
I->findRegisterDefOperand(DstReg, &RI))
8048 case AMDGPU::S_ADD_I32:
8049 case AMDGPU::S_SUB_I32: {
8053 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
8061 case AMDGPU::S_MUL_U64:
8062 if (ST.hasVMulU64Inst()) {
8063 NewOpcode = AMDGPU::V_MUL_U64_e64;
8067 splitScalarSMulU64(Worklist, Inst, MDT);
8071 case AMDGPU::S_MUL_U64_U32_PSEUDO:
8072 case AMDGPU::S_MUL_I64_I32_PSEUDO:
8075 splitScalarSMulPseudo(Worklist, Inst, MDT);
8079 case AMDGPU::S_AND_B64:
8080 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
8084 case AMDGPU::S_OR_B64:
8085 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
8089 case AMDGPU::S_XOR_B64:
8090 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
8094 case AMDGPU::S_NAND_B64:
8095 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
8099 case AMDGPU::S_NOR_B64:
8100 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
8104 case AMDGPU::S_XNOR_B64:
8105 if (ST.hasDLInsts())
8106 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
8108 splitScalar64BitXnor(Worklist, Inst, MDT);
8112 case AMDGPU::S_ANDN2_B64:
8113 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
8117 case AMDGPU::S_ORN2_B64:
8118 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
8122 case AMDGPU::S_BREV_B64:
8123 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
8127 case AMDGPU::S_NOT_B64:
8128 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
8132 case AMDGPU::S_BCNT1_I32_B64:
8133 splitScalar64BitBCNT(Worklist, Inst);
8137 case AMDGPU::S_BFE_I64:
8138 splitScalar64BitBFE(Worklist, Inst);
8142 case AMDGPU::S_FLBIT_I32_B64:
8143 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
8146 case AMDGPU::S_FF1_I32_B64:
8147 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
8151 case AMDGPU::S_LSHL_B32:
8152 if (ST.hasOnlyRevVALUShifts()) {
8153 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
8157 case AMDGPU::S_ASHR_I32:
8158 if (ST.hasOnlyRevVALUShifts()) {
8159 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
8163 case AMDGPU::S_LSHR_B32:
8164 if (ST.hasOnlyRevVALUShifts()) {
8165 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
8169 case AMDGPU::S_LSHL_B64:
8170 if (ST.hasOnlyRevVALUShifts()) {
8172 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
8173 : AMDGPU::V_LSHLREV_B64_e64;
8177 case AMDGPU::S_ASHR_I64:
8178 if (ST.hasOnlyRevVALUShifts()) {
8179 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
8183 case AMDGPU::S_LSHR_B64:
8184 if (ST.hasOnlyRevVALUShifts()) {
8185 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
8190 case AMDGPU::S_ABS_I32:
8191 lowerScalarAbs(Worklist, Inst);
8195 case AMDGPU::S_ABSDIFF_I32:
8196 lowerScalarAbsDiff(Worklist, Inst);
8200 case AMDGPU::S_CBRANCH_SCC0:
8201 case AMDGPU::S_CBRANCH_SCC1: {
8204 bool IsSCC = CondReg == AMDGPU::SCC;
8212 case AMDGPU::S_BFE_U64:
8213 case AMDGPU::S_BFM_B64:
8216 case AMDGPU::S_PACK_LL_B32_B16:
8217 case AMDGPU::S_PACK_LH_B32_B16:
8218 case AMDGPU::S_PACK_HL_B32_B16:
8219 case AMDGPU::S_PACK_HH_B32_B16:
8220 movePackToVALU(Worklist, MRI, Inst);
8224 case AMDGPU::S_XNOR_B32:
8225 lowerScalarXnor(Worklist, Inst);
8229 case AMDGPU::S_NAND_B32:
8230 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
8234 case AMDGPU::S_NOR_B32:
8235 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
8239 case AMDGPU::S_ANDN2_B32:
8240 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
8244 case AMDGPU::S_ORN2_B32:
8245 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
8253 case AMDGPU::S_ADD_CO_PSEUDO:
8254 case AMDGPU::S_SUB_CO_PSEUDO: {
8255 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
8256 ? AMDGPU::V_ADDC_U32_e64
8257 : AMDGPU::V_SUBB_U32_e64;
8258 const auto *CarryRC = RI.getWaveMaskRegClass();
8280 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8284 case AMDGPU::S_UADDO_PSEUDO:
8285 case AMDGPU::S_USUBO_PSEUDO: {
8291 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8292 ? AMDGPU::V_ADD_CO_U32_e64
8293 : AMDGPU::V_SUB_CO_U32_e64;
8305 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8309 case AMDGPU::S_LSHL1_ADD_U32:
8310 case AMDGPU::S_LSHL2_ADD_U32:
8311 case AMDGPU::S_LSHL3_ADD_U32:
8312 case AMDGPU::S_LSHL4_ADD_U32: {
8316 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8317 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8318 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8332 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8336 case AMDGPU::S_CSELECT_B32:
8337 case AMDGPU::S_CSELECT_B64:
8338 lowerSelect(Worklist, Inst, MDT);
8341 case AMDGPU::S_CMP_EQ_I32:
8342 case AMDGPU::S_CMP_LG_I32:
8343 case AMDGPU::S_CMP_GT_I32:
8344 case AMDGPU::S_CMP_GE_I32:
8345 case AMDGPU::S_CMP_LT_I32:
8346 case AMDGPU::S_CMP_LE_I32:
8347 case AMDGPU::S_CMP_EQ_U32:
8348 case AMDGPU::S_CMP_LG_U32:
8349 case AMDGPU::S_CMP_GT_U32:
8350 case AMDGPU::S_CMP_GE_U32:
8351 case AMDGPU::S_CMP_LT_U32:
8352 case AMDGPU::S_CMP_LE_U32:
8353 case AMDGPU::S_CMP_EQ_U64:
8354 case AMDGPU::S_CMP_LG_U64:
8355 case AMDGPU::S_CMP_LT_F32:
8356 case AMDGPU::S_CMP_EQ_F32:
8357 case AMDGPU::S_CMP_LE_F32:
8358 case AMDGPU::S_CMP_GT_F32:
8359 case AMDGPU::S_CMP_LG_F32:
8360 case AMDGPU::S_CMP_GE_F32:
8361 case AMDGPU::S_CMP_O_F32:
8362 case AMDGPU::S_CMP_U_F32:
8363 case AMDGPU::S_CMP_NGE_F32:
8364 case AMDGPU::S_CMP_NLG_F32:
8365 case AMDGPU::S_CMP_NGT_F32:
8366 case AMDGPU::S_CMP_NLE_F32:
8367 case AMDGPU::S_CMP_NEQ_F32:
8368 case AMDGPU::S_CMP_NLT_F32: {
8373 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8387 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8391 case AMDGPU::S_CMP_LT_F16:
8392 case AMDGPU::S_CMP_EQ_F16:
8393 case AMDGPU::S_CMP_LE_F16:
8394 case AMDGPU::S_CMP_GT_F16:
8395 case AMDGPU::S_CMP_LG_F16:
8396 case AMDGPU::S_CMP_GE_F16:
8397 case AMDGPU::S_CMP_O_F16:
8398 case AMDGPU::S_CMP_U_F16:
8399 case AMDGPU::S_CMP_NGE_F16:
8400 case AMDGPU::S_CMP_NLG_F16:
8401 case AMDGPU::S_CMP_NGT_F16:
8402 case AMDGPU::S_CMP_NLE_F16:
8403 case AMDGPU::S_CMP_NEQ_F16:
8404 case AMDGPU::S_CMP_NLT_F16: {
8427 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8431 case AMDGPU::S_CVT_HI_F32_F16: {
8434 if (ST.useRealTrue16Insts()) {
8439 .
addReg(TmpReg, {}, AMDGPU::hi16)
8455 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8459 case AMDGPU::S_MINIMUM_F32:
8460 case AMDGPU::S_MAXIMUM_F32: {
8472 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8476 case AMDGPU::S_MINIMUM_F16:
8477 case AMDGPU::S_MAXIMUM_F16: {
8479 ? &AMDGPU::VGPR_16RegClass
8480 : &AMDGPU::VGPR_32RegClass);
8492 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8496 case AMDGPU::V_S_EXP_F16_e64:
8497 case AMDGPU::V_S_LOG_F16_e64:
8498 case AMDGPU::V_S_RCP_F16_e64:
8499 case AMDGPU::V_S_RSQ_F16_e64:
8500 case AMDGPU::V_S_SQRT_F16_e64: {
8502 ? &AMDGPU::VGPR_16RegClass
8503 : &AMDGPU::VGPR_32RegClass);
8515 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8521 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8529 if (NewOpcode == Opcode) {
8536 V2SPhyCopiesToErase);
8544 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8551 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8582 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8586 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8592 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8599 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8601 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8606 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8614 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8624 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8625 AMDGPU::OpName::src0_modifiers) >= 0)
8629 NewInstr->addOperand(Src);
8632 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8635 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8637 NewInstr.addImm(
Size);
8638 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8642 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8647 "Scalar BFE is only implemented for constant width and offset");
8655 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8656 AMDGPU::OpName::src1_modifiers) >= 0)
8658 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8660 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8661 AMDGPU::OpName::src2_modifiers) >= 0)
8663 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8665 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8667 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8669 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8675 NewInstr->addOperand(
Op);
8682 if (
Op.getReg() == AMDGPU::SCC) {
8684 if (
Op.isDef() && !
Op.isDead())
8685 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8687 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8692 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8693 Register DstReg = NewInstr->getOperand(0).getReg();
8708 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8712std::pair<bool, MachineBasicBlock *>
8715 if (ST.hasAddNoCarryInsts()) {
8727 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8729 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8730 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8741 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8742 return std::pair(
true, NewBB);
8745 return std::pair(
false,
nullptr);
8762 bool IsSCC = (CondReg == AMDGPU::SCC);
8776 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8781 bool CopyFound =
false;
8782 for (MachineInstr &CandI :
8785 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8787 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8789 .
addReg(CandI.getOperand(1).getReg());
8801 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8810 MachineInstr *NewInst;
8811 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8812 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8827 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8842 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8843 : AMDGPU::V_SUB_CO_U32_e32;
8854 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8871 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8872 : AMDGPU::V_SUB_CO_U32_e32;
8885 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8899 if (ST.hasDLInsts()) {
8909 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8915 bool Src0IsSGPR = Src0.
isReg() &&
8917 bool Src1IsSGPR = Src1.
isReg() &&
8931 }
else if (Src1IsSGPR) {
8949 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8955 unsigned Opcode)
const {
8979 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8984 unsigned Opcode)
const {
9008 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
9023 const MCInstrDesc &InstDesc =
get(Opcode);
9024 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
9026 &AMDGPU::SGPR_32RegClass;
9028 const TargetRegisterClass *Src0SubRC =
9029 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9032 AMDGPU::sub0, Src0SubRC);
9035 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9036 const TargetRegisterClass *NewDestSubRC =
9037 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9040 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0).
add(SrcReg0Sub0);
9043 AMDGPU::sub1, Src0SubRC);
9046 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1).
add(SrcReg0Sub1);
9060 Worklist.
insert(&LoHalf);
9061 Worklist.
insert(&HiHalf);
9067 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9090 const TargetRegisterClass *Src0SubRC =
9091 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9092 if (RI.isSGPRClass(Src0SubRC))
9093 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9094 const TargetRegisterClass *Src1SubRC =
9095 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9096 if (RI.isSGPRClass(Src1SubRC))
9097 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9101 MachineOperand Op0L =
9103 MachineOperand Op1L =
9105 MachineOperand Op0H =
9107 MachineOperand Op1H =
9126 MachineInstr *Op1L_Op0H =
9132 MachineInstr *Op1H_Op0L =
9138 MachineInstr *Carry =
9143 MachineInstr *LoHalf =
9153 MachineInstr *HiHalf =
9176 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9199 const TargetRegisterClass *Src0SubRC =
9200 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9201 if (RI.isSGPRClass(Src0SubRC))
9202 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9203 const TargetRegisterClass *Src1SubRC =
9204 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9205 if (RI.isSGPRClass(Src1SubRC))
9206 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9210 MachineOperand Op0L =
9212 MachineOperand Op1L =
9216 unsigned NewOpc =
Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
9217 ? AMDGPU::V_MUL_HI_U32_e64
9218 : AMDGPU::V_MUL_HI_I32_e64;
9219 MachineInstr *HiHalf =
9222 MachineInstr *LoHalf =
9241 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9257 const MCInstrDesc &InstDesc =
get(Opcode);
9258 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
9260 &AMDGPU::SGPR_32RegClass;
9262 const TargetRegisterClass *Src0SubRC =
9263 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9264 const TargetRegisterClass *Src1RC = Src1.
isReg() ?
9266 &AMDGPU::SGPR_32RegClass;
9268 const TargetRegisterClass *Src1SubRC =
9269 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9272 AMDGPU::sub0, Src0SubRC);
9274 AMDGPU::sub0, Src1SubRC);
9276 AMDGPU::sub1, Src0SubRC);
9278 AMDGPU::sub1, Src1SubRC);
9281 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9282 const TargetRegisterClass *NewDestSubRC =
9283 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9286 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0)
9291 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1)
9304 Worklist.
insert(&LoHalf);
9305 Worklist.
insert(&HiHalf);
9308 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9328 MachineOperand* Op0;
9329 MachineOperand* Op1;
9331 if (Src0.
isReg() && RI.isSGPRReg(MRI, Src0.
getReg())) {
9364 const MCInstrDesc &InstDesc =
get(AMDGPU::V_BCNT_U32_B32_e64);
9365 const TargetRegisterClass *SrcRC = Src.isReg() ?
9367 &AMDGPU::SGPR_32RegClass;
9372 const TargetRegisterClass *SrcSubRC =
9373 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9376 AMDGPU::sub0, SrcSubRC);
9378 AMDGPU::sub1, SrcSubRC);
9388 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9407 Offset == 0 &&
"Not implemented");
9430 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9440 .
addReg(Src.getReg(), {}, AMDGPU::sub0);
9443 .
addReg(Src.getReg(), {}, AMDGPU::sub0)
9449 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9468 const MCInstrDesc &InstDesc =
get(Opcode);
9470 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9471 unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9472 : AMDGPU::V_ADD_CO_U32_e32;
9474 const TargetRegisterClass *SrcRC =
9475 Src.isReg() ? MRI.
getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9476 const TargetRegisterClass *SrcSubRC =
9477 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9479 MachineOperand SrcRegSub0 =
9481 MachineOperand SrcRegSub1 =
9494 .
addReg(IsCtlz ? MidReg1 : MidReg2)
9500 .
addReg(IsCtlz ? MidReg2 : MidReg1);
9504 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
9507void SIInstrInfo::addUsersToMoveToVALUWorklist(
9511 MachineInstr &
UseMI = *MO.getParent();
9515 switch (
UseMI.getOpcode()) {
9518 case AMDGPU::SOFT_WQM:
9519 case AMDGPU::STRICT_WWM:
9520 case AMDGPU::STRICT_WQM:
9521 case AMDGPU::REG_SEQUENCE:
9523 case AMDGPU::INSERT_SUBREG:
9526 OpNo = MO.getOperandNo();
9533 if (!RI.hasVectorRegisters(OpRC))
9550 if (ST.useRealTrue16Insts()) {
9552 if (!Src0.
isReg() || !RI.isVGPR(MRI, Src0.
getReg())) {
9555 get(Src0.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg0)
9561 if (!Src1.
isReg() || !RI.isVGPR(MRI, Src1.
getReg())) {
9564 get(Src1.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg1)
9573 auto NewMI =
BuildMI(*
MBB, Inst,
DL,
get(AMDGPU::REG_SEQUENCE), ResultReg);
9575 case AMDGPU::S_PACK_LL_B32_B16:
9577 .addReg(SrcReg0, {},
9578 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9579 .addImm(AMDGPU::lo16)
9580 .addReg(SrcReg1, {},
9581 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9582 .addImm(AMDGPU::hi16);
9584 case AMDGPU::S_PACK_LH_B32_B16:
9586 .addReg(SrcReg0, {},
9587 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9588 .addImm(AMDGPU::lo16)
9589 .addReg(SrcReg1, {}, AMDGPU::hi16)
9590 .addImm(AMDGPU::hi16);
9592 case AMDGPU::S_PACK_HL_B32_B16:
9593 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9594 .addImm(AMDGPU::lo16)
9595 .addReg(SrcReg1, {},
9596 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9597 .addImm(AMDGPU::hi16);
9599 case AMDGPU::S_PACK_HH_B32_B16:
9600 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9601 .addImm(AMDGPU::lo16)
9602 .addReg(SrcReg1, {}, AMDGPU::hi16)
9603 .addImm(AMDGPU::hi16);
9611 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9616 case AMDGPU::S_PACK_LL_B32_B16: {
9635 case AMDGPU::S_PACK_LH_B32_B16: {
9645 case AMDGPU::S_PACK_HL_B32_B16: {
9656 case AMDGPU::S_PACK_HH_B32_B16: {
9676 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9685 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
9686 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
9687 SmallVector<MachineInstr *, 4> CopyToDelete;
9690 for (MachineInstr &
MI :
9694 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
9697 MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9698 Register DestReg =
MI.getOperand(0).getReg();
9705 MI.getOperand(SCCIdx).setReg(NewCond);
9711 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
9714 for (
auto &Copy : CopyToDelete)
9715 Copy->eraseFromParent();
9723void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
9729 for (MachineInstr &
MI :
9732 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
9734 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
9743 const TargetRegisterClass *NewDstRC =
getOpRegClass(Inst, 0);
9751 case AMDGPU::REG_SEQUENCE:
9752 case AMDGPU::INSERT_SUBREG:
9754 case AMDGPU::SOFT_WQM:
9755 case AMDGPU::STRICT_WWM:
9756 case AMDGPU::STRICT_WQM: {
9758 if (RI.isAGPRClass(SrcRC)) {
9759 if (RI.isAGPRClass(NewDstRC))
9764 case AMDGPU::REG_SEQUENCE:
9765 case AMDGPU::INSERT_SUBREG:
9766 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9769 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9775 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9778 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9792 int OpIndices[3])
const {
9793 const MCInstrDesc &
Desc =
MI.getDesc();
9809 const MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9811 for (
unsigned i = 0; i < 3; ++i) {
9812 int Idx = OpIndices[i];
9816 const MachineOperand &MO =
MI.getOperand(Idx);
9822 const TargetRegisterClass *OpRC =
9823 RI.getRegClass(getOpRegClassID(
Desc.operands()[Idx]));
9824 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9831 if (RI.isSGPRClass(RegRC))
9849 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9850 SGPRReg = UsedSGPRs[0];
9853 if (!SGPRReg && UsedSGPRs[1]) {
9854 if (UsedSGPRs[1] == UsedSGPRs[2])
9855 SGPRReg = UsedSGPRs[1];
9862 AMDGPU::OpName OperandName)
const {
9863 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9866 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9870 return &
MI.getOperand(Idx);
9884 if (ST.isAmdHsaOS()) {
9887 RsrcDataFormat |= (1ULL << 56);
9892 RsrcDataFormat |= (2ULL << 59);
9895 return RsrcDataFormat;
9905 uint64_t EltSizeValue =
Log2_32(ST.getMaxPrivateElementSize(
true)) - 1;
9910 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9917 Rsrc23 &=
~AMDGPU::RSRC_DATA_FORMAT;
9923 unsigned Opc =
MI.getOpcode();
9929 return get(
Opc).mayLoad() &&
9936 if (!Addr || !Addr->
isFI())
9945 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdata);
9947 return MI.getOperand(VDataIdx).getReg();
9957 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::data);
9959 return MI.getOperand(DataIdx).getReg();
9993 unsigned Opc =
MI.getOpcode();
9995 unsigned DescSize =
Desc.getSize();
10000 unsigned Size = DescSize;
10004 if (
MI.isBranch() && ST.hasOffset3fBug())
10015 bool HasLiteral =
false;
10016 unsigned LiteralSize = 4;
10017 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
10022 if (ST.has64BitLiterals()) {
10023 switch (OpInfo.OperandType) {
10048 return HasLiteral ? DescSize + LiteralSize : DescSize;
10053 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
10057 int RSrcIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::srsrc);
10058 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
10062 case TargetOpcode::BUNDLE:
10063 return getInstBundleSize(
MI);
10064 case TargetOpcode::INLINEASM:
10065 case TargetOpcode::INLINEASM_BR: {
10067 const char *AsmStr =
MI.getOperand(0).getSymbolName();
10071 if (
MI.isMetaInstruction())
10075 const auto *D16Info = AMDGPU::getT16D16Helper(
Opc);
10078 unsigned LoInstOpcode = D16Info->LoOp;
10080 DescSize =
Desc.getSize();
10084 if (
Opc == AMDGPU::V_FMA_MIX_F16_t16 ||
Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
10087 DescSize =
Desc.getSize();
10096 if (
MI.isBranch() && ST.hasOffset3fBug())
10097 return InstSizeVerifyMode::NoVerify;
10098 return InstSizeVerifyMode::ExactSize;
10105 if (
MI.memoperands_empty())
10117 static const std::pair<int, const char *> TargetIndices[] = {
10156std::pair<unsigned, unsigned>
10163 static const std::pair<unsigned, const char *> TargetFlags[] = {
10181 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
10197 return AMDGPU::WWM_COPY;
10199 return AMDGPU::COPY;
10216 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
10220 if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
10221 return IsLRSplitInst;
10234 bool IsNullOrVectorRegister =
true;
10238 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
10241 return IsNullOrVectorRegister &&
10243 (!
MI.isTerminator() &&
MI.getOpcode() != AMDGPU::COPY &&
10244 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
10252 if (ST.hasAddNoCarryInsts())
10268 if (ST.hasAddNoCarryInsts())
10272 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
10274 : RS.scavengeRegisterBackwards(
10275 *RI.getBoolRC(),
I,
false,
10288 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10289 case AMDGPU::SI_KILL_I1_TERMINATOR:
10298 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10299 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10300 case AMDGPU::SI_KILL_I1_PSEUDO:
10301 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10313 const unsigned OffsetBits =
10315 return (1 << OffsetBits) - 1;
10319 if (!ST.isWave32())
10322 if (
MI.isInlineAsm())
10325 if (
MI.getNumOperands() <
MI.getNumExplicitOperands())
10328 for (
auto &
Op :
MI.implicit_operands()) {
10329 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
10330 Op.setReg(AMDGPU::VCC_LO);
10339 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sbase);
10343 const int16_t RCID = getOpRegClassID(
MI.getDesc().operands()[Idx]);
10344 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10360 if (Imm > MaxImm) {
10361 if (Imm <= MaxImm + 64) {
10363 Overflow = Imm - MaxImm;
10382 if (Overflow > 0) {
10390 if (ST.hasRestrictedSOffset())
10395 SOffset = Overflow;
10433 if (!ST.hasFlatInstOffsets())
10437 if (ST.hasFlatSegmentOffsetBug() && FlatVariant == FlatAddrSpace::FLAT &&
10442 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10443 FlatVariant == FlatAddrSpace::FlatScratch &&
Offset < 0 &&
10454std::pair<int64_t, int64_t>
10457 int64_t RemainderOffset = COffsetVal;
10458 int64_t ImmField = 0;
10463 if (AllowNegative) {
10465 int64_t
D = 1LL << NumBits;
10466 RemainderOffset = (COffsetVal /
D) *
D;
10467 ImmField = COffsetVal - RemainderOffset;
10469 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10471 (ImmField % 4) != 0) {
10473 RemainderOffset += ImmField % 4;
10474 ImmField -= ImmField % 4;
10476 }
else if (COffsetVal >= 0) {
10478 RemainderOffset = COffsetVal - ImmField;
10482 assert(RemainderOffset + ImmField == COffsetVal);
10483 return {ImmField, RemainderOffset};
10488 if (ST.hasNegativeScratchOffsetBug() &&
10496 switch (ST.getGeneration()) {
10525 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10526 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10527 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10528 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10529 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10530 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10531 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10532 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10539#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10540 case OPCODE##_dpp: \
10541 case OPCODE##_e32: \
10542 case OPCODE##_e64: \
10543 case OPCODE##_e64_dpp: \
10544 case OPCODE##_sdwa:
10558 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10559 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10560 case AMDGPU::V_FMA_F16_gfx9_e64:
10561 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10562 case AMDGPU::V_INTERP_P2_F16:
10563 case AMDGPU::V_MAD_F16_e64:
10564 case AMDGPU::V_MAD_U16_e64:
10565 case AMDGPU::V_MAD_I16_e64:
10574 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10588 switch (ST.getGeneration()) {
10601 if (
isMAI(Opcode)) {
10609 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX11_7Insts())
10612 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX1250Insts())
10619 if (ST.hasGFX90AInsts()) {
10620 uint32_t NMCOp = AMDGPU::INSTRUCTION_LIST_END;
10621 if (ST.hasGFX940Insts())
10623 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10625 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10627 if (NMCOp != AMDGPU::INSTRUCTION_LIST_END)
10633 if (MCOp == AMDGPU::INSTRUCTION_LIST_END)
10652 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
10653 if (
MI.getOperand(1 + 2 *
I + 1).getImm() == SubReg) {
10654 auto &RegOp =
MI.getOperand(1 + 2 *
I);
10666 switch (
MI.getOpcode()) {
10668 case AMDGPU::REG_SEQUENCE:
10672 case AMDGPU::INSERT_SUBREG:
10673 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
10690 if (!
P.Reg.isVirtual())
10695 while (
auto *
MI = DefInst) {
10697 switch (
MI->getOpcode()) {
10699 case AMDGPU::V_MOV_B32_e32: {
10700 auto &Op1 =
MI->getOperand(1);
10729 auto *DefBB =
DefMI.getParent();
10733 if (
UseMI.getParent() != DefBB)
10736 const int MaxInstScan = 20;
10740 auto E =
UseMI.getIterator();
10741 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
10742 if (
I->isDebugInstr())
10745 if (++NumInst > MaxInstScan)
10748 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
10761 auto *DefBB =
DefMI.getParent();
10763 const int MaxUseScan = 10;
10767 auto &UseInst = *
Use.getParent();
10770 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10773 if (++NumUse > MaxUseScan)
10780 const int MaxInstScan = 20;
10784 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
10787 if (
I->isDebugInstr())
10790 if (++NumInst > MaxInstScan)
10803 if (Reg == VReg && --NumUse == 0)
10805 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
10814 auto Cur =
MBB.begin();
10815 if (Cur !=
MBB.end())
10817 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
10820 }
while (Cur !=
MBB.end() && Cur != LastPHIIt);
10829 if (InsPt !=
MBB.end() &&
10830 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10831 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10832 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10833 InsPt->definesRegister(Src,
nullptr)) {
10837 .
addReg(Src, {}, SrcSubReg)
10880 if (isFullCopyInstr(
MI)) {
10881 Register DstReg =
MI.getOperand(0).getReg();
10882 Register SrcReg =
MI.getOperand(1).getReg();
10904 unsigned *PredCost)
const {
10905 if (
MI.isBundle()) {
10908 unsigned Lat = 0,
Count = 0;
10909 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
10911 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
10913 return Lat +
Count - 1;
10916 return SchedModel.computeInstrLatency(&
MI);
10923 return *CallAddrOp;
10930 unsigned Opcode =
MI.getOpcode();
10932 auto HandleAddrSpaceCast = [
this, &MRI](
const MachineInstr &
MI) {
10935 :
MI.getOperand(1).getReg();
10939 unsigned SrcAS = SrcTy.getAddressSpace();
10942 ST.hasGloballyAddressableScratch()
10950 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10951 return HandleAddrSpaceCast(
MI);
10954 auto IID = GI->getIntrinsicID();
10961 case Intrinsic::amdgcn_addrspacecast_nonnull:
10962 return HandleAddrSpaceCast(
MI);
10963 case Intrinsic::amdgcn_if:
10964 case Intrinsic::amdgcn_else:
10978 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10979 Opcode == AMDGPU::G_SEXTLOAD) {
10980 if (
MI.memoperands_empty())
10984 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10985 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10993 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10994 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10995 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
11001 if (Opcode == TargetOpcode::G_DYN_STACKALLOC)
11004 if (Opcode == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
11012 Formatter = std::make_unique<AMDGPUMIRFormatter>(ST);
11013 return Formatter.get();
11021 unsigned opcode =
MI.getOpcode();
11022 if (opcode == AMDGPU::V_READLANE_B32 ||
11023 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
11024 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
11029 if (
MI.isInlineAsm()) {
11035 if (!RC || !RI.isSGPRClass(RC))
11040 if (isCopyInstr(
MI)) {
11044 RI.getPhysRegBaseClass(srcOp.
getReg());
11052 if (
MI.isPreISelOpcode())
11067 if (
MI.memoperands_empty())
11071 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
11072 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
11087 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
11089 if (!
SrcOp.isReg())
11093 if (!Reg || !
SrcOp.readsReg())
11099 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
11126 F,
"ds_ordered_count unsupported for this calling conv"));
11140 Register &SrcReg2, int64_t &CmpMask,
11141 int64_t &CmpValue)
const {
11142 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
11145 switch (
MI.getOpcode()) {
11148 case AMDGPU::S_CMP_EQ_U32:
11149 case AMDGPU::S_CMP_EQ_I32:
11150 case AMDGPU::S_CMP_LG_U32:
11151 case AMDGPU::S_CMP_LG_I32:
11152 case AMDGPU::S_CMP_LT_U32:
11153 case AMDGPU::S_CMP_LT_I32:
11154 case AMDGPU::S_CMP_GT_U32:
11155 case AMDGPU::S_CMP_GT_I32:
11156 case AMDGPU::S_CMP_LE_U32:
11157 case AMDGPU::S_CMP_LE_I32:
11158 case AMDGPU::S_CMP_GE_U32:
11159 case AMDGPU::S_CMP_GE_I32:
11160 case AMDGPU::S_CMP_EQ_U64:
11161 case AMDGPU::S_CMP_LG_U64:
11162 SrcReg =
MI.getOperand(0).getReg();
11163 if (
MI.getOperand(1).isReg()) {
11164 if (
MI.getOperand(1).getSubReg())
11166 SrcReg2 =
MI.getOperand(1).getReg();
11168 }
else if (
MI.getOperand(1).isImm()) {
11170 CmpValue =
MI.getOperand(1).getImm();
11176 case AMDGPU::S_CMPK_EQ_U32:
11177 case AMDGPU::S_CMPK_EQ_I32:
11178 case AMDGPU::S_CMPK_LG_U32:
11179 case AMDGPU::S_CMPK_LG_I32:
11180 case AMDGPU::S_CMPK_LT_U32:
11181 case AMDGPU::S_CMPK_LT_I32:
11182 case AMDGPU::S_CMPK_GT_U32:
11183 case AMDGPU::S_CMPK_GT_I32:
11184 case AMDGPU::S_CMPK_LE_U32:
11185 case AMDGPU::S_CMPK_LE_I32:
11186 case AMDGPU::S_CMPK_GE_U32:
11187 case AMDGPU::S_CMPK_GE_I32:
11188 SrcReg =
MI.getOperand(0).getReg();
11190 CmpValue =
MI.getOperand(1).getImm();
11200 if (S->isLiveIn(AMDGPU::SCC))
11209bool SIInstrInfo::invertSCCUse(
MachineInstr *SCCDef)
const {
11212 bool SCCIsDead =
false;
11215 constexpr unsigned ScanLimit = 12;
11216 unsigned Count = 0;
11217 for (MachineInstr &
MI :
11219 if (++
Count > ScanLimit)
11221 if (
MI.readsRegister(AMDGPU::SCC, &RI)) {
11222 if (
MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
11223 MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
11224 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11225 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
11230 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
11243 for (MachineInstr *
MI : InvertInstr) {
11244 if (
MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
11245 MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
11247 }
else if (
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11248 MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
11249 MI->setDesc(
get(
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
11250 ? AMDGPU::S_CBRANCH_SCC1
11251 : AMDGPU::S_CBRANCH_SCC0));
11264 bool NeedInversion)
const {
11265 MachineInstr *KillsSCC =
nullptr;
11270 if (
MI.modifiesRegister(AMDGPU::SCC, &RI))
11272 if (
MI.killsRegister(AMDGPU::SCC, &RI))
11275 if (NeedInversion && !invertSCCUse(SCCRedefine))
11277 if (MachineOperand *SccDef =
11279 SccDef->setIsDead(
false);
11287 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
11288 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
11290 bool Op1IsNonZeroImm =
11291 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
11292 bool Op2IsZeroImm =
11293 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
11294 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
11300 unsigned &NewDefOpc) {
11303 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
11304 Def.getOpcode() != AMDGPU::S_ADD_U32)
11310 if ((!AddSrc1.
isImm() || AddSrc1.
getImm() != 1) &&
11316 if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
11318 Def.findRegisterDefOperand(AMDGPU::SCC,
nullptr);
11321 NewDefOpc = AMDGPU::S_ADD_U32;
11323 NeedInversion = !NeedInversion;
11328 Register SrcReg2, int64_t CmpMask,
11337 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
11338 this](
bool NeedInversion) ->
bool {
11362 unsigned NewDefOpc = Def->getOpcode();
11368 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11371 if (NewDefOpc != Def->getOpcode())
11372 Def->setDesc(
get(NewDefOpc));
11381 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11388 if (Def1 && Def1->
getOpcode() == AMDGPU::COPY && Def2 &&
11396 optimizeSCC(
Select, Def,
false);
11403 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11404 this](int64_t ExpectedValue,
unsigned SrcSize,
11405 bool IsReversible,
bool IsSigned) ->
bool {
11433 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11434 Def->getOpcode() != AMDGPU::S_AND_B64)
11438 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
11449 SrcOp = &Def->getOperand(2);
11450 else if (isMask(&Def->getOperand(2)))
11451 SrcOp = &Def->getOperand(1);
11459 if (IsSigned && BitNo == SrcSize - 1)
11462 ExpectedValue <<= BitNo;
11464 bool IsReversedCC =
false;
11465 if (CmpValue != ExpectedValue) {
11468 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11473 Register DefReg = Def->getOperand(0).getReg();
11477 if (!optimizeSCC(Def, &CmpInstr,
false))
11488 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11489 : AMDGPU::S_BITCMP1_B32
11490 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11491 : AMDGPU::S_BITCMP1_B64;
11496 Def->eraseFromParent();
11504 case AMDGPU::S_CMP_EQ_U32:
11505 case AMDGPU::S_CMP_EQ_I32:
11506 case AMDGPU::S_CMPK_EQ_U32:
11507 case AMDGPU::S_CMPK_EQ_I32:
11508 return optimizeCmpAnd(1, 32,
true,
false) ||
11509 optimizeCmpSelect(
true);
11510 case AMDGPU::S_CMP_GE_U32:
11511 case AMDGPU::S_CMPK_GE_U32:
11512 return optimizeCmpAnd(1, 32,
false,
false);
11513 case AMDGPU::S_CMP_GE_I32:
11514 case AMDGPU::S_CMPK_GE_I32:
11515 return optimizeCmpAnd(1, 32,
false,
true);
11516 case AMDGPU::S_CMP_EQ_U64:
11517 return optimizeCmpAnd(1, 64,
true,
false);
11518 case AMDGPU::S_CMP_LG_U32:
11519 case AMDGPU::S_CMP_LG_I32:
11520 case AMDGPU::S_CMPK_LG_U32:
11521 case AMDGPU::S_CMPK_LG_I32:
11522 return optimizeCmpAnd(0, 32,
true,
false) ||
11523 optimizeCmpSelect(
false);
11524 case AMDGPU::S_CMP_GT_U32:
11525 case AMDGPU::S_CMPK_GT_U32:
11526 return optimizeCmpAnd(0, 32,
false,
false);
11527 case AMDGPU::S_CMP_GT_I32:
11528 case AMDGPU::S_CMPK_GT_I32:
11529 return optimizeCmpAnd(0, 32,
false,
true);
11530 case AMDGPU::S_CMP_LG_U64:
11531 return optimizeCmpAnd(0, 64,
true,
false) ||
11532 optimizeCmpSelect(
false);
11539 AMDGPU::OpName
OpName)
const {
11540 if (!ST.needsAlignedVGPRs())
11543 int OpNo = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
OpName);
11555 bool IsAGPR = RI.isAGPR(MRI, DataReg);
11557 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11561 : &AMDGPU::VReg_64_Align2RegClass);
11563 .
addReg(DataReg, {},
Op.getSubReg())
11568 Op.setSubReg(AMDGPU::sub0);
11583 if (ST.hasGFX1250Insts())
11590 unsigned Opcode =
MI.getOpcode();
11596 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11597 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11600 if (!ST.hasGFX940Insts())
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps, ArrayRef< Register > PhySGPRs={})
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static MachineBasicBlock * generateWaterFallLoop(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr, ArrayRef< Register > PhySGPRs={})
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static unsigned getSGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI)
static bool setsSCCIfResultIsZero(const MachineInstr &Def, bool &NeedInversion, unsigned &NewDefOpc)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static unsigned getAVSpillSaveOpcode(unsigned Size, bool NeedsCFI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getVGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI)
static constexpr AMDGPU::OpName ModifierOpNames[]
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
uint64_t getZExtValue() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
filtered_mop_range all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
unsigned getAddrSpace() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps)
Move NumOps operands from Src to Dst, updating use-def lists as needed.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
LLVM_ABI void clearVirtRegs()
clearVirtRegs - Remove all virtual registers (after physreg assignment).
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
void setSimpleHint(Register VReg, Register PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI void removeRegOperandFromUseList(MachineOperand *MO)
Remove MO from its use-def list.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI void addRegOperandToUseList(MachineOperand *MO)
Add MO to the linked list of operands for its register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
bool isSpill(uint32_t Opcode) const
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, AMDGPU::FlatAddrSpace FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
void storeRegToStackSlotCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
const MIRFormatter * getMIRFormatter() const override
static bool isXcntDrain(const MachineInstr &MI)
True if MI implicitly drains XCNT.
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
InstSizeVerifyMode getInstSizeVerifyMode(const MachineInstr &MI) const override
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
Register isStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool hasRAWDependency(const MachineInstr &FirstMI, const MachineInstr &SecondMI) const
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
void handleCopyToPhysHelper(SIInstrWorklist &Worklist, Register DstReg, MachineInstr &Inst, MachineRegisterInfo &MRI, DenseMap< MachineInstr *, V2PhysSCopyInfo > &WaterFalls, DenseMap< MachineInstr *, bool > &V2SPhyCopiesToErase) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool isVOPDAntidependencyAllowed(const MachineInstr &MI) const
If OpX is multicycle, anti-dependencies are not allowed.
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void createWaterFallForSiCall(MachineInstr *MI, MachineDominatorTree *MDT, ArrayRef< MachineOperand * > ScalarOps, ArrayRef< Register > PhySGPRs={}) const
Wrapper function for generating a waterfall for instruction MI. This function takes into consideration of...
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
ValueUniformity getGenericValueUniformity(const MachineInstr &MI) const
static bool isMAI(const MCInstrDesc &Desc)
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const override
static bool usesLGKM_CNT(const MachineInstr &MI)
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst to fix 16bit SALU to VALU lowering.
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
bool isAlwaysGDS(uint32_t Opcode) const
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isLegalGFX12PlusPackedMathFP32or64BitOperand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 or 64 instructions.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI, bool NeedsCFI) const
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, AMDGPU::FlatAddrSpace FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
void createReadFirstLaneFromCopyToPhysReg(MachineRegisterInfo &MRI, Register DstReg, MachineInstr &Inst) const
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool isWWMRegSpillOpcode(uint32_t Opcode)
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
ValueUniformity getValueUniformity(const MachineInstr &MI) const final
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool allowNegativeFlatOffset(AMDGPU::FlatAddrSpace FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst, DenseMap< MachineInstr *, V2PhysSCopyInfo > &WaterFalls, DenseMap< MachineInstr *, bool > &V2SPhyCopiesToErase) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
unsigned getScratchReservedForDynamicVGPRs() const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int32_t getCommuteRev(uint32_t Opcode)
LLVM_READONLY int32_t getCommuteOrig(uint32_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READONLY int32_t getGlobalVaddrOp(uint32_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getIfAddr64Inst(uint32_t Opcode)
Check if Opcode is an Addr64 opcode.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
LLVM_READONLY int32_t getAddr64Inst(uint32_t Opcode)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool isPackedFP32or64BitInst(unsigned Opc)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT64
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int32_t getBasicFromSDWAOp(uint32_t Opcode)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
LLVM_READONLY int32_t getFlatScratchInstSVfromSS(uint32_t Opcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if one is needed).
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions starting from FirstMI to LastMI (exclusive).
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the start of an entry function to this load.
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair, skipping copy-like instructions and subreg-manipulation pseudos.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address from the specified value V, returning the original object being addressed.
constexpr RegState getUndefRegState(bool B)
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
@ AlwaysUniform
The result value is always uniform.
@ NeverUniform
The result value can never be assumed to be uniform.
@ Default
The result value is uniform if and only if all operands are uniform.
MachineCycleInfo::CycleT MachineCycle
static const MachineMemOperand::Flags MOThreadPrivate
Mark the MMO of accesses to memory locations that are never written to by other threads.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
constexpr bool all() const
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.