34#include "llvm/IR/IntrinsicsAMDGPU.h"
41#define DEBUG_TYPE "si-instr-info"
43#define GET_INSTRINFO_CTOR_DTOR
44#include "AMDGPUGenInstrInfo.inc"
47#define GET_D16ImageDimIntrinsics_IMPL
48#define GET_ImageDimIntrinsicTable_IMPL
49#define GET_RsrcIntrinsics_IMPL
50#include "AMDGPUGenSearchableTables.inc"
58 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
61 "amdgpu-fix-16-bit-physreg-copies",
62 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
78 unsigned N =
Node->getNumOperands();
79 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
91 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0,
OpName);
92 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1,
OpName);
94 if (Op0Idx == -1 && Op1Idx == -1)
98 if ((Op0Idx == -1 && Op1Idx != -1) ||
99 (Op1Idx == -1 && Op0Idx != -1))
120 return !
MI.memoperands_empty() &&
122 return MMO->isLoad() && MMO->isInvariant();
144 if (!
MI.hasImplicitDef() &&
145 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
146 !
MI.mayRaiseFPException())
154bool SIInstrInfo::resultDependsOnExec(
const MachineInstr &
MI)
const {
158 if (
MI.isConvergent())
185 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
190 for (
auto Op :
MI.uses()) {
191 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
197 if (FromCycle ==
nullptr)
203 while (FromCycle && !FromCycle->
contains(ToCycle)) {
223 int64_t &Offset1)
const {
231 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
235 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
251 int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
252 int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
253 if (Offset0Idx == -1 || Offset1Idx == -1)
260 Offset0Idx -=
get(Opc0).NumDefs;
261 Offset1Idx -=
get(Opc1).NumDefs;
291 if (!Load0Offset || !Load1Offset)
308 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
309 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
311 if (OffIdx0 == -1 || OffIdx1 == -1)
317 OffIdx0 -=
get(Opc0).NumDefs;
318 OffIdx1 -=
get(Opc1).NumDefs;
337 case AMDGPU::DS_READ2ST64_B32:
338 case AMDGPU::DS_READ2ST64_B64:
339 case AMDGPU::DS_WRITE2ST64_B32:
340 case AMDGPU::DS_WRITE2ST64_B64:
355 OffsetIsScalable =
false;
372 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
374 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
375 if (
Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
388 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
389 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
390 if (Offset0 + 1 != Offset1)
401 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
409 Offset = EltSize * Offset0;
411 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
412 if (DataOpIdx == -1) {
413 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
415 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
431 if (BaseOp && !BaseOp->
isFI())
439 if (SOffset->
isReg())
445 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
447 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
456 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
457 int SRsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RsrcOpName);
459 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
460 if (VAddr0Idx >= 0) {
462 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
469 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
484 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
501 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
503 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
520 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
528 if (MO1->getAddrSpace() != MO2->getAddrSpace())
531 const auto *Base1 = MO1->getValue();
532 const auto *Base2 = MO2->getValue();
533 if (!Base1 || !Base2)
541 return Base1 == Base2;
545 int64_t Offset1,
bool OffsetIsScalable1,
547 int64_t Offset2,
bool OffsetIsScalable2,
548 unsigned ClusterSize,
549 unsigned NumBytes)
const {
562 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
581 const unsigned LoadSize = NumBytes / ClusterSize;
582 const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
583 return NumDWords <= MaxMemoryClusterDWords;
597 int64_t Offset0, int64_t Offset1,
598 unsigned NumLoads)
const {
599 assert(Offset1 > Offset0 &&
600 "Second offset should be larger than first offset!");
605 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
612 const char *Msg =
"illegal VGPR to SGPR copy") {
633 assert((
TII.getSubtarget().hasMAIInsts() &&
634 !
TII.getSubtarget().hasGFX90AInsts()) &&
635 "Expected GFX908 subtarget.");
638 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
639 "Source register of the copy should be either an SGPR or an AGPR.");
642 "Destination register of the copy should be an AGPR.");
651 for (
auto Def =
MI,
E =
MBB.begin(); Def !=
E; ) {
654 if (!Def->modifiesRegister(SrcReg, &RI))
657 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
658 Def->getOperand(0).getReg() != SrcReg)
665 bool SafeToPropagate =
true;
668 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
669 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
670 SafeToPropagate =
false;
672 if (!SafeToPropagate)
675 for (
auto I = Def;
I !=
MI; ++
I)
676 I->clearRegisterKills(DefOp.
getReg(), &RI);
685 if (ImpUseSuperReg) {
686 Builder.addReg(ImpUseSuperReg,
694 RS.enterBasicBlockEnd(
MBB);
695 RS.backward(std::next(
MI));
704 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
707 assert(
MBB.getParent()->getRegInfo().isReserved(Tmp) &&
708 "VGPR used for an intermediate copy should have been reserved.");
713 Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
723 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
724 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
725 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
732 if (ImpUseSuperReg) {
733 UseBuilder.
addReg(ImpUseSuperReg,
754 for (
unsigned Idx = 0; Idx < BaseIndices.
size(); ++Idx) {
755 int16_t SubIdx = BaseIndices[Idx];
756 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
757 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
758 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
759 unsigned Opcode = AMDGPU::S_MOV_B32;
762 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
763 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
764 if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.
size())) {
768 DestSubReg = RI.getSubReg(DestReg, SubIdx);
769 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
770 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
771 Opcode = AMDGPU::S_MOV_B64;
786 assert(FirstMI && LastMI);
794 LastMI->addRegisterKilled(SrcReg, &RI);
800 Register SrcReg,
bool KillSrc,
bool RenamableDest,
801 bool RenamableSrc)
const {
803 unsigned Size = RI.getRegSizeInBits(*RC);
805 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
811 if (((
Size == 16) != (SrcSize == 16))) {
813 assert(ST.useRealTrue16Insts());
815 MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
818 if (DestReg == SrcReg) {
824 RC = RI.getPhysRegBaseClass(DestReg);
825 Size = RI.getRegSizeInBits(*RC);
826 SrcRC = RI.getPhysRegBaseClass(SrcReg);
827 SrcSize = RI.getRegSizeInBits(*SrcRC);
831 if (RC == &AMDGPU::VGPR_32RegClass) {
833 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
834 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
835 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
836 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
842 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
843 RC == &AMDGPU::SReg_32RegClass) {
844 if (SrcReg == AMDGPU::SCC) {
851 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
852 if (DestReg == AMDGPU::VCC_LO) {
870 if (RC == &AMDGPU::SReg_64RegClass) {
871 if (SrcReg == AMDGPU::SCC) {
878 if (!AMDGPU::SReg_64_EncodableRegClass.
contains(SrcReg)) {
879 if (DestReg == AMDGPU::VCC) {
897 if (DestReg == AMDGPU::SCC) {
900 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
904 assert(ST.hasScalarCompareEq64());
918 if (RC == &AMDGPU::AGPR_32RegClass) {
919 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
920 (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
926 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg) && ST.hasGFX90AInsts()) {
935 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
942 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
943 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
945 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
946 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
947 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
948 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
951 MCRegister NewDestReg = RI.get32BitRegister(DestReg);
952 MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
965 if (IsAGPRDst || IsAGPRSrc) {
966 if (!DstLow || !SrcLow) {
968 "Cannot use hi16 subreg with an AGPR!");
975 if (ST.useRealTrue16Insts()) {
981 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
982 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
994 if (IsSGPRSrc && !ST.hasSDWAScalar()) {
995 if (!DstLow || !SrcLow) {
997 "Cannot use hi16 subreg on VI!");
1020 if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
1021 if (ST.hasVMovB64Inst()) {
1026 if (ST.hasPkMovB32()) {
1042 const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
1043 if (RI.isSGPRClass(RC)) {
1044 if (!RI.isSGPRClass(SrcRC)) {
1048 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1054 unsigned EltSize = 4;
1055 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1056 if (RI.isAGPRClass(RC)) {
1057 if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
1058 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1059 else if (RI.hasVGPRs(SrcRC) ||
1060 (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
1061 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1063 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1064 }
else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
1065 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1066 }
else if ((
Size % 64 == 0) && RI.hasVGPRs(RC) &&
1067 (RI.isProperlyAlignedRC(*RC) &&
1068 (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
1070 if (ST.hasVMovB64Inst()) {
1071 Opcode = AMDGPU::V_MOV_B64_e32;
1073 }
else if (ST.hasPkMovB32()) {
1074 Opcode = AMDGPU::V_PK_MOV_B32;
1084 std::unique_ptr<RegScavenger> RS;
1085 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1086 RS = std::make_unique<RegScavenger>();
1092 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1093 const bool CanKillSuperReg = KillSrc && !Overlap;
1095 for (
unsigned Idx = 0; Idx < SubIndices.
size(); ++Idx) {
1098 SubIdx = SubIndices[Idx];
1100 SubIdx = SubIndices[SubIndices.
size() - Idx - 1];
1101 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1102 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1103 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1105 bool IsFirstSubreg = Idx == 0;
1106 bool UseKill = CanKillSuperReg && Idx == SubIndices.
size() - 1;
1108 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1112 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1113 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1159 return &AMDGPU::VGPR_32RegClass;
1172 "Not a VGPR32 reg");
1174 if (
Cond.size() == 1) {
1184 }
else if (
Cond.size() == 2) {
1185 assert(
Cond[0].isImm() &&
"Cond[0] is not an immediate");
1187 case SIInstrInfo::SCC_TRUE: {
1198 case SIInstrInfo::SCC_FALSE: {
1209 case SIInstrInfo::VCCNZ: {
1223 case SIInstrInfo::VCCZ: {
1237 case SIInstrInfo::EXECNZ: {
1250 case SIInstrInfo::EXECZ: {
1300 int64_t &ImmVal)
const {
1301 switch (
MI.getOpcode()) {
1302 case AMDGPU::V_MOV_B32_e32:
1303 case AMDGPU::S_MOV_B32:
1304 case AMDGPU::S_MOVK_I32:
1305 case AMDGPU::S_MOV_B64:
1306 case AMDGPU::V_MOV_B64_e32:
1307 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1308 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1309 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1310 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1311 case AMDGPU::V_MOV_B64_PSEUDO:
1312 case AMDGPU::V_MOV_B16_t16_e32: {
1316 return MI.getOperand(0).getReg() == Reg;
1321 case AMDGPU::V_MOV_B16_t16_e64: {
1323 if (Src0.
isImm() && !
MI.getOperand(1).getImm()) {
1325 return MI.getOperand(0).getReg() == Reg;
1330 case AMDGPU::S_BREV_B32:
1331 case AMDGPU::V_BFREV_B32_e32:
1332 case AMDGPU::V_BFREV_B32_e64: {
1336 return MI.getOperand(0).getReg() == Reg;
1341 case AMDGPU::S_NOT_B32:
1342 case AMDGPU::V_NOT_B32_e32:
1343 case AMDGPU::V_NOT_B32_e64: {
1346 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1347 return MI.getOperand(0).getReg() == Reg;
1357std::optional<int64_t>
1362 if (!
Op.isReg() || !
Op.getReg().isVirtual())
1363 return std::nullopt;
1366 if (Def && Def->isMoveImmediate()) {
1372 return std::nullopt;
1377 if (RI.isAGPRClass(DstRC))
1378 return AMDGPU::COPY;
1379 if (RI.getRegSizeInBits(*DstRC) == 16) {
1382 return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1384 if (RI.getRegSizeInBits(*DstRC) == 32)
1385 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1386 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
1387 return AMDGPU::S_MOV_B64;
1388 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
1389 return AMDGPU::V_MOV_B64_PSEUDO;
1390 return AMDGPU::COPY;
1395 bool IsIndirectSrc)
const {
1396 if (IsIndirectSrc) {
1398 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1400 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1402 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1404 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1406 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1408 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
1410 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
1412 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1414 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1416 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1418 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1420 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1422 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1423 if (VecSize <= 1024)
1424 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1430 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1432 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1434 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1436 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1438 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1440 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
1442 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
1444 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1446 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1448 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1450 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1452 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1454 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1455 if (VecSize <= 1024)
1456 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1463 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1465 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1467 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1469 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1471 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1473 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1475 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1477 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1479 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1481 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1483 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1485 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1487 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1488 if (VecSize <= 1024)
1489 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1496 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1498 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1500 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1502 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1504 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1506 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1508 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1510 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1512 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1514 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1516 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1518 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1520 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1521 if (VecSize <= 1024)
1522 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1529 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1531 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1533 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1535 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1536 if (VecSize <= 1024)
1537 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1544 bool IsSGPR)
const {
1556 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1563 return AMDGPU::SI_SPILL_S32_SAVE;
1565 return AMDGPU::SI_SPILL_S64_SAVE;
1567 return AMDGPU::SI_SPILL_S96_SAVE;
1569 return AMDGPU::SI_SPILL_S128_SAVE;
1571 return AMDGPU::SI_SPILL_S160_SAVE;
1573 return AMDGPU::SI_SPILL_S192_SAVE;
1575 return AMDGPU::SI_SPILL_S224_SAVE;
1577 return AMDGPU::SI_SPILL_S256_SAVE;
1579 return AMDGPU::SI_SPILL_S288_SAVE;
1581 return AMDGPU::SI_SPILL_S320_SAVE;
1583 return AMDGPU::SI_SPILL_S352_SAVE;
1585 return AMDGPU::SI_SPILL_S384_SAVE;
1587 return AMDGPU::SI_SPILL_S512_SAVE;
1589 return AMDGPU::SI_SPILL_S1024_SAVE;
1598 return AMDGPU::SI_SPILL_V16_SAVE;
1600 return AMDGPU::SI_SPILL_V32_SAVE;
1602 return AMDGPU::SI_SPILL_V64_SAVE;
1604 return AMDGPU::SI_SPILL_V96_SAVE;
1606 return AMDGPU::SI_SPILL_V128_SAVE;
1608 return AMDGPU::SI_SPILL_V160_SAVE;
1610 return AMDGPU::SI_SPILL_V192_SAVE;
1612 return AMDGPU::SI_SPILL_V224_SAVE;
1614 return AMDGPU::SI_SPILL_V256_SAVE;
1616 return AMDGPU::SI_SPILL_V288_SAVE;
1618 return AMDGPU::SI_SPILL_V320_SAVE;
1620 return AMDGPU::SI_SPILL_V352_SAVE;
1622 return AMDGPU::SI_SPILL_V384_SAVE;
1624 return AMDGPU::SI_SPILL_V512_SAVE;
1626 return AMDGPU::SI_SPILL_V1024_SAVE;
1635 return AMDGPU::SI_SPILL_AV32_SAVE;
1637 return AMDGPU::SI_SPILL_AV64_SAVE;
1639 return AMDGPU::SI_SPILL_AV96_SAVE;
1641 return AMDGPU::SI_SPILL_AV128_SAVE;
1643 return AMDGPU::SI_SPILL_AV160_SAVE;
1645 return AMDGPU::SI_SPILL_AV192_SAVE;
1647 return AMDGPU::SI_SPILL_AV224_SAVE;
1649 return AMDGPU::SI_SPILL_AV256_SAVE;
1651 return AMDGPU::SI_SPILL_AV288_SAVE;
1653 return AMDGPU::SI_SPILL_AV320_SAVE;
1655 return AMDGPU::SI_SPILL_AV352_SAVE;
1657 return AMDGPU::SI_SPILL_AV384_SAVE;
1659 return AMDGPU::SI_SPILL_AV512_SAVE;
1661 return AMDGPU::SI_SPILL_AV1024_SAVE;
1668 bool IsVectorSuperClass) {
1673 if (IsVectorSuperClass)
1674 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1676 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1682 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1689 if (ST.hasMAIInsts())
1708 FrameInfo.getObjectAlign(FrameIndex));
1709 unsigned SpillSize = RI.getSpillSize(*RC);
1712 if (RI.isSGPRClass(RC)) {
1714 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1715 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1716 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1724 if (SrcReg.
isVirtual() && SpillSize == 4) {
1734 if (RI.spillSGPRToVGPR())
1754 return AMDGPU::SI_SPILL_S32_RESTORE;
1756 return AMDGPU::SI_SPILL_S64_RESTORE;
1758 return AMDGPU::SI_SPILL_S96_RESTORE;
1760 return AMDGPU::SI_SPILL_S128_RESTORE;
1762 return AMDGPU::SI_SPILL_S160_RESTORE;
1764 return AMDGPU::SI_SPILL_S192_RESTORE;
1766 return AMDGPU::SI_SPILL_S224_RESTORE;
1768 return AMDGPU::SI_SPILL_S256_RESTORE;
1770 return AMDGPU::SI_SPILL_S288_RESTORE;
1772 return AMDGPU::SI_SPILL_S320_RESTORE;
1774 return AMDGPU::SI_SPILL_S352_RESTORE;
1776 return AMDGPU::SI_SPILL_S384_RESTORE;
1778 return AMDGPU::SI_SPILL_S512_RESTORE;
1780 return AMDGPU::SI_SPILL_S1024_RESTORE;
1789 return AMDGPU::SI_SPILL_V16_RESTORE;
1791 return AMDGPU::SI_SPILL_V32_RESTORE;
1793 return AMDGPU::SI_SPILL_V64_RESTORE;
1795 return AMDGPU::SI_SPILL_V96_RESTORE;
1797 return AMDGPU::SI_SPILL_V128_RESTORE;
1799 return AMDGPU::SI_SPILL_V160_RESTORE;
1801 return AMDGPU::SI_SPILL_V192_RESTORE;
1803 return AMDGPU::SI_SPILL_V224_RESTORE;
1805 return AMDGPU::SI_SPILL_V256_RESTORE;
1807 return AMDGPU::SI_SPILL_V288_RESTORE;
1809 return AMDGPU::SI_SPILL_V320_RESTORE;
1811 return AMDGPU::SI_SPILL_V352_RESTORE;
1813 return AMDGPU::SI_SPILL_V384_RESTORE;
1815 return AMDGPU::SI_SPILL_V512_RESTORE;
1817 return AMDGPU::SI_SPILL_V1024_RESTORE;
1826 return AMDGPU::SI_SPILL_AV32_RESTORE;
1828 return AMDGPU::SI_SPILL_AV64_RESTORE;
1830 return AMDGPU::SI_SPILL_AV96_RESTORE;
1832 return AMDGPU::SI_SPILL_AV128_RESTORE;
1834 return AMDGPU::SI_SPILL_AV160_RESTORE;
1836 return AMDGPU::SI_SPILL_AV192_RESTORE;
1838 return AMDGPU::SI_SPILL_AV224_RESTORE;
1840 return AMDGPU::SI_SPILL_AV256_RESTORE;
1842 return AMDGPU::SI_SPILL_AV288_RESTORE;
1844 return AMDGPU::SI_SPILL_AV320_RESTORE;
1846 return AMDGPU::SI_SPILL_AV352_RESTORE;
1848 return AMDGPU::SI_SPILL_AV384_RESTORE;
1850 return AMDGPU::SI_SPILL_AV512_RESTORE;
1852 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1859 bool IsVectorSuperClass) {
1864 if (IsVectorSuperClass)
1865 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1867 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1873 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1880 if (ST.hasMAIInsts())
1883 assert(!RI.isAGPRClass(RC));
1897 unsigned SpillSize = RI.getSpillSize(*RC);
1904 FrameInfo.getObjectAlign(FrameIndex));
1906 if (RI.isSGPRClass(RC)) {
1908 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1909 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1910 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1915 if (DestReg.
isVirtual() && SpillSize == 4) {
1920 if (RI.spillSGPRToVGPR())
1946 unsigned Quantity)
const {
1948 unsigned MaxSNopCount = 1u << ST.getSNopBits();
1949 while (Quantity > 0) {
1950 unsigned Arg = std::min(Quantity, MaxSNopCount);
1957 auto *MF =
MBB.getParent();
1960 assert(Info->isEntryFunction());
1962 if (
MBB.succ_empty()) {
1963 bool HasNoTerminator =
MBB.getFirstTerminator() ==
MBB.end();
1964 if (HasNoTerminator) {
1965 if (Info->returnsVoid()) {
1979 constexpr unsigned DoorbellIDMask = 0x3ff;
1980 constexpr unsigned ECQueueWaveAbort = 0x400;
1985 if (!
MBB.succ_empty() || std::next(
MI.getIterator()) !=
MBB.end()) {
1986 MBB.splitAt(
MI,
false);
1990 MBB.addSuccessor(TrapBB);
2000 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
2004 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_AND_B32), DoorbellRegMasked)
2009 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_OR_B32), SetWaveAbortBit)
2010 .
addUse(DoorbellRegMasked)
2011 .
addImm(ECQueueWaveAbort);
2012 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2013 .
addUse(SetWaveAbortBit);
2016 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2027 return MBB.getNextNode();
2031 switch (
MI.getOpcode()) {
2033 if (
MI.isMetaInstruction())
2038 return MI.getOperand(0).getImm() + 1;
2048 switch (
MI.getOpcode()) {
2050 case AMDGPU::S_MOV_B64_term:
2053 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2056 case AMDGPU::S_MOV_B32_term:
2059 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2062 case AMDGPU::S_XOR_B64_term:
2065 MI.setDesc(
get(AMDGPU::S_XOR_B64));
2068 case AMDGPU::S_XOR_B32_term:
2071 MI.setDesc(
get(AMDGPU::S_XOR_B32));
2073 case AMDGPU::S_OR_B64_term:
2076 MI.setDesc(
get(AMDGPU::S_OR_B64));
2078 case AMDGPU::S_OR_B32_term:
2081 MI.setDesc(
get(AMDGPU::S_OR_B32));
2084 case AMDGPU::S_ANDN2_B64_term:
2087 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
2090 case AMDGPU::S_ANDN2_B32_term:
2093 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
2096 case AMDGPU::S_AND_B64_term:
2099 MI.setDesc(
get(AMDGPU::S_AND_B64));
2102 case AMDGPU::S_AND_B32_term:
2105 MI.setDesc(
get(AMDGPU::S_AND_B32));
2108 case AMDGPU::S_AND_SAVEEXEC_B64_term:
2111 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
2114 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2117 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2120 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2121 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2124 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2125 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2127 case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2131 get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2134 case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
2137 int64_t Imm =
MI.getOperand(1).getImm();
2139 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2140 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2145 MI.eraseFromParent();
2151 case AMDGPU::V_MOV_B64_PSEUDO: {
2153 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2154 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2162 if (ST.hasVMovB64Inst() && Mov64RC->
contains(Dst)) {
2163 MI.setDesc(Mov64Desc);
2168 if (
SrcOp.isImm()) {
2170 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2171 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2195 if (ST.hasPkMovB32() &&
2214 MI.eraseFromParent();
2217 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2221 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2225 if (ST.has64BitLiterals()) {
2226 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2232 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2237 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2238 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2240 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2241 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2246 MI.eraseFromParent();
2249 case AMDGPU::V_SET_INACTIVE_B32: {
2253 .
add(
MI.getOperand(3))
2254 .
add(
MI.getOperand(4))
2255 .
add(
MI.getOperand(1))
2256 .
add(
MI.getOperand(2))
2257 .
add(
MI.getOperand(5));
2258 MI.eraseFromParent();
2261 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2262 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2263 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2264 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2265 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2266 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2267 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2268 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2269 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2270 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2271 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2272 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2273 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2274 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2275 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2276 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2277 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2278 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2279 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2280 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2281 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2282 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2283 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2284 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2285 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2286 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2287 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2288 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2289 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2290 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2291 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2292 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2293 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2297 if (RI.hasVGPRs(EltRC)) {
2298 Opc = AMDGPU::V_MOVRELD_B32_e32;
2300 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2301 : AMDGPU::S_MOVRELD_B32;
2306 bool IsUndef =
MI.getOperand(1).isUndef();
2307 unsigned SubReg =
MI.getOperand(3).getImm();
2308 assert(VecReg ==
MI.getOperand(1).getReg());
2313 .
add(
MI.getOperand(2))
2317 const int ImpDefIdx =
2319 const int ImpUseIdx = ImpDefIdx + 1;
2321 MI.eraseFromParent();
2324 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2325 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2326 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2327 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2328 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2329 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
2330 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
2331 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2332 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2333 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2334 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2335 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2336 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2337 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2338 assert(ST.useVGPRIndexMode());
2340 bool IsUndef =
MI.getOperand(1).isUndef();
2349 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2353 .
add(
MI.getOperand(2))
2357 const int ImpDefIdx =
2359 const int ImpUseIdx = ImpDefIdx + 1;
2366 MI.eraseFromParent();
2369 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2370 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2371 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2372 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2373 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2374 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
2375 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
2376 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2377 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2378 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2379 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2380 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2381 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2382 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2383 assert(ST.useVGPRIndexMode());
2386 bool IsUndef =
MI.getOperand(1).isUndef();
2390 .
add(
MI.getOperand(2))
2403 MI.eraseFromParent();
2406 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2409 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2410 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2429 if (ST.hasGetPCZeroExtension()) {
2433 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2440 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2450 MI.eraseFromParent();
2453 case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
2463 Op.setOffset(
Op.getOffset() + 4);
2465 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(
Op));
2469 MI.eraseFromParent();
2472 case AMDGPU::ENTER_STRICT_WWM: {
2478 case AMDGPU::ENTER_STRICT_WQM: {
2485 MI.eraseFromParent();
2488 case AMDGPU::EXIT_STRICT_WWM:
2489 case AMDGPU::EXIT_STRICT_WQM: {
2495 case AMDGPU::SI_RETURN: {
2509 MI.eraseFromParent();
2513 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2514 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2515 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2518 case AMDGPU::S_GETPC_B64_pseudo:
2519 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2520 if (ST.hasGetPCZeroExtension()) {
2522 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2531 case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
2532 assert(ST.hasBF16PackedInsts());
2533 MI.setDesc(
get(AMDGPU::V_PK_MAX_NUM_BF16));
2544 case AMDGPU::GET_STACK_BASE:
2547 if (ST.getFrameLowering()->mayReserveScratchForCWSR(*
MBB.getParent())) {
2554 Register DestReg =
MI.getOperand(0).getReg();
2564 MI.getOperand(
MI.getNumExplicitOperands()).setIsDead(
false);
2565 MI.getOperand(
MI.getNumExplicitOperands()).setIsUse();
2566 MI.setDesc(
get(AMDGPU::S_CMOVK_I32));
2569 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2572 MI.getNumExplicitOperands());
2590 case AMDGPU::S_MOV_B64:
2591 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2600 if (UsedLanes.
all())
2605 unsigned LoSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub0);
2606 unsigned HiSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub1);
2608 bool NeedLo = (UsedLanes & RI.getSubRegIndexLaneMask(LoSubReg)).any();
2609 bool NeedHi = (UsedLanes & RI.getSubRegIndexLaneMask(HiSubReg)).any();
2611 if (NeedLo && NeedHi)
2615 int32_t Imm32 = NeedLo ?
Lo_32(Imm64) :
Hi_32(Imm64);
2617 unsigned UseSubReg = NeedLo ? LoSubReg : HiSubReg;
2626 case AMDGPU::S_LOAD_DWORDX16_IMM:
2627 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2640 for (
auto &CandMO :
I->operands()) {
2641 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2649 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2653 unsigned SubregSize = RI.getSubRegIdxSize(UseMO->
getSubReg());
2659 unsigned NewOpcode = -1;
2660 if (SubregSize == 256)
2661 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2662 else if (SubregSize == 128)
2663 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2673 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2678 MI->getOperand(0).setReg(DestReg);
2679 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2683 OffsetMO->
setImm(FinalOffset);
2689 MI->setMemRefs(*MF, NewMMOs);
2702std::pair<MachineInstr*, MachineInstr*>
2704 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2706 if (ST.hasVMovB64Inst() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2709 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2710 return std::pair(&
MI,
nullptr);
2721 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2723 if (Dst.isPhysical()) {
2724 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2731 for (
unsigned I = 1;
I <= 2; ++
I) {
2734 if (
SrcOp.isImm()) {
2736 Imm.ashrInPlace(Part * 32);
2737 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2741 if (Src.isPhysical())
2742 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2749 MovDPP.addImm(MO.getImm());
2751 Split[Part] = MovDPP;
2755 if (Dst.isVirtual())
2762 MI.eraseFromParent();
2763 return std::pair(Split[0], Split[1]);
2766std::optional<DestSourcePair>
2768 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2771 return std::nullopt;
2775 AMDGPU::OpName Src0OpName,
2777 AMDGPU::OpName Src1OpName)
const {
2784 "All commutable instructions have both src0 and src1 modifiers");
2786 int Src0ModsVal = Src0Mods->
getImm();
2787 int Src1ModsVal = Src1Mods->
getImm();
2789 Src1Mods->
setImm(Src0ModsVal);
2790 Src0Mods->
setImm(Src1ModsVal);
2799 bool IsKill = RegOp.
isKill();
2801 bool IsUndef = RegOp.
isUndef();
2802 bool IsDebug = RegOp.
isDebug();
2804 if (NonRegOp.
isImm())
2806 else if (NonRegOp.
isFI())
2827 int64_t NonRegVal = NonRegOp1.
getImm();
2830 NonRegOp2.
setImm(NonRegVal);
2837 unsigned OpIdx1)
const {
2842 unsigned Opc =
MI.getOpcode();
2843 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2853 if ((
int)OpIdx0 == Src0Idx && !MO0.
isReg() &&
2856 if ((
int)OpIdx1 == Src0Idx && !MO1.
isReg() &&
2861 if ((
int)OpIdx1 != Src0Idx && MO0.
isReg()) {
2867 if ((
int)OpIdx0 != Src0Idx && MO1.
isReg()) {
2882 unsigned Src1Idx)
const {
2883 assert(!NewMI &&
"this should never be used");
2885 unsigned Opc =
MI.getOpcode();
2887 if (CommutedOpcode == -1)
2890 if (Src0Idx > Src1Idx)
2893 assert(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) ==
2894 static_cast<int>(Src0Idx) &&
2895 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1) ==
2896 static_cast<int>(Src1Idx) &&
2897 "inconsistency with findCommutedOpIndices");
2922 Src1, AMDGPU::OpName::src1_modifiers);
2925 AMDGPU::OpName::src1_sel);
2937 unsigned &SrcOpIdx0,
2938 unsigned &SrcOpIdx1)
const {
2943 unsigned &SrcOpIdx0,
2944 unsigned &SrcOpIdx1)
const {
2945 if (!
Desc.isCommutable())
2948 unsigned Opc =
Desc.getOpcode();
2949 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2953 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2957 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2961 int64_t BrOffset)
const {
2978 return MI.getOperand(0).getMBB();
2983 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
2984 MI.getOpcode() == AMDGPU::SI_LOOP)
2996 "new block should be inserted for expanding unconditional branch");
2999 "restore block should be inserted for restoring clobbered registers");
3007 if (ST.useAddPC64Inst()) {
3009 MCCtx.createTempSymbol(
"offset",
true);
3013 MCCtx.createTempSymbol(
"post_addpc",
true);
3014 AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
3018 Offset->setVariableValue(OffsetExpr);
3022 assert(RS &&
"RegScavenger required for long branching");
3030 const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
3031 ST.hasVALUReadSGPRHazard();
3032 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
3033 if (FlushSGPRWrites)
3041 ApplyHazardWorkarounds();
3044 MCCtx.createTempSymbol(
"post_getpc",
true);
3048 MCCtx.createTempSymbol(
"offset_lo",
true);
3050 MCCtx.createTempSymbol(
"offset_hi",
true);
3053 .
addReg(PCReg, {}, AMDGPU::sub0)
3057 .
addReg(PCReg, {}, AMDGPU::sub1)
3059 ApplyHazardWorkarounds();
3100 if (LongBranchReservedReg) {
3101 RS->enterBasicBlock(
MBB);
3102 Scav = LongBranchReservedReg;
3104 RS->enterBasicBlockEnd(
MBB);
3105 Scav = RS->scavengeRegisterBackwards(
3110 RS->setRegUsed(Scav);
3118 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3135unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
3137 case SIInstrInfo::SCC_TRUE:
3138 return AMDGPU::S_CBRANCH_SCC1;
3139 case SIInstrInfo::SCC_FALSE:
3140 return AMDGPU::S_CBRANCH_SCC0;
3141 case SIInstrInfo::VCCNZ:
3142 return AMDGPU::S_CBRANCH_VCCNZ;
3143 case SIInstrInfo::VCCZ:
3144 return AMDGPU::S_CBRANCH_VCCZ;
3145 case SIInstrInfo::EXECNZ:
3146 return AMDGPU::S_CBRANCH_EXECNZ;
3147 case SIInstrInfo::EXECZ:
3148 return AMDGPU::S_CBRANCH_EXECZ;
3154SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3156 case AMDGPU::S_CBRANCH_SCC0:
3158 case AMDGPU::S_CBRANCH_SCC1:
3160 case AMDGPU::S_CBRANCH_VCCNZ:
3162 case AMDGPU::S_CBRANCH_VCCZ:
3164 case AMDGPU::S_CBRANCH_EXECNZ:
3166 case AMDGPU::S_CBRANCH_EXECZ:
3178 bool AllowModify)
const {
3179 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3181 TBB =
I->getOperand(0).getMBB();
3185 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3186 if (Pred == INVALID_BR)
3191 Cond.push_back(
I->getOperand(1));
3195 if (
I ==
MBB.end()) {
3201 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3203 FBB =
I->getOperand(0).getMBB();
3213 bool AllowModify)
const {
3221 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3222 switch (
I->getOpcode()) {
3223 case AMDGPU::S_MOV_B64_term:
3224 case AMDGPU::S_XOR_B64_term:
3225 case AMDGPU::S_OR_B64_term:
3226 case AMDGPU::S_ANDN2_B64_term:
3227 case AMDGPU::S_AND_B64_term:
3228 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3229 case AMDGPU::S_MOV_B32_term:
3230 case AMDGPU::S_XOR_B32_term:
3231 case AMDGPU::S_OR_B32_term:
3232 case AMDGPU::S_ANDN2_B32_term:
3233 case AMDGPU::S_AND_B32_term:
3234 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3237 case AMDGPU::SI_ELSE:
3238 case AMDGPU::SI_KILL_I1_TERMINATOR:
3239 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3256 int *BytesRemoved)
const {
3258 unsigned RemovedSize = 0;
3261 if (
MI.isBranch() ||
MI.isReturn()) {
3263 MI.eraseFromParent();
3269 *BytesRemoved = RemovedSize;
3286 int *BytesAdded)
const {
3287 if (!FBB &&
Cond.empty()) {
3291 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3298 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].
getImm()));
3310 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3328 *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
3335 if (
Cond.size() != 2) {
3339 if (
Cond[0].isImm()) {
3350 Register FalseReg,
int &CondCycles,
3351 int &TrueCycles,
int &FalseCycles)
const {
3361 CondCycles = TrueCycles = FalseCycles = NumInsts;
3364 return RI.hasVGPRs(RC) && NumInsts <= 6;
3378 if (NumInsts % 2 == 0)
3381 CondCycles = TrueCycles = FalseCycles = NumInsts;
3382 return RI.isSGPRClass(RC);
3393 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3394 if (Pred == VCCZ || Pred == SCC_FALSE) {
3395 Pred =
static_cast<BranchPredicate
>(-Pred);
3401 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3403 if (DstSize == 32) {
3405 if (Pred == SCC_TRUE) {
3420 if (DstSize == 64 && Pred == SCC_TRUE) {
3430 static const int16_t Sub0_15[] = {
3431 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3432 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3433 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3434 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3437 static const int16_t Sub0_15_64[] = {
3438 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3439 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3440 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3441 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3444 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3446 const int16_t *SubIndices = Sub0_15;
3447 int NElts = DstSize / 32;
3451 if (Pred == SCC_TRUE) {
3453 SelOp = AMDGPU::S_CSELECT_B32;
3454 EltRC = &AMDGPU::SGPR_32RegClass;
3456 SelOp = AMDGPU::S_CSELECT_B64;
3457 EltRC = &AMDGPU::SGPR_64RegClass;
3458 SubIndices = Sub0_15_64;
3464 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3469 for (
int Idx = 0; Idx != NElts; ++Idx) {
3473 unsigned SubIdx = SubIndices[Idx];
3476 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3478 .
addReg(FalseReg, {}, SubIdx)
3479 .addReg(TrueReg, {}, SubIdx);
3482 .
addReg(TrueReg, {}, SubIdx)
3483 .addReg(FalseReg, {}, SubIdx);
3495 switch (
MI.getOpcode()) {
3496 case AMDGPU::V_MOV_B16_t16_e32:
3497 case AMDGPU::V_MOV_B16_t16_e64:
3498 case AMDGPU::V_MOV_B32_e32:
3499 case AMDGPU::V_MOV_B32_e64:
3500 case AMDGPU::V_MOV_B64_PSEUDO:
3501 case AMDGPU::V_MOV_B64_e32:
3502 case AMDGPU::V_MOV_B64_e64:
3503 case AMDGPU::S_MOV_B32:
3504 case AMDGPU::S_MOV_B64:
3505 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3507 case AMDGPU::WWM_COPY:
3508 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3509 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3510 case AMDGPU::V_ACCVGPR_MOV_B32:
3511 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3512 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3520 switch (
MI.getOpcode()) {
3521 case AMDGPU::V_MOV_B16_t16_e32:
3522 case AMDGPU::V_MOV_B16_t16_e64:
3524 case AMDGPU::V_MOV_B32_e32:
3525 case AMDGPU::V_MOV_B32_e64:
3526 case AMDGPU::V_MOV_B64_PSEUDO:
3527 case AMDGPU::V_MOV_B64_e32:
3528 case AMDGPU::V_MOV_B64_e64:
3529 case AMDGPU::S_MOV_B32:
3530 case AMDGPU::S_MOV_B64:
3531 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3533 case AMDGPU::WWM_COPY:
3534 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3535 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3536 case AMDGPU::V_ACCVGPR_MOV_B32:
3537 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3538 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3546 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3547 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3548 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3551 unsigned Opc =
MI.getOpcode();
3553 int Idx = AMDGPU::getNamedOperandIdx(
Opc, Name);
3555 MI.removeOperand(Idx);
3561 MI.setDesc(NewDesc);
3567 unsigned NumOps =
Desc.getNumOperands() +
Desc.implicit_uses().size() +
3568 Desc.implicit_defs().size();
3570 for (
unsigned I =
MI.getNumOperands() - 1;
I >=
NumOps; --
I)
3571 MI.removeOperand(
I);
3575 unsigned SubRegIndex) {
3576 switch (SubRegIndex) {
3577 case AMDGPU::NoSubRegister:
3587 case AMDGPU::sub1_lo16:
3589 case AMDGPU::sub1_hi16:
3592 return std::nullopt;
3600 case AMDGPU::V_MAC_F16_e32:
3601 case AMDGPU::V_MAC_F16_e64:
3602 case AMDGPU::V_MAD_F16_e64:
3603 return AMDGPU::V_MADAK_F16;
3604 case AMDGPU::V_MAC_F32_e32:
3605 case AMDGPU::V_MAC_F32_e64:
3606 case AMDGPU::V_MAD_F32_e64:
3607 return AMDGPU::V_MADAK_F32;
3608 case AMDGPU::V_FMAC_F32_e32:
3609 case AMDGPU::V_FMAC_F32_e64:
3610 case AMDGPU::V_FMA_F32_e64:
3611 return AMDGPU::V_FMAAK_F32;
3612 case AMDGPU::V_FMAC_F16_e32:
3613 case AMDGPU::V_FMAC_F16_e64:
3614 case AMDGPU::V_FMAC_F16_t16_e64:
3615 case AMDGPU::V_FMAC_F16_fake16_e64:
3616 case AMDGPU::V_FMAC_F16_t16_e32:
3617 case AMDGPU::V_FMAC_F16_fake16_e32:
3618 case AMDGPU::V_FMA_F16_e64:
3619 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3620 ? AMDGPU::V_FMAAK_F16_t16
3621 : AMDGPU::V_FMAAK_F16_fake16
3622 : AMDGPU::V_FMAAK_F16;
3623 case AMDGPU::V_FMAC_F64_e32:
3624 case AMDGPU::V_FMAC_F64_e64:
3625 case AMDGPU::V_FMA_F64_e64:
3626 return AMDGPU::V_FMAAK_F64;
3634 case AMDGPU::V_MAC_F16_e32:
3635 case AMDGPU::V_MAC_F16_e64:
3636 case AMDGPU::V_MAD_F16_e64:
3637 return AMDGPU::V_MADMK_F16;
3638 case AMDGPU::V_MAC_F32_e32:
3639 case AMDGPU::V_MAC_F32_e64:
3640 case AMDGPU::V_MAD_F32_e64:
3641 return AMDGPU::V_MADMK_F32;
3642 case AMDGPU::V_FMAC_F32_e32:
3643 case AMDGPU::V_FMAC_F32_e64:
3644 case AMDGPU::V_FMA_F32_e64:
3645 return AMDGPU::V_FMAMK_F32;
3646 case AMDGPU::V_FMAC_F16_e32:
3647 case AMDGPU::V_FMAC_F16_e64:
3648 case AMDGPU::V_FMAC_F16_t16_e64:
3649 case AMDGPU::V_FMAC_F16_fake16_e64:
3650 case AMDGPU::V_FMAC_F16_t16_e32:
3651 case AMDGPU::V_FMAC_F16_fake16_e32:
3652 case AMDGPU::V_FMA_F16_e64:
3653 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3654 ? AMDGPU::V_FMAMK_F16_t16
3655 : AMDGPU::V_FMAMK_F16_fake16
3656 : AMDGPU::V_FMAMK_F16;
3657 case AMDGPU::V_FMAC_F64_e32:
3658 case AMDGPU::V_FMAC_F64_e64:
3659 case AMDGPU::V_FMA_F64_e64:
3660 return AMDGPU::V_FMAMK_F64;
3674 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3677 if (
Opc == AMDGPU::COPY) {
3678 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3685 if (HasMultipleUses) {
3688 unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->
getRegClass(Reg));
3691 if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
3699 if (ImmDefSize == 32 &&
3704 bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
3705 RI.getSubRegIdxSize(UseSubReg) == 16;
3708 if (RI.hasVGPRs(DstRC))
3711 if (DstReg.
isVirtual() && UseSubReg != AMDGPU::lo16)
3717 unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
3724 for (
unsigned MovOp :
3725 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
3726 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
3734 MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
3738 if (MovDstPhysReg) {
3742 RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
3749 if (MovDstPhysReg) {
3750 if (!MovDstRC->
contains(MovDstPhysReg))
3766 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
3774 if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
3778 UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
3780 UseMI.getOperand(0).setReg(MovDstPhysReg);
3785 UseMI.setDesc(NewMCID);
3786 UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
3787 UseMI.addImplicitDefUseOperands(*MF);
3791 if (HasMultipleUses)
3794 if (
Opc == AMDGPU::V_MAD_F32_e64 ||
Opc == AMDGPU::V_MAC_F32_e64 ||
3795 Opc == AMDGPU::V_MAD_F16_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3796 Opc == AMDGPU::V_FMA_F32_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 ||
3797 Opc == AMDGPU::V_FMA_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 ||
3798 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3799 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMA_F64_e64 ||
3800 Opc == AMDGPU::V_FMAC_F64_e64) {
3809 int Src0Idx = getNamedOperandIdx(
UseMI.getOpcode(), AMDGPU::OpName::src0);
3820 auto CopyRegOperandToNarrowerRC =
3823 if (!
MI.getOperand(OpNo).isReg())
3827 if (RI.getCommonSubClass(RC, NewRC) != NewRC)
3830 BuildMI(*
MI.getParent(),
MI.getIterator(),
MI.getDebugLoc(),
3831 get(AMDGPU::COPY), Tmp)
3833 MI.getOperand(OpNo).setReg(Tmp);
3834 MI.getOperand(OpNo).setIsKill();
3841 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3842 if (!RegSrc->
isReg())
3845 ST.getConstantBusLimit(
Opc) < 2)
3860 if (Def && Def->isMoveImmediate() &&
3875 unsigned SrcSubReg = RegSrc->
getSubReg();
3880 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3881 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3882 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3883 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3884 UseMI.untieRegOperand(
3885 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3892 if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3893 NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
3897 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3898 UseMI.getOperand(0).getReg())
3900 UseMI.getOperand(0).setReg(Tmp);
3901 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3902 CopyRegOperandToNarrowerRC(
UseMI, 3, NewRC);
3907 DefMI.eraseFromParent();
3914 if (ST.getConstantBusLimit(
Opc) < 2) {
3917 bool Src0Inlined =
false;
3918 if (Src0->
isReg()) {
3923 if (Def && Def->isMoveImmediate() &&
3928 }
else if (ST.getConstantBusLimit(
Opc) <= 1 &&
3929 RI.isSGPRReg(*MRI, Src0->
getReg())) {
3935 if (Src1->
isReg() && !Src0Inlined) {
3938 if (Def && Def->isMoveImmediate() &&
3942 else if (RI.isSGPRReg(*MRI, Src1->
getReg()))
3955 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3956 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3957 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3958 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3959 UseMI.untieRegOperand(
3960 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3962 const std::optional<int64_t> SubRegImm =
3972 if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
3973 NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
3977 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3978 UseMI.getOperand(0).getReg())
3980 UseMI.getOperand(0).setReg(Tmp);
3981 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3982 CopyRegOperandToNarrowerRC(
UseMI, 2, NewRC);
3992 DefMI.eraseFromParent();
4004 if (BaseOps1.
size() != BaseOps2.
size())
4006 for (
size_t I = 0,
E = BaseOps1.
size();
I <
E; ++
I) {
4007 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
4015 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
4016 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
4017 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
4019 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
4022bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
4025 int64_t Offset0, Offset1;
4028 bool Offset0IsScalable, Offset1IsScalable;
4042 LocationSize Width0 = MIa.
memoperands().front()->getSize();
4043 LocationSize Width1 = MIb.
memoperands().front()->getSize();
4050 "MIa must load from or modify a memory location");
4052 "MIb must load from or modify a memory location");
4074 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4081 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4091 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4105 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4116 if (
Reg.isPhysical())
4120 Imm = Def->getOperand(1).getImm();
4140 unsigned NumOps =
MI.getNumOperands();
4143 if (
Op.isReg() &&
Op.isKill())
4151 case AMDGPU::V_MAC_F16_e32:
4152 case AMDGPU::V_MAC_F16_e64:
4153 return AMDGPU::V_MAD_F16_e64;
4154 case AMDGPU::V_MAC_F32_e32:
4155 case AMDGPU::V_MAC_F32_e64:
4156 return AMDGPU::V_MAD_F32_e64;
4157 case AMDGPU::V_MAC_LEGACY_F32_e32:
4158 case AMDGPU::V_MAC_LEGACY_F32_e64:
4159 return AMDGPU::V_MAD_LEGACY_F32_e64;
4160 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4161 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4162 return AMDGPU::V_FMA_LEGACY_F32_e64;
4163 case AMDGPU::V_FMAC_F16_e32:
4164 case AMDGPU::V_FMAC_F16_e64:
4165 case AMDGPU::V_FMAC_F16_t16_e64:
4166 case AMDGPU::V_FMAC_F16_fake16_e64:
4167 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4168 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4169 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4170 : AMDGPU::V_FMA_F16_gfx9_e64;
4171 case AMDGPU::V_FMAC_F32_e32:
4172 case AMDGPU::V_FMAC_F32_e64:
4173 return AMDGPU::V_FMA_F32_e64;
4174 case AMDGPU::V_FMAC_F64_e32:
4175 case AMDGPU::V_FMAC_F64_e64:
4176 return AMDGPU::V_FMA_F64_e64;
4196 if (
MI.isBundle()) {
4199 if (
MI.getBundleSize() != 1)
4201 CandidateMI =
MI.getNextNode();
4205 MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4209 if (
MI.isBundle()) {
4214 MI.untieRegOperand(MO.getOperandNo());
4222 if (Def.isEarlyClobber() && Def.isReg() &&
4227 auto UpdateDefIndex = [&](
LiveRange &LR) {
4228 auto *S = LR.find(OldIndex);
4229 if (S != LR.end() && S->start == OldIndex) {
4230 assert(S->valno && S->valno->def == OldIndex);
4231 S->start = NewIndex;
4232 S->valno->def = NewIndex;
4236 for (
auto &SR : LI.subranges())
4242 if (U.RemoveMIUse) {
4245 Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
4249 U.RemoveMIUse->setDesc(
get(AMDGPU::IMPLICIT_DEF));
4250 U.RemoveMIUse->getOperand(0).setIsDead(
true);
4251 for (
unsigned I = U.RemoveMIUse->getNumOperands() - 1;
I != 0; --
I)
4252 U.RemoveMIUse->removeOperand(
I);
4257 if (
MI.isBundle()) {
4261 if (MO.isReg() && MO.getReg() == DefReg) {
4262 assert(MO.getSubReg() == 0 &&
4263 "tied sub-registers in bundles currently not supported");
4264 MI.removeOperand(MO.getOperandNo());
4281 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4282 MIOp.setIsUndef(
true);
4283 MIOp.setReg(DummyReg);
4287 if (
MI.isBundle()) {
4291 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4292 MIOp.setIsUndef(
true);
4293 MIOp.setReg(DummyReg);
4306 return MI.isBundle() ? &
MI : NewMI;
4311 ThreeAddressUpdates &U)
const {
4313 unsigned Opc =
MI.getOpcode();
4317 if (NewMFMAOpc != -1) {
4320 for (
unsigned I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I)
4321 MIB.
add(
MI.getOperand(
I));
4329 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I)
4334 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4335 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4336 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4340 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4341 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4342 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4343 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4344 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4345 bool Src0Literal =
false;
4350 case AMDGPU::V_MAC_F16_e64:
4351 case AMDGPU::V_FMAC_F16_e64:
4352 case AMDGPU::V_FMAC_F16_t16_e64:
4353 case AMDGPU::V_FMAC_F16_fake16_e64:
4354 case AMDGPU::V_MAC_F32_e64:
4355 case AMDGPU::V_MAC_LEGACY_F32_e64:
4356 case AMDGPU::V_FMAC_F32_e64:
4357 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4358 case AMDGPU::V_FMAC_F64_e64:
4360 case AMDGPU::V_MAC_F16_e32:
4361 case AMDGPU::V_FMAC_F16_e32:
4362 case AMDGPU::V_MAC_F32_e32:
4363 case AMDGPU::V_MAC_LEGACY_F32_e32:
4364 case AMDGPU::V_FMAC_F32_e32:
4365 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4366 case AMDGPU::V_FMAC_F64_e32: {
4367 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4368 AMDGPU::OpName::src0);
4369 const MachineOperand *Src0 = &
MI.getOperand(Src0Idx);
4380 MachineInstrBuilder MIB;
4383 const MachineOperand *Src0Mods =
4386 const MachineOperand *Src1Mods =
4389 const MachineOperand *Src2Mods =
4395 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4396 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4398 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4400 MachineInstr *
DefMI;
4436 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4452 if (Src0Literal && !ST.hasVOP3Literal())
4480 switch (
MI.getOpcode()) {
4481 case AMDGPU::S_SET_GPR_IDX_ON:
4482 case AMDGPU::S_SET_GPR_IDX_MODE:
4483 case AMDGPU::S_SET_GPR_IDX_OFF:
4501 if (
MI.isTerminator() ||
MI.isPosition())
4505 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4508 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4514 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4515 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4516 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4517 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4518 MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4523 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4524 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4525 Opcode == AMDGPU::DS_SUB_GS_REG_RTN ||
isGWS(Opcode);
4539 if (
MI.getMF()->getFunction().hasFnAttribute(
"amdgpu-no-flat-scratch-init"))
4544 if (
MI.memoperands_empty())
4549 unsigned AS = Memop->getAddrSpace();
4550 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4551 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4552 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4553 *MD, AMDGPUAS::PRIVATE_ADDRESS);
4568 if (
MI.memoperands_empty())
4577 unsigned AS = Memop->getAddrSpace();
4594 if (ST.isTgSplitEnabled())
4599 if (
MI.memoperands_empty())
4604 unsigned AS = Memop->getAddrSpace();
4620 unsigned Opcode =
MI.getOpcode();
4635 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4636 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4637 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT ||
4638 Opcode == AMDGPU::S_SETHALT)
4641 if (
MI.isCall() ||
MI.isInlineAsm())
4657 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4658 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4659 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4660 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4668 if (
MI.isMetaInstruction())
4672 if (
MI.isCopyLike()) {
4673 if (!RI.isSGPRReg(MRI,
MI.getOperand(0).getReg()))
4677 return MI.readsRegister(AMDGPU::EXEC, &RI);
4688 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4692 switch (Imm.getBitWidth()) {
4698 ST.hasInv2PiInlineImm());
4701 ST.hasInv2PiInlineImm());
4703 return ST.has16BitInsts() &&
4705 ST.hasInv2PiInlineImm());
4712 APInt IntImm = Imm.bitcastToAPInt();
4714 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4722 return ST.has16BitInsts() &&
4725 return ST.has16BitInsts() &&
4735 switch (OperandType) {
4745 int32_t Trunc =
static_cast<int32_t
>(Imm);
4787 int16_t Trunc =
static_cast<int16_t
>(Imm);
4788 return ST.has16BitInsts() &&
4797 int16_t Trunc =
static_cast<int16_t
>(Imm);
4798 return ST.has16BitInsts() &&
4849 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4855 return ST.hasVOP3Literal();
4859 int64_t ImmVal)
const {
4862 if (
isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4863 OpNo == (
unsigned)AMDGPU::getNamedOperandIdx(InstDesc.
getOpcode(),
4864 AMDGPU::OpName::src2))
4866 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4878 "unexpected imm-like operand kind");
4891 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4909 AMDGPU::OpName
OpName)
const {
4911 return Mods && Mods->
getImm();
4924 switch (
MI.getOpcode()) {
4925 default:
return false;
4927 case AMDGPU::V_ADDC_U32_e64:
4928 case AMDGPU::V_SUBB_U32_e64:
4929 case AMDGPU::V_SUBBREV_U32_e64: {
4932 if (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()))
4937 case AMDGPU::V_MAC_F16_e64:
4938 case AMDGPU::V_MAC_F32_e64:
4939 case AMDGPU::V_MAC_LEGACY_F32_e64:
4940 case AMDGPU::V_FMAC_F16_e64:
4941 case AMDGPU::V_FMAC_F16_t16_e64:
4942 case AMDGPU::V_FMAC_F16_fake16_e64:
4943 case AMDGPU::V_FMAC_F32_e64:
4944 case AMDGPU::V_FMAC_F64_e64:
4945 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4946 if (!Src2->
isReg() || !RI.isVGPR(MRI, Src2->
getReg()) ||
4951 case AMDGPU::V_CNDMASK_B32_e64:
4957 if (Src1 && (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()) ||
4987 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4996 unsigned Op32)
const {
5010 Inst32.
add(
MI.getOperand(
I));
5014 int Idx =
MI.getNumExplicitDefs();
5016 int OpTy =
MI.getDesc().operands()[Idx++].OperandType;
5021 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
5043 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
5051 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
5054 return AMDGPU::SReg_32RegClass.contains(Reg) ||
5055 AMDGPU::SReg_64RegClass.contains(Reg);
5083 switch (MO.getReg()) {
5085 case AMDGPU::VCC_LO:
5086 case AMDGPU::VCC_HI:
5088 case AMDGPU::FLAT_SCR:
5101 switch (
MI.getOpcode()) {
5102 case AMDGPU::V_READLANE_B32:
5103 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
5104 case AMDGPU::V_WRITELANE_B32:
5105 case AMDGPU::SI_SPILL_S32_TO_VGPR:
5112 if (
MI.isPreISelOpcode() ||
5113 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
5131 return SubReg.
getSubReg() != AMDGPU::NoSubRegister &&
5142 if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5143 ErrInfo =
"illegal copy from vector register to SGPR";
5161 if (!MRI.
isSSA() &&
MI.isCopy())
5162 return verifyCopy(
MI, MRI, ErrInfo);
5164 if (SIInstrInfo::isGenericOpcode(Opcode))
5167 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5168 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5169 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5171 if (Src0Idx == -1) {
5173 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5174 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5175 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5176 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5181 if (!
Desc.isVariadic() &&
5182 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
5183 ErrInfo =
"Instruction has wrong number of operands.";
5187 if (
MI.isInlineAsm()) {
5200 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
5201 ErrInfo =
"inlineasm operand has incorrect register class.";
5209 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
5210 ErrInfo =
"missing memory operand from image instruction.";
5215 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5218 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5219 "all fp values to integers.";
5224 int16_t RegClass = getOpRegClassID(OpInfo);
5226 switch (OpInfo.OperandType) {
5228 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5229 ErrInfo =
"Illegal immediate value for operand.";
5260 ErrInfo =
"Illegal immediate value for operand.";
5269 if (ST.has64BitLiterals() &&
Desc.getSize() != 4 && MO.
isImm() &&
5272 OpInfo.OperandType ==
5274 ErrInfo =
"illegal 64-bit immediate value for operand.";
5281 ErrInfo =
"Expected inline constant for operand.";
5295 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5296 ErrInfo =
"Expected immediate, but got non-immediate";
5305 if (OpInfo.isGenericType())
5320 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
5321 Opcode != AMDGPU::V_MOV_B64_PSEUDO && !
isSpill(
MI)) {
5323 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5325 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5326 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5333 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5334 ErrInfo =
"Subtarget requires even aligned vector registers";
5339 if (RegClass != -1) {
5340 if (Reg.isVirtual())
5345 ErrInfo =
"Operand has incorrect register class.";
5353 if (!ST.hasSDWA()) {
5354 ErrInfo =
"SDWA is not supported on this target";
5358 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5359 AMDGPU::OpName::dst_sel}) {
5363 int64_t Imm = MO->
getImm();
5365 ErrInfo =
"Invalid SDWA selection";
5370 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5372 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5377 if (!ST.hasSDWAScalar()) {
5379 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.
getReg()))) {
5380 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5387 "Only reg allowed as operands in SDWA instructions on GFX9+";
5393 if (!ST.hasSDWAOmod()) {
5396 if (OMod !=
nullptr &&
5398 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5403 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5404 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5405 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5406 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5409 unsigned Mods = Src0ModsMO->
getImm();
5412 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5418 if (
isVOPC(BasicOpcode)) {
5419 if (!ST.hasSDWASdst() && DstIdx != -1) {
5422 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5423 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5426 }
else if (!ST.hasSDWAOutModsVOPC()) {
5429 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5430 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5436 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5437 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5444 if (DstUnused && DstUnused->isImm() &&
5447 if (!Dst.isReg() || !Dst.isTied()) {
5448 ErrInfo =
"Dst register should have tied register";
5453 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5456 "Dst register should be tied to implicit use of preserved register";
5460 ErrInfo =
"Dst register should use same physical register as preserved";
5466 if (
isDPP(
MI) && !ST.hasDPPSrc1SGPR() && Src1Idx != -1) {
5468 if (Src1MO.
isReg() && RI.isSGPRReg(MRI, Src1MO.
getReg())) {
5469 ErrInfo =
"DPP src1 cannot be SGPR on this subtarget";
5475 if (
isImage(Opcode) && !
MI.mayStore()) {
5487 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5495 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5499 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5500 if (RegCount > DstSize) {
5501 ErrInfo =
"Image instruction returns too many registers for dst "
5510 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5511 unsigned ConstantBusCount = 0;
5512 bool UsesLiteral =
false;
5515 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5519 LiteralVal = &
MI.getOperand(ImmIdx);
5528 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5539 }
else if (!MO.
isFI()) {
5546 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5556 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5557 return !RI.regsOverlap(SGPRUsed, SGPR);
5566 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5567 Opcode != AMDGPU::V_WRITELANE_B32) {
5568 ErrInfo =
"VOP* instruction violates constant bus restriction";
5572 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5573 ErrInfo =
"VOP3 instruction uses literal";
5580 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5581 unsigned SGPRCount = 0;
5584 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5592 if (MO.
getReg() != SGPRUsed)
5597 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5598 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5605 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5606 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5613 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5623 ErrInfo =
"ABS not allowed in VOP3B instructions";
5636 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5643 if (
Desc.isBranch()) {
5645 ErrInfo =
"invalid branch target for SOPK instruction";
5652 ErrInfo =
"invalid immediate for SOPK instruction";
5657 ErrInfo =
"invalid immediate for SOPK instruction";
// Checks that apply only to the V_MOVRELS_B32 / V_MOVRELD_B32 opcodes
// (both the e32 and e64 forms).
5664 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5665 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5666 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5667 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
// IsDst is true for the two V_MOVRELD forms and false for V_MOVRELS.
5668 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5669 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
// Operand count taken from the descriptor: its declared operands plus its
// implicit uses.
5671 const unsigned StaticNumOps =
5672 Desc.getNumOperands() +
Desc.implicit_uses().size();
// Extra implicit operands expected beyond StaticNumOps: 2 for the MOVRELD
// forms, 1 for the MOVRELS forms.
5673 const unsigned NumImplicitOps = IsDst ? 2 : 1;
// An instruction carrying fewer than StaticNumOps + NumImplicitOps operands
// is reported through ErrInfo.
5679 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5680 ErrInfo =
"missing implicit register operands";
5686 if (!Dst->isUse()) {
5687 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5692 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5693 UseOpIdx != StaticNumOps + 1) {
5694 ErrInfo =
"movrel implicit operands should be tied";
5701 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5703 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5704 ErrInfo =
"src0 should be subreg of implicit vector use";
5712 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5713 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5719 if (
MI.mayStore() &&
5724 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5725 ErrInfo =
"scalar stores must use m0 as offset register";
5731 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5733 if (
Offset->getImm() != 0) {
5734 ErrInfo =
"subtarget does not support offsets in flat instructions";
5739 if (
isDS(
MI) && !ST.hasGDS()) {
5741 if (GDSOp && GDSOp->
getImm() != 0) {
5742 ErrInfo =
"GDS is not supported on this subtarget";
5750 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5751 AMDGPU::OpName::vaddr0);
5752 AMDGPU::OpName RSrcOpName =
5753 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5754 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5762 ErrInfo =
"dim is out of range";
5767 if (ST.hasR128A16()) {
5769 IsA16 = R128A16->
getImm() != 0;
5770 }
else if (ST.hasA16()) {
5772 IsA16 = A16->
getImm() != 0;
5775 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5777 unsigned AddrWords =
5780 unsigned VAddrWords;
5782 VAddrWords = RsrcIdx - VAddr0Idx;
5783 if (ST.hasPartialNSAEncoding() &&
5785 unsigned LastVAddrIdx = RsrcIdx - 1;
5786 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5794 if (VAddrWords != AddrWords) {
5796 <<
" but got " << VAddrWords <<
"\n");
5797 ErrInfo =
"bad vaddr size";
// DC holds the immediate value read from the DppCt operand.
5807 unsigned DC = DppCt->
getImm();
// Reject DC when it equals DPP_UNUSED1/2/3, is greater than DPP_LAST, or lies
// inside any of the inclusive DPP_UNUSED4..DPP_UNUSED8 [FIRST, LAST] ranges.
5808 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5809 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5810 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5811 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5812 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5813 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5814 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5815 ErrInfo =
"Invalid dpp_ctrl value";
5818 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5819 !ST.hasDPPWavefrontShifts()) {
5820 ErrInfo =
"Invalid dpp_ctrl value: "
5821 "wavefront shifts are not supported on GFX10+";
5824 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5825 !ST.hasDPPBroadcasts()) {
5826 ErrInfo =
"Invalid dpp_ctrl value: "
5827 "broadcasts are not supported on GFX10+";
5830 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5832 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5833 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5834 !ST.hasGFX90AInsts()) {
5835 ErrInfo =
"Invalid dpp_ctrl value: "
5836 "row_newbroadcast/row_share is not supported before "
5840 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5841 ErrInfo =
"Invalid dpp_ctrl value: "
5842 "row_share and row_xmask are not supported before GFX10";
5847 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5850 ErrInfo =
"Invalid dpp_ctrl value: "
5851 "DP ALU dpp only support row_newbcast";
5858 AMDGPU::OpName DataName =
5859 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5865 if (ST.hasGFX90AInsts()) {
5866 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5867 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI,
Data->getReg()))) {
5868 ErrInfo =
"Invalid register class: "
5869 "vdata and vdst should be both VGPR or AGPR";
5872 if (
Data && Data2 &&
5873 (RI.isAGPR(MRI,
Data->getReg()) != RI.isAGPR(MRI, Data2->
getReg()))) {
5874 ErrInfo =
"Invalid register class: "
5875 "both data operands should be VGPR or AGPR";
5879 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5880 (
Data && RI.isAGPR(MRI,
Data->getReg())) ||
5881 (Data2 && RI.isAGPR(MRI, Data2->
getReg()))) {
5882 ErrInfo =
"Invalid register class: "
5883 "agpr loads and stores not supported on this GPU";
5889 if (ST.needsAlignedVGPRs()) {
5890 const auto isAlignedReg = [&
MI, &MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5895 if (Reg.isPhysical())
5896 return !(RI.getHWRegIndex(Reg) & 1);
5898 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5899 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5902 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5903 Opcode == AMDGPU::DS_GWS_BARRIER) {
5905 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5906 ErrInfo =
"Subtarget requires even aligned vector registers "
5907 "for DS_GWS instructions";
5913 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5914 ErrInfo =
"Subtarget requires even aligned vector registers "
5915 "for vaddr operand of image instructions";
5921 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5923 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5924 ErrInfo =
"Invalid register class: "
5925 "v_accvgpr_write with an SGPR is not supported on this GPU";
5930 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5933 ErrInfo =
"pseudo expects only physical SGPRs";
5940 if (!ST.hasScaleOffset()) {
5941 ErrInfo =
"Subtarget does not support offset scaling";
5945 ErrInfo =
"Instruction does not support offset scaling";
5954 for (
unsigned I = 0;
I < 3; ++
I) {
5960 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
5961 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
5963 if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
5964 &AMDGPU::SReg_64RegClass) ||
5965 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5966 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
5975 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
5977 return MI.getOperand(1).isReg() || RI.isAGPR(MRI,
MI.getOperand(0).getReg())
5979 : AMDGPU::V_MOV_B32_e32;
// Fallback: any opcode without an explicit case below yields the
// INSTRUCTION_LIST_END sentinel.
5989 default:
return AMDGPU::INSTRUCTION_LIST_END;
// The opcodes in this group are returned unchanged (each maps to itself).
5990 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5991 case AMDGPU::COPY:
return AMDGPU::COPY;
5992 case AMDGPU::PHI:
return AMDGPU::PHI;
5993 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5994 case AMDGPU::WQM:
return AMDGPU::WQM;
5995 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5996 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5997 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
// Scalar add/sub family.
// S_ADD_I32 / S_SUB_I32: use the V_*_U32_e64 opcode when the subtarget reports
// hasAddNoCarryInsts(); otherwise fall back to the V_*_CO_U32_e32 opcode.
5998 case AMDGPU::S_ADD_I32:
5999 return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
6000 case AMDGPU::S_ADDC_U32:
6001 return AMDGPU::V_ADDC_U32_e32;
6002 case AMDGPU::S_SUB_I32:
6003 return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
// S_ADD_U32 / S_SUB_U32 always return the V_*_CO_U32_e32 opcode, regardless of
// hasAddNoCarryInsts().
6006 case AMDGPU::S_ADD_U32:
6007 return AMDGPU::V_ADD_CO_U32_e32;
6008 case AMDGPU::S_SUB_U32:
6009 return AMDGPU::V_SUB_CO_U32_e32;
// The 64-bit scalar pseudos map to the matching V_*_U64_PSEUDO opcodes.
6010 case AMDGPU::S_ADD_U64_PSEUDO:
6011 return AMDGPU::V_ADD_U64_PSEUDO;
6012 case AMDGPU::S_SUB_U64_PSEUDO:
6013 return AMDGPU::V_SUB_U64_PSEUDO;
6014 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
6015 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
6016 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
6017 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
6018 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
6019 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
6020 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
// V_XNOR_B32_e64 is returned only when the subtarget reports hasDLInsts();
// otherwise this case returns the INSTRUCTION_LIST_END sentinel.
6021 case AMDGPU::S_XNOR_B32:
6022 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
6023 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
6024 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
6025 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
6026 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
6027 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
6028 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
6029 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
6030 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
6031 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
6032 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
6033 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
6034 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
6035 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
6036 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
6037 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
6038 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
// Both the 32-bit and the 64-bit scalar NOT return V_NOT_B32_e32.
// NOTE(review): presumably the 64-bit case is split into 32-bit halves by the
// caller - confirm against the code that consumes this result.
6039 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
6040 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
6041 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
6042 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
6043 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
6044 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
6045 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
6046 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
6047 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
6048 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
6049 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
6050 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
6051 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
6052 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
6053 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
6054 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
6055 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
6056 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
6057 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
6058 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
// The SCC-conditional branches are replaced by the VCC-conditional ones:
// SCC0 -> VCCZ and SCC1 -> VCCNZ.
6059 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
6060 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
6061 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
6062 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
6063 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
6064 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
// F16 <-> F32 conversions: the result is the _t16_e64 opcode when the
// subtarget reports useRealTrue16Insts(), and the _fake16_e64 opcode otherwise.
// S_CVT_F32_F16 and S_CVT_HI_F32_F16 share the same result.
6065 case AMDGPU::S_CVT_F32_F16:
6066 case AMDGPU::S_CVT_HI_F32_F16:
6067 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
6068 : AMDGPU::V_CVT_F32_F16_fake16_e64;
6069 case AMDGPU::S_CVT_F16_F32:
6070 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
6071 : AMDGPU::V_CVT_F16_F32_fake16_e64;
6072 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
6073 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
6074 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
6075 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
6076 case AMDGPU::S_CEIL_F16:
6077 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
6078 : AMDGPU::V_CEIL_F16_fake16_e64;
6079 case AMDGPU::S_FLOOR_F16:
6080 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6081 : AMDGPU::V_FLOOR_F16_fake16_e64;
6082 case AMDGPU::S_TRUNC_F16:
6083 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6084 : AMDGPU::V_TRUNC_F16_fake16_e64;
6085 case AMDGPU::S_RNDNE_F16:
6086 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6087 : AMDGPU::V_RNDNE_F16_fake16_e64;
6088 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6089 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6090 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6091 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6092 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6093 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6094 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6095 case AMDGPU::S_ADD_F16:
6096 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6097 : AMDGPU::V_ADD_F16_fake16_e64;
6098 case AMDGPU::S_SUB_F16:
6099 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6100 : AMDGPU::V_SUB_F16_fake16_e64;
6101 case AMDGPU::S_MIN_F16:
6102 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6103 : AMDGPU::V_MIN_F16_fake16_e64;
6104 case AMDGPU::S_MAX_F16:
6105 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6106 : AMDGPU::V_MAX_F16_fake16_e64;
6107 case AMDGPU::S_MINIMUM_F16:
6108 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6109 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6110 case AMDGPU::S_MAXIMUM_F16:
6111 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6112 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6113 case AMDGPU::S_MUL_F16:
6114 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6115 : AMDGPU::V_MUL_F16_fake16_e64;
6116 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6117 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6118 case AMDGPU::S_FMAC_F16:
6119 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6120 : AMDGPU::V_FMAC_F16_fake16_e64;
6121 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6122 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6123 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6124 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6125 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6126 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6127 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6128 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6129 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6130 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6131 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6132 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6133 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6134 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6135 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6136 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6137 case AMDGPU::S_CMP_LT_F16:
6138 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6139 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6140 case AMDGPU::S_CMP_EQ_F16:
6141 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6142 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6143 case AMDGPU::S_CMP_LE_F16:
6144 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6145 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6146 case AMDGPU::S_CMP_GT_F16:
6147 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6148 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6149 case AMDGPU::S_CMP_LG_F16:
6150 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6151 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6152 case AMDGPU::S_CMP_GE_F16:
6153 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6154 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6155 case AMDGPU::S_CMP_O_F16:
6156 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6157 : AMDGPU::V_CMP_O_F16_fake16_e64;
6158 case AMDGPU::S_CMP_U_F16:
6159 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6160 : AMDGPU::V_CMP_U_F16_fake16_e64;
6161 case AMDGPU::S_CMP_NGE_F16:
6162 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6163 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6164 case AMDGPU::S_CMP_NLG_F16:
6165 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6166 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6167 case AMDGPU::S_CMP_NGT_F16:
6168 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6169 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6170 case AMDGPU::S_CMP_NLE_F16:
6171 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6172 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6173 case AMDGPU::S_CMP_NEQ_F16:
6174 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6175 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6176 case AMDGPU::S_CMP_NLT_F16:
6177 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6178 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
// V_S_* opcodes (EXP, LOG, RCP, RSQ, SQRT): each maps to the V_* opcode of the
// same operation. The F32 forms return the _e64 opcode directly; the F16 forms
// return the _t16_e64 opcode when the subtarget reports useRealTrue16Insts()
// and the _fake16_e64 opcode otherwise.
6179 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6180 case AMDGPU::V_S_EXP_F16_e64:
6181 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6182 : AMDGPU::V_EXP_F16_fake16_e64;
6183 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6184 case AMDGPU::V_S_LOG_F16_e64:
6185 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6186 : AMDGPU::V_LOG_F16_fake16_e64;
6187 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6188 case AMDGPU::V_S_RCP_F16_e64:
6189 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6190 : AMDGPU::V_RCP_F16_fake16_e64;
6191 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6192 case AMDGPU::V_S_RSQ_F16_e64:
6193 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6194 : AMDGPU::V_RSQ_F16_fake16_e64;
6195 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6196 case AMDGPU::V_S_SQRT_F16_e64:
6197 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6198 : AMDGPU::V_SQRT_F16_fake16_e64;
6201 "Unexpected scalar opcode without corresponding vector one!");
6250 "Not a whole wave func");
6253 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6254 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6261 unsigned OpNo)
const {
6263 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6264 Desc.operands()[OpNo].RegClass == -1) {
6267 if (Reg.isVirtual()) {
6271 return RI.getPhysRegBaseClass(Reg);
6274 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6275 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6283 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6285 unsigned Size = RI.getRegSizeInBits(*RC);
6286 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6287 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6288 : AMDGPU::V_MOV_B32_e32;
6290 Opcode = AMDGPU::COPY;
6291 else if (RI.isSGPRClass(RC))
6292 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6306 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6312 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6323 if (SubIdx == AMDGPU::sub0)
6325 if (SubIdx == AMDGPU::sub1)
6337void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6353 if (Reg.isPhysical())
6363 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6366 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6373 unsigned Opc =
MI.getOpcode();
6379 constexpr AMDGPU::OpName OpNames[] = {
6380 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6383 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6384 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6394 bool IsAGPR = RI.isAGPR(MRI, MO.
getReg());
6395 if (IsAGPR && !ST.hasMAIInsts())
6401 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6402 const int DataIdx = AMDGPU::getNamedOperandIdx(
6403 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6404 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6405 MI.getOperand(DataIdx).isReg() &&
6406 RI.isAGPR(MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6408 if ((
int)
OpIdx == DataIdx) {
6409 if (VDstIdx != -1 &&
6410 RI.isAGPR(MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6413 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6414 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6415 RI.isAGPR(MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6420 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6421 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6422 RI.isSGPRReg(MRI, MO.
getReg()))
6425 if (ST.hasFlatScratchHiInB64InstHazard() &&
6432 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6435 if (!ST.hasDPPSrc1SGPR() &&
isDPP(
MI) && RI.isSGPRReg(MRI, MO.
getReg()) &&
6436 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1))
6456 constexpr unsigned NumOps = 3;
6457 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6458 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6459 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6460 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6465 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6468 MO = &
MI.getOperand(SrcIdx);
6471 if (!MO->
isReg() || !RI.isSGPRReg(MRI, MO->
getReg()))
6475 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6479 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6483 return !OpSel && !OpSelHi;
6492 int64_t RegClass = getOpRegClassID(OpInfo);
6494 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6503 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6504 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6508 if (!LiteralLimit--)
6518 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6526 if (--ConstantBusLimit <= 0)
6538 if (!LiteralLimit--)
6540 if (--ConstantBusLimit <= 0)
6546 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6550 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6552 !
Op.isIdenticalTo(*MO))
6562 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6576 bool Is64BitOp = Is64BitFPOp ||
6583 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6592 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
// NOTE(review): despite the "Only" suffix, these flags simply hold the
// subtarget's hasGFX950Insts() / hasGFX940Insts() results; nothing here
// establishes exclusivity between the two.
6610 bool IsGFX950Only = ST.hasGFX950Insts();
6611 bool IsGFX940Only = ST.hasGFX940Insts();
// Guard taken when the subtarget reports neither feature.
6613 if (!IsGFX950Only && !IsGFX940Only)
6631 unsigned Opcode =
MI.getOpcode();
6633 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6634 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6635 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6636 case AMDGPU::V_MQSAD_U32_U8_e64:
6637 case AMDGPU::V_PK_ADD_F16:
6638 case AMDGPU::V_PK_ADD_F32:
6639 case AMDGPU::V_PK_ADD_I16:
6640 case AMDGPU::V_PK_ADD_U16:
6641 case AMDGPU::V_PK_ASHRREV_I16:
6642 case AMDGPU::V_PK_FMA_F16:
6643 case AMDGPU::V_PK_FMA_F32:
6644 case AMDGPU::V_PK_FMAC_F16_e32:
6645 case AMDGPU::V_PK_FMAC_F16_e64:
6646 case AMDGPU::V_PK_LSHLREV_B16:
6647 case AMDGPU::V_PK_LSHRREV_B16:
6648 case AMDGPU::V_PK_MAD_I16:
6649 case AMDGPU::V_PK_MAD_U16:
6650 case AMDGPU::V_PK_MAX_F16:
6651 case AMDGPU::V_PK_MAX_I16:
6652 case AMDGPU::V_PK_MAX_U16:
6653 case AMDGPU::V_PK_MIN_F16:
6654 case AMDGPU::V_PK_MIN_I16:
6655 case AMDGPU::V_PK_MIN_U16:
6656 case AMDGPU::V_PK_MOV_B32:
6657 case AMDGPU::V_PK_MUL_F16:
6658 case AMDGPU::V_PK_MUL_F32:
6659 case AMDGPU::V_PK_MUL_LO_U16:
6660 case AMDGPU::V_PK_SUB_I16:
6661 case AMDGPU::V_PK_SUB_U16:
6662 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6671 unsigned Opc =
MI.getOpcode();
6674 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6677 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6683 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6684 RI.isSGPRReg(MRI, Src0.
getReg()))
6690 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6692 if (Src0.
isReg() && RI.isVGPR(MRI, Src0.
getReg())) {
6698 if (Src1.
isReg() && RI.isVGPR(MRI, Src1.
getReg())) {
6709 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6710 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6711 if (!RI.isVGPR(MRI,
MI.getOperand(Src2Idx).getReg()))
6723 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6724 RI.isVGPR(MRI, Src1.
getReg())) {
6737 if (HasImplicitSGPR || !
MI.isCommutable()) {
6754 if (CommutedOpc == -1) {
6759 MI.setDesc(
get(CommutedOpc));
6763 bool Src0Kill = Src0.
isKill();
6767 else if (Src1.
isReg()) {
6782 unsigned Opc =
MI.getOpcode();
6785 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6786 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6787 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6790 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6791 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6792 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6793 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6794 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6795 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6796 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6806 if (VOP3Idx[2] != -1) {
6818 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6819 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6821 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6823 SGPRsUsed.
insert(SGPRReg);
6827 for (
int Idx : VOP3Idx) {
6836 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6848 if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.
getReg())))
6855 if (ConstantBusLimit > 0) {
6867 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6868 !RI.isVGPR(MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6874 for (
unsigned I = 0;
I < 3; ++
I) {
6887 SRC = RI.getCommonSubClass(SRC, DstRC);
6890 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6892 if (RI.hasAGPRs(VRC)) {
6893 VRC = RI.getEquivalentVGPRClass(VRC);
6896 get(TargetOpcode::COPY), NewSrcReg)
6903 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6909 for (
unsigned i = 0; i < SubRegs; ++i) {
6912 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6913 .
addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
6919 get(AMDGPU::REG_SEQUENCE), DstReg);
6920 for (
unsigned i = 0; i < SubRegs; ++i) {
6922 MIB.
addImm(RI.getSubRegFromChannel(i));
6935 if (SBase && !RI.isSGPRClass(MRI.
getRegClass(SBase->getReg()))) {
6937 SBase->setReg(SGPR);
6940 if (SOff && !RI.isSGPRReg(MRI, SOff->
getReg())) {
6948 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6949 if (OldSAddrIdx < 0)
6962 if (RI.isSGPRReg(MRI, SAddr.
getReg()))
6965 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6966 if (NewVAddrIdx < 0)
6969 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6973 if (OldVAddrIdx >= 0) {
6987 if (OldVAddrIdx == NewVAddrIdx) {
6998 assert(OldSAddrIdx == NewVAddrIdx);
7000 if (OldVAddrIdx >= 0) {
7001 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
7002 AMDGPU::OpName::vdst_in);
7006 if (NewVDstIn != -1) {
7007 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
7013 if (NewVDstIn != -1) {
7014 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
7055 unsigned OpSubReg =
Op.getSubReg();
7058 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
7074 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7077 bool ImpDef = Def->isImplicitDef();
7078 while (!ImpDef && Def && Def->isCopy()) {
7079 if (Def->getOperand(1).getReg().isPhysical())
7082 ImpDef = Def && Def->isImplicitDef();
7084 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7100 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7104 for (
auto [Idx, ScalarOp] :
enumerate(ScalarOps)) {
7105 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
7106 unsigned NumSubRegs =
RegSize / 32;
7107 Register VScalarOp = ScalarOp->getReg();
7110 TII.getRegClass(
TII.get(AMDGPU::V_READFIRSTLANE_B32), 1);
7112 if (NumSubRegs == 1) {
7115 TRI->getCommonSubClass(VScalarOpRC, RFLSrcRC);
7116 Common != VScalarOpRC) {
7123 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7128 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7134 CondReg = NewCondReg;
7144 if (PhySGPRs.empty() || !PhySGPRs[Idx].isValid())
7145 ScalarOp->setReg(CurReg);
7148 BuildMI(*ScalarOp->getParent()->getParent(), ScalarOp->getParent(),
DL,
7149 TII.get(AMDGPU::COPY), PhySGPRs[Idx])
7151 ScalarOp->setReg(PhySGPRs[Idx]);
7153 ScalarOp->setIsKill();
7157 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7158 "Unhandled register size");
7160 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7167 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7168 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
7171 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7172 .
addReg(VScalarOp, VScalarOpUndef,
7173 TRI->getSubRegFromChannel(Idx + 1));
7180 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7190 if (NumSubRegs <= 2)
7191 Cmp.addReg(VScalarOp);
7193 Cmp.addReg(VScalarOp, VScalarOpUndef,
7194 TRI->getSubRegFromChannel(Idx, 2));
7198 CondReg = NewCondReg;
7208 const auto *SScalarOpRC =
7214 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7215 unsigned Channel = 0;
7216 for (
Register Piece : ReadlanePieces) {
7217 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
7221 if (PhySGPRs.empty() || !PhySGPRs[Idx].isValid())
7222 ScalarOp->setReg(SScalarOp);
7224 BuildMI(*ScalarOp->getParent()->getParent(), ScalarOp->getParent(),
DL,
7225 TII.get(AMDGPU::COPY), PhySGPRs[Idx])
7227 ScalarOp->setReg(PhySGPRs[Idx]);
7229 ScalarOp->setIsKill();
7261 assert((PhySGPRs.empty() || PhySGPRs.size() == ScalarOps.
size()) &&
7262 "Physical SGPRs must be empty or match the number of scalar operands");
7268 if (!Begin.isValid())
7270 if (!End.isValid()) {
7276 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7285 std::numeric_limits<unsigned>::max()) !=
7303 for (
auto I = Begin;
I != AfterMI;
I++) {
7304 for (
auto &MO :
I->all_uses())
7340 for (
auto &Succ : RemainderBB->
successors()) {
7365static std::tuple<unsigned, unsigned>
7373 TII.buildExtractSubReg(
MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7374 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7381 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7398 .
addImm(AMDGPU::sub0_sub1)
7404 return std::tuple(RsrcPtr, NewSRsrc);
7441 if (
MI.getOpcode() == AMDGPU::PHI) {
7443 assert(!RI.isSGPRClass(VRC));
7446 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7448 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7464 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7467 if (RI.hasVGPRs(DstRC)) {
7471 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7473 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7491 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7496 if (DstRC != Src0RC) {
7505 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7507 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7513 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7514 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7515 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7516 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7517 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7518 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7519 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7521 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7534 ? AMDGPU::OpName::rsrc
7535 : AMDGPU::OpName::srsrc;
7540 AMDGPU::OpName SampOpName =
7541 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7550 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7558 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7562 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7572 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d2 ||
7573 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d4 ||
7574 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d2 ||
7575 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d4) {
7577 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7584 bool isSoffsetLegal =
true;
7586 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7587 if (SoffsetIdx != -1) {
7591 isSoffsetLegal =
false;
7595 bool isRsrcLegal =
true;
7597 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7598 if (RsrcIdx != -1) {
7600 if (Rsrc->
isReg() && !RI.isSGPRReg(MRI, Rsrc->
getReg()))
7601 isRsrcLegal =
false;
7605 if (isRsrcLegal && isSoffsetLegal)
7633 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7637 unsigned RsrcPtr, NewSRsrc;
7644 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7645 .addReg(VAddr->
getReg(), {}, AMDGPU::sub0)
7651 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7652 .addReg(VAddr->
getReg(), {}, AMDGPU::sub1)
7665 }
else if (!VAddr && ST.hasAddr64()) {
7669 "FIXME: Need to emit flat atomics here");
7671 unsigned RsrcPtr, NewSRsrc;
7697 MIB.
addImm(CPol->getImm());
7702 MIB.
addImm(TFE->getImm());
7722 MI.removeFromParent();
7727 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7728 .addImm(AMDGPU::sub0)
7729 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7730 .addImm(AMDGPU::sub1);
7733 if (!isSoffsetLegal) {
7744 if (!isSoffsetLegal) {
7756 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7757 if (RsrcIdx != -1) {
7758 DeferredList.insert(
MI);
7763 return DeferredList.contains(
MI);
7773 if (!ST.useRealTrue16Insts())
7776 unsigned Opcode =
MI.getOpcode();
7780 OpIdx >=
get(Opcode).getNumOperands() ||
7781 get(Opcode).operands()[
OpIdx].RegClass == -1)
7785 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7789 if (!RI.isVGPRClass(CurrRC))
7792 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7794 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7795 Op.setSubReg(AMDGPU::lo16);
7796 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7806 Op.setReg(NewDstReg);
7819 assert(
MI->getOpcode() == AMDGPU::SI_CALL_ISEL &&
7820 "This only handle waterfall for SI_CALL_ISEL");
7827 while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
7830 while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
7835 while (End !=
MBB.end() && End->isCopy() &&
7836 MI->definesRegister(End->getOperand(1).getReg(), &RI))
7846 while (!Worklist.
empty()) {
7852 moveToVALUImpl(Worklist, MDT, Inst, WaterFalls, V2SPhyCopiesToErase);
7858 moveToVALUImpl(Worklist, MDT, *Inst, WaterFalls, V2SPhyCopiesToErase);
7860 "Deferred MachineInstr are not supposed to re-populate worklist");
7863 for (std::pair<MachineInstr *, V2PhysSCopyInfo> &Entry : WaterFalls) {
7864 if (Entry.first->getOpcode() == AMDGPU::SI_CALL_ISEL)
7866 Entry.second.SGPRs);
7869 for (std::pair<MachineInstr *, bool> Entry : V2SPhyCopiesToErase)
7871 Entry.first->eraseFromParent();
7879 if (SubRegIndices.
size() <= 1) {
7882 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
7889 for (int16_t Indice : SubRegIndices) {
7892 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
7899 get(AMDGPU::REG_SEQUENCE), DstReg);
7900 for (
unsigned i = 0; i < SubRegIndices.size(); ++i) {
7902 MIB.
addImm(RI.getSubRegFromChannel(i));
7912 if (DstReg == AMDGPU::M0) {
7925 if (
I->getOpcode() == AMDGPU::SI_CALL_ISEL) {
7927 for (
unsigned i = 0; i <
UseMI->getNumOperands(); ++i) {
7928 if (
UseMI->getOperand(i).isReg() &&
7929 UseMI->getOperand(i).getReg() == DstReg) {
7933 V2SCopyInfo.MOs.push_back(MO);
7934 V2SCopyInfo.SGPRs.push_back(DstReg);
7938 }
else if (
I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG &&
7939 I->getOperand(0).isReg() &&
7940 I->getOperand(0).getReg() == DstReg) {
7943 }
else if (
I->readsRegister(DstReg, &RI)) {
7945 V2SPhyCopiesToErase[&Inst] =
false;
7947 if (
I->findRegisterDefOperand(DstReg, &RI))
7969 case AMDGPU::S_ADD_I32:
7970 case AMDGPU::S_SUB_I32: {
7974 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7982 case AMDGPU::S_MUL_U64:
7983 if (ST.hasVMulU64Inst()) {
7984 NewOpcode = AMDGPU::V_MUL_U64_e64;
7988 splitScalarSMulU64(Worklist, Inst, MDT);
7992 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7993 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7996 splitScalarSMulPseudo(Worklist, Inst, MDT);
8000 case AMDGPU::S_AND_B64:
8001 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
8005 case AMDGPU::S_OR_B64:
8006 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
8010 case AMDGPU::S_XOR_B64:
8011 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
8015 case AMDGPU::S_NAND_B64:
8016 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
8020 case AMDGPU::S_NOR_B64:
8021 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
8025 case AMDGPU::S_XNOR_B64:
8026 if (ST.hasDLInsts())
8027 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
8029 splitScalar64BitXnor(Worklist, Inst, MDT);
8033 case AMDGPU::S_ANDN2_B64:
8034 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
8038 case AMDGPU::S_ORN2_B64:
8039 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
8043 case AMDGPU::S_BREV_B64:
8044 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
8048 case AMDGPU::S_NOT_B64:
8049 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
8053 case AMDGPU::S_BCNT1_I32_B64:
8054 splitScalar64BitBCNT(Worklist, Inst);
8058 case AMDGPU::S_BFE_I64:
8059 splitScalar64BitBFE(Worklist, Inst);
8063 case AMDGPU::S_FLBIT_I32_B64:
8064 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
8067 case AMDGPU::S_FF1_I32_B64:
8068 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
8072 case AMDGPU::S_LSHL_B32:
8073 if (ST.hasOnlyRevVALUShifts()) {
8074 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
8078 case AMDGPU::S_ASHR_I32:
8079 if (ST.hasOnlyRevVALUShifts()) {
8080 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
8084 case AMDGPU::S_LSHR_B32:
8085 if (ST.hasOnlyRevVALUShifts()) {
8086 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
8090 case AMDGPU::S_LSHL_B64:
8091 if (ST.hasOnlyRevVALUShifts()) {
8093 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
8094 : AMDGPU::V_LSHLREV_B64_e64;
8098 case AMDGPU::S_ASHR_I64:
8099 if (ST.hasOnlyRevVALUShifts()) {
8100 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
8104 case AMDGPU::S_LSHR_B64:
8105 if (ST.hasOnlyRevVALUShifts()) {
8106 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
8111 case AMDGPU::S_ABS_I32:
8112 lowerScalarAbs(Worklist, Inst);
8116 case AMDGPU::S_ABSDIFF_I32:
8117 lowerScalarAbsDiff(Worklist, Inst);
8121 case AMDGPU::S_CBRANCH_SCC0:
8122 case AMDGPU::S_CBRANCH_SCC1: {
8125 bool IsSCC = CondReg == AMDGPU::SCC;
8133 case AMDGPU::S_BFE_U64:
8134 case AMDGPU::S_BFM_B64:
8137 case AMDGPU::S_PACK_LL_B32_B16:
8138 case AMDGPU::S_PACK_LH_B32_B16:
8139 case AMDGPU::S_PACK_HL_B32_B16:
8140 case AMDGPU::S_PACK_HH_B32_B16:
8141 movePackToVALU(Worklist, MRI, Inst);
8145 case AMDGPU::S_XNOR_B32:
8146 lowerScalarXnor(Worklist, Inst);
8150 case AMDGPU::S_NAND_B32:
8151 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
8155 case AMDGPU::S_NOR_B32:
8156 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
8160 case AMDGPU::S_ANDN2_B32:
8161 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
8165 case AMDGPU::S_ORN2_B32:
8166 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
8174 case AMDGPU::S_ADD_CO_PSEUDO:
8175 case AMDGPU::S_SUB_CO_PSEUDO: {
8176 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
8177 ? AMDGPU::V_ADDC_U32_e64
8178 : AMDGPU::V_SUBB_U32_e64;
8179 const auto *CarryRC = RI.getWaveMaskRegClass();
8201 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8205 case AMDGPU::S_UADDO_PSEUDO:
8206 case AMDGPU::S_USUBO_PSEUDO: {
8212 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8213 ? AMDGPU::V_ADD_CO_U32_e64
8214 : AMDGPU::V_SUB_CO_U32_e64;
8226 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8230 case AMDGPU::S_LSHL1_ADD_U32:
8231 case AMDGPU::S_LSHL2_ADD_U32:
8232 case AMDGPU::S_LSHL3_ADD_U32:
8233 case AMDGPU::S_LSHL4_ADD_U32: {
8237 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8238 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8239 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8253 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8257 case AMDGPU::S_CSELECT_B32:
8258 case AMDGPU::S_CSELECT_B64:
8259 lowerSelect(Worklist, Inst, MDT);
8262 case AMDGPU::S_CMP_EQ_I32:
8263 case AMDGPU::S_CMP_LG_I32:
8264 case AMDGPU::S_CMP_GT_I32:
8265 case AMDGPU::S_CMP_GE_I32:
8266 case AMDGPU::S_CMP_LT_I32:
8267 case AMDGPU::S_CMP_LE_I32:
8268 case AMDGPU::S_CMP_EQ_U32:
8269 case AMDGPU::S_CMP_LG_U32:
8270 case AMDGPU::S_CMP_GT_U32:
8271 case AMDGPU::S_CMP_GE_U32:
8272 case AMDGPU::S_CMP_LT_U32:
8273 case AMDGPU::S_CMP_LE_U32:
8274 case AMDGPU::S_CMP_EQ_U64:
8275 case AMDGPU::S_CMP_LG_U64:
8276 case AMDGPU::S_CMP_LT_F32:
8277 case AMDGPU::S_CMP_EQ_F32:
8278 case AMDGPU::S_CMP_LE_F32:
8279 case AMDGPU::S_CMP_GT_F32:
8280 case AMDGPU::S_CMP_LG_F32:
8281 case AMDGPU::S_CMP_GE_F32:
8282 case AMDGPU::S_CMP_O_F32:
8283 case AMDGPU::S_CMP_U_F32:
8284 case AMDGPU::S_CMP_NGE_F32:
8285 case AMDGPU::S_CMP_NLG_F32:
8286 case AMDGPU::S_CMP_NGT_F32:
8287 case AMDGPU::S_CMP_NLE_F32:
8288 case AMDGPU::S_CMP_NEQ_F32:
8289 case AMDGPU::S_CMP_NLT_F32: {
8294 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8308 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8312 case AMDGPU::S_CMP_LT_F16:
8313 case AMDGPU::S_CMP_EQ_F16:
8314 case AMDGPU::S_CMP_LE_F16:
8315 case AMDGPU::S_CMP_GT_F16:
8316 case AMDGPU::S_CMP_LG_F16:
8317 case AMDGPU::S_CMP_GE_F16:
8318 case AMDGPU::S_CMP_O_F16:
8319 case AMDGPU::S_CMP_U_F16:
8320 case AMDGPU::S_CMP_NGE_F16:
8321 case AMDGPU::S_CMP_NLG_F16:
8322 case AMDGPU::S_CMP_NGT_F16:
8323 case AMDGPU::S_CMP_NLE_F16:
8324 case AMDGPU::S_CMP_NEQ_F16:
8325 case AMDGPU::S_CMP_NLT_F16: {
8348 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8352 case AMDGPU::S_CVT_HI_F32_F16: {
8355 if (ST.useRealTrue16Insts()) {
8360 .
addReg(TmpReg, {}, AMDGPU::hi16)
8376 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8380 case AMDGPU::S_MINIMUM_F32:
8381 case AMDGPU::S_MAXIMUM_F32: {
8393 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8397 case AMDGPU::S_MINIMUM_F16:
8398 case AMDGPU::S_MAXIMUM_F16: {
8400 ? &AMDGPU::VGPR_16RegClass
8401 : &AMDGPU::VGPR_32RegClass);
8413 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8417 case AMDGPU::V_S_EXP_F16_e64:
8418 case AMDGPU::V_S_LOG_F16_e64:
8419 case AMDGPU::V_S_RCP_F16_e64:
8420 case AMDGPU::V_S_RSQ_F16_e64:
8421 case AMDGPU::V_S_SQRT_F16_e64: {
8423 ? &AMDGPU::VGPR_16RegClass
8424 : &AMDGPU::VGPR_32RegClass);
8436 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8442 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8450 if (NewOpcode == Opcode) {
8457 V2SPhyCopiesToErase);
8465 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8472 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8503 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8507 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8513 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8520 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8522 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8527 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8535 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8545 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8546 AMDGPU::OpName::src0_modifiers) >= 0)
8550 NewInstr->addOperand(Src);
8553 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8556 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8558 NewInstr.addImm(
Size);
8559 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8563 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8568 "Scalar BFE is only implemented for constant width and offset");
8576 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8577 AMDGPU::OpName::src1_modifiers) >= 0)
8579 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8581 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8582 AMDGPU::OpName::src2_modifiers) >= 0)
8584 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8586 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8588 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8590 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8596 NewInstr->addOperand(
Op);
8603 if (
Op.getReg() == AMDGPU::SCC) {
8605 if (
Op.isDef() && !
Op.isDead())
8606 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8608 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8613 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8614 Register DstReg = NewInstr->getOperand(0).getReg();
8629 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8633std::pair<bool, MachineBasicBlock *>
8636 if (ST.hasAddNoCarryInsts()) {
8648 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8650 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8651 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8662 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8663 return std::pair(
true, NewBB);
8666 return std::pair(
false,
nullptr);
8683 bool IsSCC = (CondReg == AMDGPU::SCC);
8697 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8702 bool CopyFound =
false;
8703 for (MachineInstr &CandI :
8706 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8708 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8710 .
addReg(CandI.getOperand(1).getReg());
8722 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8731 MachineInstr *NewInst;
8732 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8733 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8748 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8763 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8764 : AMDGPU::V_SUB_CO_U32_e32;
8775 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8792 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8793 : AMDGPU::V_SUB_CO_U32_e32;
8806 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8820 if (ST.hasDLInsts()) {
8830 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8836 bool Src0IsSGPR = Src0.
isReg() &&
8838 bool Src1IsSGPR = Src1.
isReg() &&
8852 }
else if (Src1IsSGPR) {
8870 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8876 unsigned Opcode)
const {
8900 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8905 unsigned Opcode)
const {
8929 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8944 const MCInstrDesc &InstDesc =
get(Opcode);
8945 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
8947 &AMDGPU::SGPR_32RegClass;
8949 const TargetRegisterClass *Src0SubRC =
8950 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8953 AMDGPU::sub0, Src0SubRC);
8956 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8957 const TargetRegisterClass *NewDestSubRC =
8958 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8961 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0).
add(SrcReg0Sub0);
8964 AMDGPU::sub1, Src0SubRC);
8967 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1).
add(SrcReg0Sub1);
8981 Worklist.
insert(&LoHalf);
8982 Worklist.
insert(&HiHalf);
8988 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9011 const TargetRegisterClass *Src0SubRC =
9012 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9013 if (RI.isSGPRClass(Src0SubRC))
9014 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9015 const TargetRegisterClass *Src1SubRC =
9016 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9017 if (RI.isSGPRClass(Src1SubRC))
9018 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9022 MachineOperand Op0L =
9024 MachineOperand Op1L =
9026 MachineOperand Op0H =
9028 MachineOperand Op1H =
9047 MachineInstr *Op1L_Op0H =
9053 MachineInstr *Op1H_Op0L =
9059 MachineInstr *Carry =
9064 MachineInstr *LoHalf =
9074 MachineInstr *HiHalf =
9097 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9120 const TargetRegisterClass *Src0SubRC =
9121 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9122 if (RI.isSGPRClass(Src0SubRC))
9123 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9124 const TargetRegisterClass *Src1SubRC =
9125 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9126 if (RI.isSGPRClass(Src1SubRC))
9127 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9131 MachineOperand Op0L =
9133 MachineOperand Op1L =
9137 unsigned NewOpc =
Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
9138 ? AMDGPU::V_MUL_HI_U32_e64
9139 : AMDGPU::V_MUL_HI_I32_e64;
9140 MachineInstr *HiHalf =
9143 MachineInstr *LoHalf =
9162 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9178 const MCInstrDesc &InstDesc =
get(Opcode);
9179 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
9181 &AMDGPU::SGPR_32RegClass;
9183 const TargetRegisterClass *Src0SubRC =
9184 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9185 const TargetRegisterClass *Src1RC = Src1.
isReg() ?
9187 &AMDGPU::SGPR_32RegClass;
9189 const TargetRegisterClass *Src1SubRC =
9190 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9193 AMDGPU::sub0, Src0SubRC);
9195 AMDGPU::sub0, Src1SubRC);
9197 AMDGPU::sub1, Src0SubRC);
9199 AMDGPU::sub1, Src1SubRC);
9202 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9203 const TargetRegisterClass *NewDestSubRC =
9204 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9207 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0)
9212 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1)
9225 Worklist.
insert(&LoHalf);
9226 Worklist.
insert(&HiHalf);
9229 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9249 MachineOperand* Op0;
9250 MachineOperand* Op1;
9252 if (Src0.
isReg() && RI.isSGPRReg(MRI, Src0.
getReg())) {
9285 const MCInstrDesc &InstDesc =
get(AMDGPU::V_BCNT_U32_B32_e64);
9286 const TargetRegisterClass *SrcRC = Src.isReg() ?
9288 &AMDGPU::SGPR_32RegClass;
9293 const TargetRegisterClass *SrcSubRC =
9294 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9297 AMDGPU::sub0, SrcSubRC);
9299 AMDGPU::sub1, SrcSubRC);
9309 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9328 Offset == 0 &&
"Not implemented");
9351 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9361 .
addReg(Src.getReg(), {}, AMDGPU::sub0);
9364 .
addReg(Src.getReg(), {}, AMDGPU::sub0)
9370 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9389 const MCInstrDesc &InstDesc =
get(Opcode);
9391 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9392 unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9393 : AMDGPU::V_ADD_CO_U32_e32;
9395 const TargetRegisterClass *SrcRC =
9396 Src.isReg() ? MRI.
getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9397 const TargetRegisterClass *SrcSubRC =
9398 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9400 MachineOperand SrcRegSub0 =
9402 MachineOperand SrcRegSub1 =
9415 .
addReg(IsCtlz ? MidReg1 : MidReg2)
9421 .
addReg(IsCtlz ? MidReg2 : MidReg1);
9425 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
9428void SIInstrInfo::addUsersToMoveToVALUWorklist(
9432 MachineInstr &
UseMI = *MO.getParent();
9436 switch (
UseMI.getOpcode()) {
9439 case AMDGPU::SOFT_WQM:
9440 case AMDGPU::STRICT_WWM:
9441 case AMDGPU::STRICT_WQM:
9442 case AMDGPU::REG_SEQUENCE:
9444 case AMDGPU::INSERT_SUBREG:
9447 OpNo = MO.getOperandNo();
9454 if (!RI.hasVectorRegisters(OpRC))
9471 if (ST.useRealTrue16Insts()) {
9473 if (!Src0.
isReg() || !RI.isVGPR(MRI, Src0.
getReg())) {
9476 get(Src0.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg0)
9482 if (!Src1.
isReg() || !RI.isVGPR(MRI, Src1.
getReg())) {
9485 get(Src1.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg1)
9494 auto NewMI =
BuildMI(*
MBB, Inst,
DL,
get(AMDGPU::REG_SEQUENCE), ResultReg);
9496 case AMDGPU::S_PACK_LL_B32_B16:
9498 .addReg(SrcReg0, {},
9499 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9500 .addImm(AMDGPU::lo16)
9501 .addReg(SrcReg1, {},
9502 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9503 .addImm(AMDGPU::hi16);
9505 case AMDGPU::S_PACK_LH_B32_B16:
9507 .addReg(SrcReg0, {},
9508 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9509 .addImm(AMDGPU::lo16)
9510 .addReg(SrcReg1, {}, AMDGPU::hi16)
9511 .addImm(AMDGPU::hi16);
9513 case AMDGPU::S_PACK_HL_B32_B16:
9514 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9515 .addImm(AMDGPU::lo16)
9516 .addReg(SrcReg1, {},
9517 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9518 .addImm(AMDGPU::hi16);
9520 case AMDGPU::S_PACK_HH_B32_B16:
9521 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9522 .addImm(AMDGPU::lo16)
9523 .addReg(SrcReg1, {}, AMDGPU::hi16)
9524 .addImm(AMDGPU::hi16);
9532 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9537 case AMDGPU::S_PACK_LL_B32_B16: {
9556 case AMDGPU::S_PACK_LH_B32_B16: {
9566 case AMDGPU::S_PACK_HL_B32_B16: {
9577 case AMDGPU::S_PACK_HH_B32_B16: {
9597 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9606 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
9607 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
9608 SmallVector<MachineInstr *, 4> CopyToDelete;
9611 for (MachineInstr &
MI :
9615 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
9618 MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9619 Register DestReg =
MI.getOperand(0).getReg();
9626 MI.getOperand(SCCIdx).setReg(NewCond);
9632 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
9635 for (
auto &Copy : CopyToDelete)
9636 Copy->eraseFromParent();
9644void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
9650 for (MachineInstr &
MI :
9653 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
9655 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
9664 const TargetRegisterClass *NewDstRC =
getOpRegClass(Inst, 0);
9672 case AMDGPU::REG_SEQUENCE:
9673 case AMDGPU::INSERT_SUBREG:
9675 case AMDGPU::SOFT_WQM:
9676 case AMDGPU::STRICT_WWM:
9677 case AMDGPU::STRICT_WQM: {
9679 if (RI.isAGPRClass(SrcRC)) {
9680 if (RI.isAGPRClass(NewDstRC))
9685 case AMDGPU::REG_SEQUENCE:
9686 case AMDGPU::INSERT_SUBREG:
9687 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9690 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9696 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9699 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9713 int OpIndices[3])
const {
9714 const MCInstrDesc &
Desc =
MI.getDesc();
9730 const MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9732 for (
unsigned i = 0; i < 3; ++i) {
9733 int Idx = OpIndices[i];
9737 const MachineOperand &MO =
MI.getOperand(Idx);
9743 const TargetRegisterClass *OpRC =
9744 RI.getRegClass(getOpRegClassID(
Desc.operands()[Idx]));
9745 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9752 if (RI.isSGPRClass(RegRC))
9770 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9771 SGPRReg = UsedSGPRs[0];
9774 if (!SGPRReg && UsedSGPRs[1]) {
9775 if (UsedSGPRs[1] == UsedSGPRs[2])
9776 SGPRReg = UsedSGPRs[1];
9783 AMDGPU::OpName OperandName)
const {
9784 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9787 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9791 return &
MI.getOperand(Idx);
9805 if (ST.isAmdHsaOS()) {
9808 RsrcDataFormat |= (1ULL << 56);
9813 RsrcDataFormat |= (2ULL << 59);
9816 return RsrcDataFormat;
9826 uint64_t EltSizeValue =
Log2_32(ST.getMaxPrivateElementSize(
true)) - 1;
9831 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9838 Rsrc23 &=
~AMDGPU::RSRC_DATA_FORMAT;
9844 unsigned Opc =
MI.getOpcode();
9850 return get(
Opc).mayLoad() &&
9857 if (!Addr || !Addr->
isFI())
9866 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdata);
9868 return MI.getOperand(VDataIdx).getReg();
9878 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::data);
9880 return MI.getOperand(DataIdx).getReg();
9914 unsigned Opc =
MI.getOpcode();
9916 unsigned DescSize =
Desc.getSize();
9921 unsigned Size = DescSize;
9925 if (
MI.isBranch() && ST.hasOffset3fBug())
9936 bool HasLiteral =
false;
9937 unsigned LiteralSize = 4;
9938 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
9943 if (ST.has64BitLiterals()) {
9944 switch (OpInfo.OperandType) {
9967 return HasLiteral ? DescSize + LiteralSize : DescSize;
9972 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
9976 int RSrcIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::srsrc);
9977 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9981 case TargetOpcode::BUNDLE:
9982 return getInstBundleSize(
MI);
9983 case TargetOpcode::INLINEASM:
9984 case TargetOpcode::INLINEASM_BR: {
9986 const char *AsmStr =
MI.getOperand(0).getSymbolName();
9990 if (
MI.isMetaInstruction())
9994 const auto *D16Info = AMDGPU::getT16D16Helper(
Opc);
9997 unsigned LoInstOpcode = D16Info->LoOp;
9999 DescSize =
Desc.getSize();
10003 if (
Opc == AMDGPU::V_FMA_MIX_F16_t16 ||
Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
10006 DescSize =
Desc.getSize();
10015 if (
MI.isBranch() && ST.hasOffset3fBug())
10016 return InstSizeVerifyMode::NoVerify;
10017 return InstSizeVerifyMode::ExactSize;
10024 if (
MI.memoperands_empty())
10036 static const std::pair<int, const char *> TargetIndices[] = {
10075std::pair<unsigned, unsigned>
10082 static const std::pair<unsigned, const char *> TargetFlags[] = {
10100 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
10116 return AMDGPU::WWM_COPY;
10118 return AMDGPU::COPY;
10135 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
10139 if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
10140 return IsLRSplitInst;
10153 bool IsNullOrVectorRegister =
true;
10157 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
10160 return IsNullOrVectorRegister &&
10162 (!
MI.isTerminator() &&
MI.getOpcode() != AMDGPU::COPY &&
10163 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
10171 if (ST.hasAddNoCarryInsts())
10187 if (ST.hasAddNoCarryInsts())
10191 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
10193 : RS.scavengeRegisterBackwards(
10194 *RI.getBoolRC(),
I,
false,
10207 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10208 case AMDGPU::SI_KILL_I1_TERMINATOR:
10217 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10218 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10219 case AMDGPU::SI_KILL_I1_PSEUDO:
10220 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10232 const unsigned OffsetBits =
10234 return (1 << OffsetBits) - 1;
10238 if (!ST.isWave32())
10241 if (
MI.isInlineAsm())
10244 if (
MI.getNumOperands() <
MI.getNumExplicitOperands())
10247 for (
auto &
Op :
MI.implicit_operands()) {
10248 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
10249 Op.setReg(AMDGPU::VCC_LO);
10258 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sbase);
10262 const int16_t RCID = getOpRegClassID(
MI.getDesc().operands()[Idx]);
10263 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10279 if (Imm > MaxImm) {
10280 if (Imm <= MaxImm + 64) {
10282 Overflow = Imm - MaxImm;
10301 if (Overflow > 0) {
10309 if (ST.hasRestrictedSOffset())
10314 SOffset = Overflow;
10352 if (!ST.hasFlatInstOffsets())
10360 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10372std::pair<int64_t, int64_t>
10375 int64_t RemainderOffset = COffsetVal;
10376 int64_t ImmField = 0;
10381 if (AllowNegative) {
10383 int64_t
D = 1LL << NumBits;
10384 RemainderOffset = (COffsetVal /
D) *
D;
10385 ImmField = COffsetVal - RemainderOffset;
10387 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10389 (ImmField % 4) != 0) {
10391 RemainderOffset += ImmField % 4;
10392 ImmField -= ImmField % 4;
10394 }
else if (COffsetVal >= 0) {
10396 RemainderOffset = COffsetVal - ImmField;
10400 assert(RemainderOffset + ImmField == COffsetVal);
10401 return {ImmField, RemainderOffset};
10405 if (ST.hasNegativeScratchOffsetBug() &&
10413 switch (ST.getGeneration()) {
10442 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10443 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10444 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10445 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10446 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10447 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10448 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10449 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10456#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10457 case OPCODE##_dpp: \
10458 case OPCODE##_e32: \
10459 case OPCODE##_e64: \
10460 case OPCODE##_e64_dpp: \
10461 case OPCODE##_sdwa:
10475 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10476 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10477 case AMDGPU::V_FMA_F16_gfx9_e64:
10478 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10479 case AMDGPU::V_INTERP_P2_F16:
10480 case AMDGPU::V_MAD_F16_e64:
10481 case AMDGPU::V_MAD_U16_e64:
10482 case AMDGPU::V_MAD_I16_e64:
10491 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10505 switch (ST.getGeneration()) {
10518 if (
isMAI(Opcode)) {
10526 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX11_7Insts())
10529 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX1250Insts())
10536 if (ST.hasGFX90AInsts()) {
10537 uint32_t NMCOp = AMDGPU::INSTRUCTION_LIST_END;
10538 if (ST.hasGFX940Insts())
10540 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10542 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10544 if (NMCOp != AMDGPU::INSTRUCTION_LIST_END)
10550 if (MCOp == AMDGPU::INSTRUCTION_LIST_END)
10569 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
10570 if (
MI.getOperand(1 + 2 *
I + 1).getImm() == SubReg) {
10571 auto &RegOp =
MI.getOperand(1 + 2 *
I);
10583 switch (
MI.getOpcode()) {
10585 case AMDGPU::REG_SEQUENCE:
10589 case AMDGPU::INSERT_SUBREG:
10590 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
10607 if (!
P.Reg.isVirtual())
10612 while (
auto *
MI = DefInst) {
10614 switch (
MI->getOpcode()) {
10616 case AMDGPU::V_MOV_B32_e32: {
10617 auto &Op1 =
MI->getOperand(1);
10646 auto *DefBB =
DefMI.getParent();
10650 if (
UseMI.getParent() != DefBB)
10653 const int MaxInstScan = 20;
10657 auto E =
UseMI.getIterator();
10658 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
10659 if (
I->isDebugInstr())
10662 if (++NumInst > MaxInstScan)
10665 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
10678 auto *DefBB =
DefMI.getParent();
10680 const int MaxUseScan = 10;
10684 auto &UseInst = *
Use.getParent();
10687 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10690 if (++NumUse > MaxUseScan)
10697 const int MaxInstScan = 20;
10701 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
10704 if (
I->isDebugInstr())
10707 if (++NumInst > MaxInstScan)
10720 if (Reg == VReg && --NumUse == 0)
10722 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
10731 auto Cur =
MBB.begin();
10732 if (Cur !=
MBB.end())
10734 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
10737 }
while (Cur !=
MBB.end() && Cur != LastPHIIt);
10746 if (InsPt !=
MBB.end() &&
10747 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10748 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10749 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10750 InsPt->definesRegister(Src,
nullptr)) {
10754 .
addReg(Src, {}, SrcSubReg)
10797 if (isFullCopyInstr(
MI)) {
10798 Register DstReg =
MI.getOperand(0).getReg();
10799 Register SrcReg =
MI.getOperand(1).getReg();
10821 unsigned *PredCost)
const {
10822 if (
MI.isBundle()) {
10825 unsigned Lat = 0,
Count = 0;
10826 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
10828 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
10830 return Lat +
Count - 1;
10833 return SchedModel.computeInstrLatency(&
MI);
10840 return *CallAddrOp;
10847 unsigned Opcode =
MI.getOpcode();
10849 auto HandleAddrSpaceCast = [
this, &MRI](
const MachineInstr &
MI) {
10852 :
MI.getOperand(1).getReg();
10856 unsigned SrcAS = SrcTy.getAddressSpace();
10859 ST.hasGloballyAddressableScratch()
10867 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10868 return HandleAddrSpaceCast(
MI);
10871 auto IID = GI->getIntrinsicID();
10878 case Intrinsic::amdgcn_addrspacecast_nonnull:
10879 return HandleAddrSpaceCast(
MI);
10880 case Intrinsic::amdgcn_if:
10881 case Intrinsic::amdgcn_else:
10895 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10896 Opcode == AMDGPU::G_SEXTLOAD) {
10897 if (
MI.memoperands_empty())
10901 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10902 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10910 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10911 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10912 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10921 Formatter = std::make_unique<AMDGPUMIRFormatter>(ST);
10922 return Formatter.get();
10930 unsigned opcode =
MI.getOpcode();
10931 if (opcode == AMDGPU::V_READLANE_B32 ||
10932 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10933 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10936 if (isCopyInstr(
MI)) {
10940 RI.getPhysRegBaseClass(srcOp.
getReg());
10948 if (
MI.isPreISelOpcode())
10963 if (
MI.memoperands_empty())
10967 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10968 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10983 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
10985 if (!
SrcOp.isReg())
10989 if (!Reg || !
SrcOp.readsReg())
10995 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
11022 F,
"ds_ordered_count unsupported for this calling conv"));
11036 Register &SrcReg2, int64_t &CmpMask,
11037 int64_t &CmpValue)
const {
11038 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
11041 switch (
MI.getOpcode()) {
11044 case AMDGPU::S_CMP_EQ_U32:
11045 case AMDGPU::S_CMP_EQ_I32:
11046 case AMDGPU::S_CMP_LG_U32:
11047 case AMDGPU::S_CMP_LG_I32:
11048 case AMDGPU::S_CMP_LT_U32:
11049 case AMDGPU::S_CMP_LT_I32:
11050 case AMDGPU::S_CMP_GT_U32:
11051 case AMDGPU::S_CMP_GT_I32:
11052 case AMDGPU::S_CMP_LE_U32:
11053 case AMDGPU::S_CMP_LE_I32:
11054 case AMDGPU::S_CMP_GE_U32:
11055 case AMDGPU::S_CMP_GE_I32:
11056 case AMDGPU::S_CMP_EQ_U64:
11057 case AMDGPU::S_CMP_LG_U64:
11058 SrcReg =
MI.getOperand(0).getReg();
11059 if (
MI.getOperand(1).isReg()) {
11060 if (
MI.getOperand(1).getSubReg())
11062 SrcReg2 =
MI.getOperand(1).getReg();
11064 }
else if (
MI.getOperand(1).isImm()) {
11066 CmpValue =
MI.getOperand(1).getImm();
11072 case AMDGPU::S_CMPK_EQ_U32:
11073 case AMDGPU::S_CMPK_EQ_I32:
11074 case AMDGPU::S_CMPK_LG_U32:
11075 case AMDGPU::S_CMPK_LG_I32:
11076 case AMDGPU::S_CMPK_LT_U32:
11077 case AMDGPU::S_CMPK_LT_I32:
11078 case AMDGPU::S_CMPK_GT_U32:
11079 case AMDGPU::S_CMPK_GT_I32:
11080 case AMDGPU::S_CMPK_LE_U32:
11081 case AMDGPU::S_CMPK_LE_I32:
11082 case AMDGPU::S_CMPK_GE_U32:
11083 case AMDGPU::S_CMPK_GE_I32:
11084 SrcReg =
MI.getOperand(0).getReg();
11086 CmpValue =
MI.getOperand(1).getImm();
11096 if (S->isLiveIn(AMDGPU::SCC))
11105bool SIInstrInfo::invertSCCUse(
MachineInstr *SCCDef)
const {
11108 bool SCCIsDead =
false;
11111 constexpr unsigned ScanLimit = 12;
11112 unsigned Count = 0;
11113 for (MachineInstr &
MI :
11115 if (++
Count > ScanLimit)
11117 if (
MI.readsRegister(AMDGPU::SCC, &RI)) {
11118 if (
MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
11119 MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
11120 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11121 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
11126 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
11139 for (MachineInstr *
MI : InvertInstr) {
11140 if (
MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
11141 MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
11143 }
else if (
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11144 MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
11145 MI->setDesc(
get(
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
11146 ? AMDGPU::S_CBRANCH_SCC1
11147 : AMDGPU::S_CBRANCH_SCC0));
11160 bool NeedInversion)
const {
11161 MachineInstr *KillsSCC =
nullptr;
11166 if (
MI.modifiesRegister(AMDGPU::SCC, &RI))
11168 if (
MI.killsRegister(AMDGPU::SCC, &RI))
11171 if (NeedInversion && !invertSCCUse(SCCRedefine))
11173 if (MachineOperand *SccDef =
11175 SccDef->setIsDead(
false);
11183 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
11184 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
11186 bool Op1IsNonZeroImm =
11187 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
11188 bool Op2IsZeroImm =
11189 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
11190 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
11196 unsigned &NewDefOpc) {
11199 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
11200 Def.getOpcode() != AMDGPU::S_ADD_U32)
11206 if ((!AddSrc1.
isImm() || AddSrc1.
getImm() != 1) &&
11212 if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
11214 Def.findRegisterDefOperand(AMDGPU::SCC,
nullptr);
11217 NewDefOpc = AMDGPU::S_ADD_U32;
11219 NeedInversion = !NeedInversion;
11224 Register SrcReg2, int64_t CmpMask,
11233 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
11234 this](
bool NeedInversion) ->
bool {
11258 unsigned NewDefOpc = Def->getOpcode();
11264 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11267 if (NewDefOpc != Def->getOpcode())
11268 Def->setDesc(
get(NewDefOpc));
11277 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11284 if (Def1 && Def1->
getOpcode() == AMDGPU::COPY && Def2 &&
11292 optimizeSCC(
Select, Def,
false);
11299 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11300 this](int64_t ExpectedValue,
unsigned SrcSize,
11301 bool IsReversible,
bool IsSigned) ->
bool {
11329 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11330 Def->getOpcode() != AMDGPU::S_AND_B64)
11334 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
11345 SrcOp = &Def->getOperand(2);
11346 else if (isMask(&Def->getOperand(2)))
11347 SrcOp = &Def->getOperand(1);
11355 if (IsSigned && BitNo == SrcSize - 1)
11358 ExpectedValue <<= BitNo;
11360 bool IsReversedCC =
false;
11361 if (CmpValue != ExpectedValue) {
11364 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11369 Register DefReg = Def->getOperand(0).getReg();
11373 if (!optimizeSCC(Def, &CmpInstr,
false))
11384 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11385 : AMDGPU::S_BITCMP1_B32
11386 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11387 : AMDGPU::S_BITCMP1_B64;
11392 Def->eraseFromParent();
11400 case AMDGPU::S_CMP_EQ_U32:
11401 case AMDGPU::S_CMP_EQ_I32:
11402 case AMDGPU::S_CMPK_EQ_U32:
11403 case AMDGPU::S_CMPK_EQ_I32:
11404 return optimizeCmpAnd(1, 32,
true,
false) ||
11405 optimizeCmpSelect(
true);
11406 case AMDGPU::S_CMP_GE_U32:
11407 case AMDGPU::S_CMPK_GE_U32:
11408 return optimizeCmpAnd(1, 32,
false,
false);
11409 case AMDGPU::S_CMP_GE_I32:
11410 case AMDGPU::S_CMPK_GE_I32:
11411 return optimizeCmpAnd(1, 32,
false,
true);
11412 case AMDGPU::S_CMP_EQ_U64:
11413 return optimizeCmpAnd(1, 64,
true,
false);
11414 case AMDGPU::S_CMP_LG_U32:
11415 case AMDGPU::S_CMP_LG_I32:
11416 case AMDGPU::S_CMPK_LG_U32:
11417 case AMDGPU::S_CMPK_LG_I32:
11418 return optimizeCmpAnd(0, 32,
true,
false) ||
11419 optimizeCmpSelect(
false);
11420 case AMDGPU::S_CMP_GT_U32:
11421 case AMDGPU::S_CMPK_GT_U32:
11422 return optimizeCmpAnd(0, 32,
false,
false);
11423 case AMDGPU::S_CMP_GT_I32:
11424 case AMDGPU::S_CMPK_GT_I32:
11425 return optimizeCmpAnd(0, 32,
false,
true);
11426 case AMDGPU::S_CMP_LG_U64:
11427 return optimizeCmpAnd(0, 64,
true,
false) ||
11428 optimizeCmpSelect(
false);
11435 AMDGPU::OpName
OpName)
const {
11436 if (!ST.needsAlignedVGPRs())
11439 int OpNo = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
OpName);
11451 bool IsAGPR = RI.isAGPR(MRI, DataReg);
11453 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11457 : &AMDGPU::VReg_64_Align2RegClass);
11459 .
addReg(DataReg, {},
Op.getSubReg())
11464 Op.setSubReg(AMDGPU::sub0);
11479 if (ST.hasGFX1250Insts())
11486 unsigned Opcode =
MI.getOpcode();
11492 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11493 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11496 if (!ST.hasGFX940Insts())
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps, ArrayRef< Register > PhySGPRs={})
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static MachineBasicBlock * generateWaterFallLoop(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr, ArrayRef< Register > PhySGPRs={})
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool setsSCCIfResultIsZero(const MachineInstr &Def, bool &NeedInversion, unsigned &NewDefOpc)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
uint64_t getZExtValue() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
filtered_mop_range all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps)
Move NumOps operands from Src to Dst, updating use-def lists as needed.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
LLVM_ABI void clearVirtRegs()
clearVirtRegs - Remove all virtual registers (after physreg assignment).
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
void setSimpleHint(Register VReg, Register PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI void removeRegOperandFromUseList(MachineOperand *MO)
Remove MO from its use-def list.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI void addRegOperandToUseList(MachineOperand *MO)
Add MO to the linked list of operands for its register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
bool isSpill(uint32_t Opcode) const
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
const MIRFormatter * getMIRFormatter() const override
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
InstSizeVerifyMode getInstSizeVerifyMode(const MachineInstr &MI) const override
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIdx operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
Register isStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool hasRAWDependency(const MachineInstr &FirstMI, const MachineInstr &SecondMI) const
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
void handleCopyToPhysHelper(SIInstrWorklist &Worklist, Register DstReg, MachineInstr &Inst, MachineRegisterInfo &MRI, DenseMap< MachineInstr *, V2PhysSCopyInfo > &WaterFalls, DenseMap< MachineInstr *, bool > &V2SPhyCopiesToErase) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool isVOPDAntidependencyAllowed(const MachineInstr &MI) const
If OpX is multicycle, anti-dependencies are not allowed.
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void createWaterFallForSiCall(MachineInstr *MI, MachineDominatorTree *MDT, ArrayRef< MachineOperand * > ScalarOps, ArrayRef< Register > PhySGPRs={}) const
Wrapper function for generating a waterfall for instruction MI. This function takes into consideration of...
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
ValueUniformity getGenericValueUniformity(const MachineInstr &MI) const
static bool isMAI(const MCInstrDesc &Desc)
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const override
static bool usesLGKM_CNT(const MachineInstr &MI)
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst to fix 16bit SALU to VALU lowering.
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
bool isAlwaysGDS(uint32_t Opcode) const
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns true if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
void createReadFirstLaneFromCopyToPhysReg(MachineRegisterInfo &MRI, Register DstReg, MachineInstr &Inst) const
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool isWWMRegSpillOpcode(uint32_t Opcode)
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
ValueUniformity getValueUniformity(const MachineInstr &MI) const final
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst, DenseMap< MachineInstr *, V2PhysSCopyInfo > &WaterFalls, DenseMap< MachineInstr *, bool > &V2SPhyCopiesToErase) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
unsigned getScratchReservedForDynamicVGPRs() const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int32_t getCommuteRev(uint32_t Opcode)
LLVM_READONLY int32_t getCommuteOrig(uint32_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READONLY int32_t getGlobalVaddrOp(uint32_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getIfAddr64Inst(uint32_t Opcode)
Check if Opcode is an Addr64 opcode.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
LLVM_READONLY int32_t getAddr64Inst(uint32_t Opcode)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int32_t getBasicFromSDWAOp(uint32_t Opcode)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
LLVM_READONLY int32_t getFlatScratchInstSVfromSS(uint32_t Opcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr RegState getUndefRegState(bool B)
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
@ AlwaysUniform
The result value is always uniform.
@ NeverUniform
The result value can never be assumed to be uniform.
@ Default
The result value is uniform if and only if all operands are uniform.
MachineCycleInfo::CycleT MachineCycle
static const MachineMemOperand::Flags MOThreadPrivate
Mark the MMO of accesses to memory locations that are never written to by other threads.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
constexpr bool all() const
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.